#include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
#include <petsc/private/vecimpl.h>
#include <petsc/private/isimpl.h>
#include <petscblaslapack.h>
#include <petscsf.h>

/*MC
   MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
   and MATMPIAIJ otherwise.  As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes.  It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()

   Developer Notes:
   Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, MATAIJMKL, and MATAIJCRL; the type
   also automatically switches over to use inodes when enough of them exist.

   Level: beginner

.seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
M*/

/*MC
   MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
   and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes.  It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()

   Level: beginner

.seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
M*/
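
/*
   A minimal usage sketch (illustrative, not part of the original source): create an AIJ matrix and
   call both preallocation routines, as the manual pages above recommend.  The matrix size n and the
   per-row nonzero estimates (5 diagonal, 2 off-diagonal) are assumptions chosen only for the example.

       Mat A;
       ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
       ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,n,n);CHKERRQ(ierr);
       ierr = MatSetType(A,MATAIJ);CHKERRQ(ierr);
       ierr = MatSeqAIJSetPreallocation(A,5,NULL);CHKERRQ(ierr);
       ierr = MatMPIAIJSetPreallocation(A,5,NULL,2,NULL);CHKERRQ(ierr);
       ierr = MatSetFromOptions(A);CHKERRQ(ierr);

   Whichever preallocation routine does not match the communicator size is a no-op, so calling both
   is safe.
*/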

PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)M->data;

  PetscFunctionBegin;
  if (mat->A) {
    ierr = MatSetBlockSizes(mat->A,rbs,cbs);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(mat->B,rbs,1);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
{
  PetscErrorCode  ierr;
  Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
  Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
  Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
  const PetscInt  *ia,*ib;
  const MatScalar *aa,*bb;
  PetscInt        na,nb,i,j,*rows,cnt = 0,n0rows;
  PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;

  PetscFunctionBegin;
  *keptrows = 0;
  ia        = a->i;
  ib        = b->i;
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) {
      cnt++;
      goto ok1;
    }
    aa = a->a + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) goto ok1;
    }
    bb = b->a + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j] != 0.0) goto ok1;
    }
    cnt++;
ok1:;
  }
  ierr = MPIU_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr);
  if (!n0rows) PetscFunctionReturn(0);
  ierr = PetscMalloc1(M->rmap->n-cnt,&rows);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) continue;
    aa = a->a + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
    bb = b->a + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
ok2:;
  }
  ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatDiagonalSet_MPIAIJ(Mat Y,Vec D,InsertMode is)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)Y->data;

  PetscFunctionBegin;
  if (Y->assembled && Y->rmap->rstart == Y->cmap->rstart && Y->rmap->rend == Y->cmap->rend) {
    ierr = MatDiagonalSet(aij->A,D,is);CHKERRQ(ierr);
  } else {
    ierr = MatDiagonalSet_Default(Y,D,is);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)M->data;
  PetscErrorCode ierr;
  PetscInt       i,rstart,nrows,*rows;

  PetscFunctionBegin;
  *zrows = NULL;
  ierr   = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr);
  ierr   = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
  for (i=0; i<nrows; i++) rows[i] += rstart;
  ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
  PetscInt       i,n,*garray = aij->garray;
  Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*) aij->A->data;
  Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*) aij->B->data;
  PetscReal      *work;

  PetscFunctionBegin;
  ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
  ierr = PetscCalloc1(n,&work);CHKERRQ(ierr);
  if (type == NORM_2) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
    }
  } else if (type == NORM_1) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
    }
  } else if (type == NORM_INFINITY) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]),work[A->cmap->rstart + a_aij->j[i]]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
    }
  } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType");
  if (type == NORM_INFINITY) {
    ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  } else {
    ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  }
  ierr = PetscFree(work);CHKERRQ(ierr);
  if (type == NORM_2) {
    for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A,IS *is)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  IS             sis,gis;
  PetscErrorCode ierr;
  const PetscInt *isis,*igis;
  PetscInt       n,*iis,nsis,ngis,rstart,i;

  PetscFunctionBegin;
  ierr = MatFindOffBlockDiagonalEntries(a->A,&sis);CHKERRQ(ierr);
  ierr = MatFindNonzeroRows(a->B,&gis);CHKERRQ(ierr);
  ierr = ISGetSize(gis,&ngis);CHKERRQ(ierr);
  ierr = ISGetSize(sis,&nsis);CHKERRQ(ierr);
  ierr = ISGetIndices(sis,&isis);CHKERRQ(ierr);
  ierr = ISGetIndices(gis,&igis);CHKERRQ(ierr);

  ierr = PetscMalloc1(ngis+nsis,&iis);CHKERRQ(ierr);
  ierr = PetscMemcpy(iis,igis,ngis*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemcpy(iis+ngis,isis,nsis*sizeof(PetscInt));CHKERRQ(ierr);
  n    = ngis + nsis;
  ierr = PetscSortRemoveDupsInt(&n,iis);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
  for (i=0; i<n; i++) iis[i] += rstart;
  ierr = ISCreateGeneral(PetscObjectComm((PetscObject)A),n,iis,PETSC_OWN_POINTER,is);CHKERRQ(ierr);

  ierr = ISRestoreIndices(sis,&isis);CHKERRQ(ierr);
  ierr = ISRestoreIndices(gis,&igis);CHKERRQ(ierr);
  ierr = ISDestroy(&sis);CHKERRQ(ierr);
  ierr = ISDestroy(&gis);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
    Distributes a SeqAIJ matrix across a set of processes. Code stolen from
    MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.

    Only for square matrices

    Used by a preconditioner, hence PETSC_EXTERN
*/
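/*
   A call sketch (the names comm, gmat, m, and inmat are illustrative assumptions, not part of the
   original comment): gmat is a MATSEQAIJ matrix whose entries are taken from process 0, m is the
   number of rows this process should own, and the result is a parallel AIJ matrix on comm.

       ierr = MatDistribute_MPIAIJ(comm,gmat,m,MAT_INITIAL_MATRIX,&inmat);CHKERRQ(ierr);

   With MAT_REUSE_MATRIX only the numerical values are moved from process 0 into the existing inmat.
*/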
PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
{
  PetscMPIInt    rank,size;
  PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz = 0,*gmataj,cnt,row,*ld,bses[2];
  PetscErrorCode ierr;
  Mat            mat;
  Mat_SeqAIJ     *gmata;
  PetscMPIInt    tag;
  MPI_Status     status;
  PetscBool      aij;
  MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
    if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
  }
  if (reuse == MAT_INITIAL_MATRIX) {
    ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
    ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
    ierr = MatGetBlockSizes(gmat,&bses[0],&bses[1]);CHKERRQ(ierr);
    ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr);
    ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
    ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr);
    ierr = PetscMalloc2(m,&dlens,m,&olens);CHKERRQ(ierr);
    ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);

    rowners[0] = 0;
    for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
    rstart = rowners[rank];
    rend   = rowners[rank+1];
    ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
    if (!rank) {
      gmata = (Mat_SeqAIJ*) gmat->data;
      /* send row lengths to all processors */
      for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
      for (i=1; i<size; i++) {
        ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
      }
      /* determine number diagonal and off-diagonal counts */
      ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
      ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
      jj   = 0;
      for (i=0; i<m; i++) {
        for (j=0; j<dlens[i]; j++) {
          if (gmata->j[jj] < rstart) ld[i]++;
          if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
          jj++;
        }
      }
      /* send column indices to other processes */
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
        ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
      }

      /* send numerical values to other processes */
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
      }
      gmataa = gmata->a;
      gmataj = gmata->j;

    } else {
      /* receive row lengths */
      ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      /* receive column indices */
      ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      ierr = PetscMalloc2(nz,&gmataa,nz,&gmataj);CHKERRQ(ierr);
      ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      /* determine number diagonal and off-diagonal counts */
      ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
      ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
      jj   = 0;
      for (i=0; i<m; i++) {
        for (j=0; j<dlens[i]; j++) {
          if (gmataj[jj] < rstart) ld[i]++;
          if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
          jj++;
        }
      }
      /* receive numerical values */
      ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
      ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
    }
    /* set preallocation */
    for (i=0; i<m; i++) {
      dlens[i] -= olens[i];
    }
    ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);

    for (i=0; i<m; i++) {
      dlens[i] += olens[i];
    }
    cnt = 0;
    for (i=0; i<m; i++) {
      row  = rstart + i;
      ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
      cnt += dlens[i];
    }
    if (rank) {
      ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
    }
    ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
    ierr = PetscFree(rowners);CHKERRQ(ierr);

    ((Mat_MPIAIJ*)(mat->data))->ld = ld;

    *inmat = mat;
  } else { /* column indices are already set; only need to move over numerical values from process 0 */
    Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
    Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
    mat  = *inmat;
    ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
    if (!rank) {
      /* send numerical values to other processes */
      gmata  = (Mat_SeqAIJ*) gmat->data;
      ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
      gmataa = gmata->a;
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
      }
      nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
    } else {
      /* receive numerical values from process 0 */
      nz   = Ad->nz + Ao->nz;
      ierr = PetscMalloc1(nz,&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
      ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
    }
    /* transfer numerical values into the diagonal A and off-diagonal B parts of mat */
    ld = ((Mat_MPIAIJ*)(mat->data))->ld;
    ad = Ad->a;
    ao = Ao->a;
    if (mat->rmap->n) {
      i  = 0;
      nz = ld[i];                 ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    for (i=1; i<mat->rmap->n; i++) {
      nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    i--;
    if (mat->rmap->n) {
      nz = Ao->i[i+1] - Ao->i[i] - ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
    }
    if (rank) {
      ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
    }
  }
  ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  Local utility routine that creates a mapping from the global column
  number to the local number in the off-diagonal part of the local
  storage of the matrix.  When PETSC_USE_CTABLE is defined this is scalable at
  a slightly higher hash table cost; without it it is not scalable (each process
  has an order N integer array) but it is fast to access.
*/
PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       n = aij->B->cmap->n,i;

  PetscFunctionBegin;
  if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
#if defined(PETSC_USE_CTABLE)
  ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
  for (i=0; i<n; i++) {
    ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
  }
#else
  ierr = PetscCalloc1(mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
#endif
  PetscFunctionReturn(0);
}
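
/*
   Lookup sketch (illustrative; gcol and lcol are assumed names, matching how MatSetValues_MPIAIJ()
   and MatGetValues_MPIAIJ() below consult the colmap): translate a global column index gcol into a
   local column index lcol of the off-diagonal block B, with lcol < 0 meaning "not present".

       #if defined(PETSC_USE_CTABLE)
         ierr = PetscTableFind(aij->colmap,gcol+1,&lcol);CHKERRQ(ierr);
         lcol--;
       #else
         lcol = aij->colmap[gcol] - 1;
       #endif

   The +1/-1 shift lets 0 serve as the "missing" value both in the table and in the calloc'd array.
*/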

#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv,orow,ocol) \
{ \
  if (col <= lastcol1)  low1 = 0; \
  else                 high1 = nrow1; \
  lastcol1 = col; \
  while (high1-low1 > 5) { \
    t = (low1+high1)/2; \
    if (rp1[t] > col) high1 = t; \
    else              low1  = t; \
  } \
  for (_i=low1; _i<high1; _i++) { \
    if (rp1[_i] > col) break; \
    if (rp1[_i] == col) { \
      if (addv == ADD_VALUES) ap1[_i] += value; \
      else                    ap1[_i]  = value; \
      goto a_noinsert; \
    } \
  } \
  if (value == 0.0 && ignorezeroentries && row != col) {low1 = 0; high1 = nrow1; goto a_noinsert;} \
  if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \
  if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
  MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
  N = nrow1++ - 1; a->nz++; high1++; \
  /* shift up all the later entries in this row */ \
  for (ii=N; ii>=_i; ii--) { \
    rp1[ii+1] = rp1[ii]; \
    ap1[ii+1] = ap1[ii]; \
  } \
  rp1[_i] = col; \
  ap1[_i] = value; \
  A->nonzerostate++; \
  a_noinsert: ; \
  ailen[row] = nrow1; \
}

#define MatSetValues_SeqAIJ_B_Private(row,col,value,addv,orow,ocol) \
{ \
  if (col <= lastcol2)  low2 = 0; \
  else                 high2 = nrow2; \
  lastcol2 = col; \
  while (high2-low2 > 5) { \
    t = (low2+high2)/2; \
    if (rp2[t] > col) high2 = t; \
    else              low2  = t; \
  } \
  for (_i=low2; _i<high2; _i++) { \
    if (rp2[_i] > col) break; \
    if (rp2[_i] == col) { \
      if (addv == ADD_VALUES) ap2[_i] += value; \
      else                    ap2[_i]  = value; \
      goto b_noinsert; \
    } \
  } \
  if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
  if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
  if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
  MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
  N = nrow2++ - 1; b->nz++; high2++; \
  /* shift up all the later entries in this row */ \
  for (ii=N; ii>=_i; ii--) { \
    rp2[ii+1] = rp2[ii]; \
    ap2[ii+1] = ap2[ii]; \
  } \
  rp2[_i] = col; \
  ap2[_i] = value; \
  B->nonzerostate++; \
  b_noinsert: ; \
  bilen[row] = nrow2; \
}

PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
  PetscErrorCode ierr;
  PetscInt       l,*garray = mat->garray,diag;

  PetscFunctionBegin;
  /* code only works for square matrices A */

  /* find size of row to the left of the diagonal part */
  ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
  row  = row - diag;
  for (l=0; l<b->i[row+1]-b->i[row]; l++) {
    if (garray[b->j[b->i[row]+l]] > diag) break;
  }
  ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);

  /* diagonal part */
  ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);

  /* right of diagonal part */
  ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar    value;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
  PetscBool      roworiented = aij->roworiented;

  /* Some Variables required in the macro */
  Mat        A  = aij->A;
  Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
  PetscInt   *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
  MatScalar  *aa = a->a;
  PetscBool  ignorezeroentries = a->ignorezeroentries;
  Mat        B  = aij->B;
  Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data;
  PetscInt   *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
  MatScalar  *ba = b->a;

  PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
  PetscInt  nonew;
  MatScalar *ap1,*ap2;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row      = im[i] - rstart;
      lastcol1 = -1;
      rp1      = aj + ai[row];
      ap1      = aa + ai[row];
      rmax1    = aimax[row];
      nrow1    = ailen[row];
      low1     = 0;
      high1    = nrow1;
      lastcol2 = -1;
      rp2      = bj + bi[row];
      ap2      = ba + bi[row];
      rmax2    = bimax[row];
      nrow2    = bilen[row];
      low2     = 0;
      high2    = nrow2;

      for (j=0; j<n; j++) {
        if (roworiented) value = v[i*n+j];
        else             value = v[i+j*m];
        if (in[j] >= cstart && in[j] < cend) {
          col   = in[j] - cstart;
          nonew = a->nonew;
          if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue;
          MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
#endif
        else {
          if (mat->was_assembled) {
            if (!aij->colmap) {
              ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined(PETSC_USE_CTABLE)
            ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col--;
#else
            col = aij->colmap[in[j]] - 1;
#endif
            if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) {
              ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
              B     = aij->B;
              b     = (Mat_SeqAIJ*)B->data;
              bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
              rp2   = bj + bi[row];
              ap2   = ba + bi[row];
              rmax2 = bimax[row];
              nrow2 = bilen[row];
              low2  = 0;
              high2 = nrow2;
              bm    = aij->B->rmap->n;
              ba    = b->a;
            } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", im[i], in[j]);
          } else col = in[j];
          nonew = b->nonew;
          MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
        }
      }
    } else {
      if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!aij->donotstash) {
        mat->assembled = PETSC_FALSE;
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); */
    if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
        if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
        if (idxn[j] >= cstart && idxn[j] < cend) {
          col  = idxn[j] - cstart;
          ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!aij->colmap) {
            ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined(PETSC_USE_CTABLE)
          ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
          col--;
#else
          col = aij->colmap[idxn[j]] - 1;
#endif
          if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
          else {
            ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
  }
  PetscFunctionReturn(0);
}

extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);
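
/*
   Note (added commentary): entries destined for rows owned by another process are kept in
   mat->stash by MatSetValues_MPIAIJ().  MatAssemblyBegin() below only starts shipping the stashed
   entries to their owners; MatAssemblyEnd() receives them, inserts them with MatSetValues_MPIAIJ(),
   assembles the diagonal (A) and off-diagonal (B) blocks, and on the first final assembly builds
   the scatter used for matrix-vector products.
*/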

PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;

  PetscFunctionBegin;
  if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)aij->A->data;
  PetscErrorCode ierr;
  PetscMPIInt    n;
  PetscInt       i,j,rstart,ncols,flg;
  PetscInt       *row,*col;
  PetscBool      other_disassembled;
  PetscScalar    *val;

  /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */

  PetscFunctionBegin;
  if (!aij->donotstash && !mat->nooffprocentries) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n; ) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);

        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
    ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
    }
  }
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);

  ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);

  aij->rowvalues = 0;

  ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
  if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;

  /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
  if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
    PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
    ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
  PetscInt       *lrows;
  PetscInt       r,len;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* get locally owned rows */
  ierr = MatZeroRowsMapLocal_Private(A,N,rows,&len,&lrows);CHKERRQ(ierr);
  /* fix right hand side if needed */
  if (x && b) {
    const PetscScalar *xx;
    PetscScalar       *bb;

    ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr);
    ierr = VecGetArray(b, &bb);CHKERRQ(ierr);
    for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
    ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr);
    ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr);
  }
  /* Must zero mat->B before mat->A because the (diag) case below may put values into mat->B */
  ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
  if (A->congruentlayouts == -1) { /* first time we compare rows and cols layouts */
    PetscBool cong;
    ierr = PetscLayoutCompare(A->rmap,A->cmap,&cong);CHKERRQ(ierr);
    if (cong) A->congruentlayouts = 1;
    else      A->congruentlayouts = 0;
  }
  if ((diag != 0.0) && A->congruentlayouts) {
    ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr);
  } else if (diag != 0.0) {
    ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
    if (((Mat_SeqAIJ *) mat->A->data)->nonew) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "MatZeroRows() on rectangular matrices cannot be used with the Mat options\nMAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
    for (r = 0; r < len; ++r) {
      const PetscInt row = lrows[r] + A->rmap->rstart;
      ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr);
    }
    ierr = MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  } else {
    ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
  }
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* only change matrix nonzero state if pattern was allowed to be changed */
  if (!((Mat_SeqAIJ*)(mat->A->data))->keepnonzeropattern) {
    PetscObjectState state = mat->A->nonzerostate + mat->B->nonzerostate;
    ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
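
/*
   Note (added commentary): rows to be zeroed may be specified on any process, so the routine below
   builds a PetscSF whose leaves are the requested global rows and whose roots are the locally owned
   rows; reducing over it flags the local rows to zero.  The off-diagonal block is then cleaned up
   by scattering an indicator vector through l->Mvctx and zeroing the columns whose mask is nonzero.
*/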
PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode    ierr;
  PetscMPIInt       n = A->rmap->n;
  PetscInt          i,j,r,m,p = 0,len = 0;
  PetscInt          *lrows,*owners = A->rmap->range;
  PetscSFNode       *rrows;
  PetscSF           sf;
  const PetscScalar *xx;
  PetscScalar       *bb,*mask;
  Vec               xmask,lmask;
  Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
  const PetscInt    *aj, *ii,*ridx;
  PetscScalar       *aa;

  PetscFunctionBegin;
  /* Create SF where leaves are input rows and roots are owned rows */
  ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
  for (r = 0; r < n; ++r) lrows[r] = -1;
  ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);
  for (r = 0; r < N; ++r) {
    const PetscInt idx = rows[r];
    if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N);
    if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
      ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr);
    }
    rrows[r].rank  = p;
    rrows[r].index = rows[r] - owners[p];
  }
  ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
  ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
  /* Collect flags for rows to be zeroed */
  ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
  ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
  ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
  /* Compress and put in row numbers */
  for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
  /* zero diagonal part of matrix */
  ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr);
  /* handle off diagonal part of matrix */
  ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr);
  ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
  ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
  for (i=0; i<len; i++) bb[lrows[i]] = 1;
  ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
  ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecDestroy(&xmask);CHKERRQ(ierr);
  if (x) {
    ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
    ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
  }
  ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
  /* remove zeroed rows of off diagonal matrix */
  ii = aij->i;
  for (i=0; i<len; i++) {
    ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr);
  }
  /* loop over all elements of off process part of matrix zeroing removed columns */
  if (aij->compressedrow.use) {
    m    = aij->compressedrow.nrows;
    ii   = aij->compressedrow.i;
    ridx = aij->compressedrow.rindex;
    for (i=0; i<m; i++) {
      n  = ii[i+1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij->a + ii[i];

      for (j=0; j<n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          if (b) bb[*ridx] -= *aa*xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
      ridx++;
    }
  } else { /* do not use compressed row format */
    m = l->B->rmap->n;
    for (i=0; i<m; i++) {
      n  = ii[i+1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij->a + ii[i];
      for (j=0; j<n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          if (b) bb[i] -= *aa*xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
    }
  }
  if (x) {
    ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
    ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
  }
  ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
  ierr = VecDestroy(&lmask);CHKERRQ(ierr);
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* only change matrix nonzero state if pattern was allowed to be changed */
  if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) {
    PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
    ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
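
/*
   Note (added commentary): the multiplication kernels below all follow the same pattern.  The
   diagonal block a->A acts on the locally owned entries of xx, while the scatter a->Mvctx gathers
   the ghost entries of xx needed by the off-diagonal block a->B into a->lvec.  Starting the
   scatter, doing the local product, and only then finishing the scatter lets the communication
   overlap the local computation.
*/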
PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscBool      merged;

  PetscFunctionBegin;
  ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  if (!merged) {
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* receive remote parts: note this assumes the values are not actually added in yy until the next line */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  } else {
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* values actually were received in the Begin() but we need to call this nop */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool *f)
{
  MPI_Comm       comm;
  Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*) Amat->data, *Bij;
  Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
  IS             Me,Notme;
  PetscErrorCode ierr;
  PetscInt       M,N,first,last,*notme,i;
  PetscMPIInt    size;

  PetscFunctionBegin;
  /* Easy test: symmetric diagonal block */
  Bij  = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A;
  ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
  if (!*f) PetscFunctionReturn(0);
  ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size == 1) PetscFunctionReturn(0);

  /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. */
  ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
  ierr = PetscMalloc1(N-last+first,&notme);CHKERRQ(ierr);
  for (i=0; i<first; i++) notme[i] = i;
  for (i=last; i<M; i++) notme[i-last+first] = i;
  ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
  ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
  ierr = MatCreateSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
  Aoff = Aoffs[0];
  ierr = MatCreateSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
  Boff = Boffs[0];
  ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
  ierr = ISDestroy(&Me);CHKERRQ(ierr);
  ierr = ISDestroy(&Notme);CHKERRQ(ierr);
  ierr = PetscFree(notme);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatIsSymmetric_MPIAIJ(Mat A,PetscReal tol,PetscBool *f)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatIsTranspose_MPIAIJ(A,A,tol,f);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* receive remote parts */
  ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
  if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatScale(a->A,aa);CHKERRQ(ierr);
  ierr = MatScale(a->B,aa);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
  ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
  ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
#if defined(PETSC_USE_CTABLE)
  ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
#else
  ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(aij->garray);CHKERRQ(ierr);
  ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
  ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
  ierr = PetscFree(aij->ld);CHKERRQ(ierr);
  ierr = PetscFree(mat->data);CHKERRQ(ierr);

  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatResetPreallocation_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr);
#if defined(PETSC_HAVE_ELEMENTAL)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_elemental_C",NULL);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_HYPRE)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_hypre_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMatMult_transpose_mpiaij_mpiaij_C",NULL);CHKERRQ(ierr);
#endif
  PetscFunctionReturn(0);
}
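
/*
   Note (added commentary): the binary viewer below writes, in order, a four-entry header
   (MAT_FILE_CLASSID, global rows, global columns, global nonzero count), then the row lengths,
   then the column indices, and finally the numerical values; process 0 collects each block from
   the other processes under flow control before writing it to the file.
*/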
PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *A   = (Mat_SeqAIJ*)aij->A->data;
  Mat_SeqAIJ     *B   = (Mat_SeqAIJ*)aij->B->data;
  PetscErrorCode ierr;
  PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
  int            fd;
  PetscInt       nz,header[4],*row_lengths,*range=0,rlen,i;
  PetscInt       nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz = 0;
  PetscScalar    *column_values;
  PetscInt       message_count,flowcontrolcount;
  FILE           *file;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);
  nz   = A->nz + B->nz;
  ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
  if (!rank) {
    header[0] = MAT_FILE_CLASSID;
    header[1] = mat->rmap->N;
    header[2] = mat->cmap->N;

    ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    /* get largest number of rows any processor has */
    rlen  = mat->rmap->n;
    range = mat->rmap->range;
    for (i=1; i<size; i++) rlen = PetscMax(rlen,range[i+1] - range[i]);
  } else {
    ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    rlen = mat->rmap->n;
  }

  /* load up the local row counts */
  ierr = PetscMalloc1(rlen+1,&row_lengths);CHKERRQ(ierr);
  for (i=0; i<mat->rmap->n; i++) row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];

  /* store the row lengths to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
      rlen = range[i+1] - range[i];
      ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
    ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(row_lengths);CHKERRQ(ierr);

  /* load up the local column indices */
  nzmax = nz; /* process 0 needs as much space as the process with the most nonzeros */
  ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  ierr  = PetscMalloc1(nzmax+1,&column_indices);CHKERRQ(ierr);
  cnt   = 0;
  for (i=0; i<mat->rmap->n; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if ((col = garray[B->j[j]]) > cstart) break;
      column_indices[cnt++] = col;
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) column_indices[cnt++] = A->j[k] + cstart;
    for (; j<B->i[i+1]; j++) column_indices[cnt++] = garray[B->j[j]];
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column indices to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
      ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
      ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_indices);CHKERRQ(ierr);

  /* load up the local column values */
  ierr = PetscMalloc1(nzmax+1,&column_values);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<mat->rmap->n; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if (garray[B->j[j]] > cstart) break;
      column_values[cnt++] = B->a[j];
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) column_values[cnt++] = A->a[k];
    for (; j<B->i[i+1]; j++) column_values[cnt++] = B->a[j];
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column values to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
      ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
      ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_values);CHKERRQ(ierr);

  ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
  if (file) fprintf(file,"-matload_block_size %d\n",(int)PetscAbs(mat->rmap->bs));
  PetscFunctionReturn(0);
}

#include <petscdraw.h>
PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode    ierr;
  PetscMPIInt       rank = aij->rank,size = aij->size;
  PetscBool         isdraw,iascii,isbinary;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo   info;
      PetscBool inodes;

      ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
      if (!inodes) {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
                                                  rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
      } else {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
                                                  rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
      }
      ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
      ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscInt inodecount,inodelimit,*inodes;
      ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
      if (inodes) {
        ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
      } else {
        ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
      }
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
  } else if (isbinary) {
    if (size == 1) {
      ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
      ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
    } else {
      ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
    }
    PetscFunctionReturn(0);
  } else if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
    if (isnull) PetscFunctionReturn(0);
  }

  {
    /* assemble the entire matrix onto first processor */
    Mat        A;
    Mat_SeqAIJ *Aloc;
    PetscInt   M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
    MatScalar  *a;

    ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
    } else {
      ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
    }
    /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
    ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(A,0,NULL,0,NULL);CHKERRQ(ierr);
    ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);

    /* copy over the A part */
    Aloc = (Mat_SeqAIJ*)aij->A->data;
    m    = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    row  = mat->rmap->rstart;
    for (i=0; i<ai[m]; i++) aj[i] += mat->cmap->rstart;
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
      row++;
      a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
    }
    aj = Aloc->j;
    for (i=0; i<ai[m]; i++) aj[i] -= mat->cmap->rstart;

    /* copy over the B part */
    Aloc = (Mat_SeqAIJ*)aij->B->data;
    m    = aij->B->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    row  = mat->rmap->rstart;
    ierr = PetscMalloc1(ai[m]+1,&cols);CHKERRQ(ierr);
    ct   = cols;
    for (i=0; i<ai[m]; i++) cols[i] = aij->garray[aj[i]];
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
      row++;
      a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
    }
    ierr = PetscFree(ct);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
      ierr = MatView_SeqAIJ(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
    ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
    ierr = MatDestroy(&A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscBool      iascii,isdraw,issocket,isbinary;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
  if (iascii || isdraw || isbinary || issocket) {
    ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
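
/*
   Note (added commentary): the SOR below is a block-Jacobi style relaxation.  Only the local
   (on-process) sweeps SOR_LOCAL_FORWARD/BACKWARD/SYMMETRIC_SWEEP and SOR_EISENSTAT are handled;
   before each local sweep the right-hand side is updated to bb1 = bb - B*x using the ghost values
   gathered into mat->lvec, and a truly parallel (global) SOR sweep is not supported.
*/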
1424 PetscFunctionBegin; 1425 if (flag == SOR_APPLY_UPPER) { 1426 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1427 PetscFunctionReturn(0); 1428 } 1429 1430 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) { 1431 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1432 } 1433 1434 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 1435 if (flag & SOR_ZERO_INITIAL_GUESS) { 1436 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1437 its--; 1438 } 1439 1440 while (its--) { 1441 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1442 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1443 1444 /* update rhs: bb1 = bb - B*x */ 1445 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1446 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1447 1448 /* local sweep */ 1449 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1450 } 1451 } else if (flag & SOR_LOCAL_FORWARD_SWEEP) { 1452 if (flag & SOR_ZERO_INITIAL_GUESS) { 1453 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1454 its--; 1455 } 1456 while (its--) { 1457 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1458 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1459 1460 /* update rhs: bb1 = bb - B*x */ 1461 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1462 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1463 1464 /* local sweep */ 1465 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1466 } 1467 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) { 1468 if (flag & SOR_ZERO_INITIAL_GUESS) { 1469 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1470 its--; 1471 } 1472 while (its--) { 1473 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1474 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1475 1476 /* update rhs: bb1 = bb - B*x */ 1477 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1478 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1479 1480 /* local sweep */ 1481 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1482 } 1483 } else if (flag & SOR_EISENSTAT) { 1484 Vec xx1; 1485 1486 ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr); 1487 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr); 1488 1489 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1490 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1491 if (!mat->diag) { 1492 ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr); 1493 ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr); 1494 } 1495 ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr); 1496 if (hasop) { 1497 ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr); 1498 } else { 1499 ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr); 1500 } 1501 ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr); 1502 1503 ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 1504 1505 /* local sweep */ 1506 ierr = 
(*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr); 1507 ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr); 1508 ierr = VecDestroy(&xx1);CHKERRQ(ierr); 1509 } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported"); 1510 1511 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 1512 1513 matin->factorerrortype = mat->A->factorerrortype; 1514 PetscFunctionReturn(0); 1515 } 1516 1517 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B) 1518 { 1519 Mat aA,aB,Aperm; 1520 const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj; 1521 PetscScalar *aa,*ba; 1522 PetscInt i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest; 1523 PetscSF rowsf,sf; 1524 IS parcolp = NULL; 1525 PetscBool done; 1526 PetscErrorCode ierr; 1527 1528 PetscFunctionBegin; 1529 ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr); 1530 ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr); 1531 ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr); 1532 ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr); 1533 1534 /* Invert row permutation to find out where my rows should go */ 1535 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr); 1536 ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr); 1537 ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr); 1538 for (i=0; i<m; i++) work[i] = A->rmap->rstart + i; 1539 ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr); 1540 ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr); 1541 1542 /* Invert column permutation to find out where my columns should go */ 1543 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1544 ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr); 1545 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1546 for (i=0; i<n; i++) work[i] = A->cmap->rstart + i; 1547 ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr); 1548 ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr); 1549 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1550 1551 ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr); 1552 ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr); 1553 ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr); 1554 1555 /* Find out where my gcols should go */ 1556 ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr); 1557 ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr); 1558 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1559 ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr); 1560 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1561 ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr); 1562 ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr); 1563 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1564 1565 ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr); 1566 ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1567 ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1568 for (i=0; i<m; i++) { 1569 PetscInt row = rdest[i],rowner; 1570 ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr); 1571 for (j=ai[i]; j<ai[i+1]; j++) { 1572 PetscInt cowner,col = cdest[aj[j]]; 1573 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to 
eliminate this search */ 1574 if (rowner == cowner) dnnz[i]++; 1575 else onnz[i]++; 1576 } 1577 for (j=bi[i]; j<bi[i+1]; j++) { 1578 PetscInt cowner,col = gcdest[bj[j]]; 1579 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); 1580 if (rowner == cowner) dnnz[i]++; 1581 else onnz[i]++; 1582 } 1583 } 1584 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr); 1585 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr); 1586 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr); 1587 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr); 1588 ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr); 1589 1590 ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr); 1591 ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr); 1592 ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr); 1593 for (i=0; i<m; i++) { 1594 PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */ 1595 PetscInt j0,rowlen; 1596 rowlen = ai[i+1] - ai[i]; 1597 for (j0=j=0; j<rowlen; j0=j) { /* rowlen could be larger than number of rows m, so sum in batches */ 1598 for ( ; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]]; 1599 ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr); 1600 } 1601 rowlen = bi[i+1] - bi[i]; 1602 for (j0=j=0; j<rowlen; j0=j) { 1603 for ( ; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]]; 1604 ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr); 1605 } 1606 } 1607 ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1608 ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1609 ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1610 ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1611 ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr); 1612 ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr); 1613 ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr); 1614 ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr); 1615 ierr = PetscFree(gcdest);CHKERRQ(ierr); 1616 if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);} 1617 *B = Aperm; 1618 PetscFunctionReturn(0); 1619 } 1620 1621 PetscErrorCode MatGetGhosts_MPIAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[]) 1622 { 1623 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1624 PetscErrorCode ierr; 1625 1626 PetscFunctionBegin; 1627 ierr = MatGetSize(aij->B,NULL,nghosts);CHKERRQ(ierr); 1628 if (ghosts) *ghosts = aij->garray; 1629 PetscFunctionReturn(0); 1630 } 1631 1632 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1633 { 1634 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1635 Mat A = mat->A,B = mat->B; 1636 PetscErrorCode ierr; 1637 PetscReal isend[5],irecv[5]; 1638 1639 PetscFunctionBegin; 1640 info->block_size = 1.0; 1641 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1642 1643 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1644 isend[3] = info->memory; isend[4] = info->mallocs; 1645 1646 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1647 1648 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1649 isend[3] += info->memory; isend[4] += info->mallocs; 1650 if (flag == MAT_LOCAL) { 1651 info->nz_used = isend[0]; 1652 info->nz_allocated = isend[1]; 1653 info->nz_unneeded = isend[2]; 1654 info->memory = isend[3]; 1655 info->mallocs = 
isend[4]; 1656 } else if (flag == MAT_GLOBAL_MAX) { 1657 ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1658 1659 info->nz_used = irecv[0]; 1660 info->nz_allocated = irecv[1]; 1661 info->nz_unneeded = irecv[2]; 1662 info->memory = irecv[3]; 1663 info->mallocs = irecv[4]; 1664 } else if (flag == MAT_GLOBAL_SUM) { 1665 ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1666 1667 info->nz_used = irecv[0]; 1668 info->nz_allocated = irecv[1]; 1669 info->nz_unneeded = irecv[2]; 1670 info->memory = irecv[3]; 1671 info->mallocs = irecv[4]; 1672 } 1673 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1674 info->fill_ratio_needed = 0; 1675 info->factor_mallocs = 0; 1676 PetscFunctionReturn(0); 1677 } 1678 1679 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg) 1680 { 1681 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1682 PetscErrorCode ierr; 1683 1684 PetscFunctionBegin; 1685 switch (op) { 1686 case MAT_NEW_NONZERO_LOCATIONS: 1687 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1688 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1689 case MAT_KEEP_NONZERO_PATTERN: 1690 case MAT_NEW_NONZERO_LOCATION_ERR: 1691 case MAT_USE_INODES: 1692 case MAT_IGNORE_ZERO_ENTRIES: 1693 MatCheckPreallocated(A,1); 1694 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1695 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1696 break; 1697 case MAT_ROW_ORIENTED: 1698 MatCheckPreallocated(A,1); 1699 a->roworiented = flg; 1700 1701 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1702 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1703 break; 1704 case MAT_NEW_DIAGONALS: 1705 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1706 break; 1707 case MAT_IGNORE_OFF_PROC_ENTRIES: 1708 a->donotstash = flg; 1709 break; 1710 case MAT_SPD: 1711 A->spd_set = PETSC_TRUE; 1712 A->spd = flg; 1713 if (flg) { 1714 A->symmetric = PETSC_TRUE; 1715 A->structurally_symmetric = PETSC_TRUE; 1716 A->symmetric_set = PETSC_TRUE; 1717 A->structurally_symmetric_set = PETSC_TRUE; 1718 } 1719 break; 1720 case MAT_SYMMETRIC: 1721 MatCheckPreallocated(A,1); 1722 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1723 break; 1724 case MAT_STRUCTURALLY_SYMMETRIC: 1725 MatCheckPreallocated(A,1); 1726 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1727 break; 1728 case MAT_HERMITIAN: 1729 MatCheckPreallocated(A,1); 1730 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1731 break; 1732 case MAT_SYMMETRY_ETERNAL: 1733 MatCheckPreallocated(A,1); 1734 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1735 break; 1736 case MAT_SUBMAT_SINGLEIS: 1737 A->submat_singleis = flg; 1738 break; 1739 case MAT_STRUCTURE_ONLY: 1740 /* The option is handled directly by MatSetOption() */ 1741 break; 1742 default: 1743 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1744 } 1745 PetscFunctionReturn(0); 1746 } 1747 1748 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1749 { 1750 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1751 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1752 PetscErrorCode ierr; 1753 PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart; 1754 PetscInt nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend; 1755 PetscInt *cmap,*idx_p; 1756 1757 PetscFunctionBegin; 1758 if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1759 mat->getrowactive = PETSC_TRUE; 1760 1761 if (!mat->rowvalues && 
(idx || v)) { 1762 /* 1763 allocate enough space to hold information from the longest row. 1764 */ 1765 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1766 PetscInt max = 1,tmp; 1767 for (i=0; i<matin->rmap->n; i++) { 1768 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1769 if (max < tmp) max = tmp; 1770 } 1771 ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr); 1772 } 1773 1774 if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows"); 1775 lrow = row - rstart; 1776 1777 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1778 if (!v) {pvA = 0; pvB = 0;} 1779 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1780 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1781 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1782 nztot = nzA + nzB; 1783 1784 cmap = mat->garray; 1785 if (v || idx) { 1786 if (nztot) { 1787 /* Sort by increasing column numbers, assuming A and B already sorted */ 1788 PetscInt imark = -1; 1789 if (v) { 1790 *v = v_p = mat->rowvalues; 1791 for (i=0; i<nzB; i++) { 1792 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1793 else break; 1794 } 1795 imark = i; 1796 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1797 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1798 } 1799 if (idx) { 1800 *idx = idx_p = mat->rowindices; 1801 if (imark > -1) { 1802 for (i=0; i<imark; i++) { 1803 idx_p[i] = cmap[cworkB[i]]; 1804 } 1805 } else { 1806 for (i=0; i<nzB; i++) { 1807 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1808 else break; 1809 } 1810 imark = i; 1811 } 1812 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1813 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1814 } 1815 } else { 1816 if (idx) *idx = 0; 1817 if (v) *v = 0; 1818 } 1819 } 1820 *nz = nztot; 1821 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1822 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1823 PetscFunctionReturn(0); 1824 } 1825 1826 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1827 { 1828 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1829 1830 PetscFunctionBegin; 1831 if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1832 aij->getrowactive = PETSC_FALSE; 1833 PetscFunctionReturn(0); 1834 } 1835 1836 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1837 { 1838 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1839 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1840 PetscErrorCode ierr; 1841 PetscInt i,j,cstart = mat->cmap->rstart; 1842 PetscReal sum = 0.0; 1843 MatScalar *v; 1844 1845 PetscFunctionBegin; 1846 if (aij->size == 1) { 1847 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1848 } else { 1849 if (type == NORM_FROBENIUS) { 1850 v = amat->a; 1851 for (i=0; i<amat->nz; i++) { 1852 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1853 } 1854 v = bmat->a; 1855 for (i=0; i<bmat->nz; i++) { 1856 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1857 } 1858 ierr = MPIU_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1859 *norm = PetscSqrtReal(*norm); 1860 ierr = PetscLogFlops(2*amat->nz+2*bmat->nz);CHKERRQ(ierr); 1861 } else if (type == NORM_1) { /* max column norm */ 1862 PetscReal *tmp,*tmp2; 1863 PetscInt *jj,*garray = aij->garray; 1864 ierr = 
PetscCalloc1(mat->cmap->N+1,&tmp);CHKERRQ(ierr); 1865 ierr = PetscMalloc1(mat->cmap->N+1,&tmp2);CHKERRQ(ierr); 1866 *norm = 0.0; 1867 v = amat->a; jj = amat->j; 1868 for (j=0; j<amat->nz; j++) { 1869 tmp[cstart + *jj++] += PetscAbsScalar(*v); v++; 1870 } 1871 v = bmat->a; jj = bmat->j; 1872 for (j=0; j<bmat->nz; j++) { 1873 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 1874 } 1875 ierr = MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1876 for (j=0; j<mat->cmap->N; j++) { 1877 if (tmp2[j] > *norm) *norm = tmp2[j]; 1878 } 1879 ierr = PetscFree(tmp);CHKERRQ(ierr); 1880 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1881 ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr); 1882 } else if (type == NORM_INFINITY) { /* max row norm */ 1883 PetscReal ntemp = 0.0; 1884 for (j=0; j<aij->A->rmap->n; j++) { 1885 v = amat->a + amat->i[j]; 1886 sum = 0.0; 1887 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 1888 sum += PetscAbsScalar(*v); v++; 1889 } 1890 v = bmat->a + bmat->i[j]; 1891 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 1892 sum += PetscAbsScalar(*v); v++; 1893 } 1894 if (sum > ntemp) ntemp = sum; 1895 } 1896 ierr = MPIU_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1897 ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr); 1898 } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm"); 1899 } 1900 PetscFunctionReturn(0); 1901 } 1902 1903 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout) 1904 { 1905 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1906 Mat_SeqAIJ *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data; 1907 PetscErrorCode ierr; 1908 PetscInt M = A->rmap->N,N = A->cmap->N,ma,na,mb,nb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i; 1909 PetscInt cstart = A->cmap->rstart,ncol; 1910 Mat B; 1911 MatScalar *array; 1912 1913 PetscFunctionBegin; 1914 if (reuse == MAT_INPLACE_MATRIX && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1915 1916 ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n; 1917 ai = Aloc->i; aj = Aloc->j; 1918 bi = Bloc->i; bj = Bloc->j; 1919 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 1920 PetscInt *d_nnz,*g_nnz,*o_nnz; 1921 PetscSFNode *oloc; 1922 PETSC_UNUSED PetscSF sf; 1923 1924 ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr); 1925 /* compute d_nnz for preallocation */ 1926 ierr = PetscMemzero(d_nnz,na*sizeof(PetscInt));CHKERRQ(ierr); 1927 for (i=0; i<ai[ma]; i++) { 1928 d_nnz[aj[i]]++; 1929 aj[i] += cstart; /* global col index to be used by MatSetValues() */ 1930 } 1931 /* compute local off-diagonal contributions */ 1932 ierr = PetscMemzero(g_nnz,nb*sizeof(PetscInt));CHKERRQ(ierr); 1933 for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++; 1934 /* map those to global */ 1935 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1936 ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr); 1937 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1938 ierr = PetscMemzero(o_nnz,na*sizeof(PetscInt));CHKERRQ(ierr); 1939 ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 1940 ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 1941 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1942 1943 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 1944 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 1945 
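    /* B gets the transposed parallel layout: the local and global row/column sizes of A
       are swapped here, and the row/column block sizes are swapped just below */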
ierr = MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr); 1946 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 1947 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 1948 ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr); 1949 } else { 1950 B = *matout; 1951 ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 1952 for (i=0; i<ai[ma]; i++) aj[i] += cstart; /* global col index to be used by MatSetValues() */ 1953 } 1954 1955 /* copy over the A part */ 1956 array = Aloc->a; 1957 row = A->rmap->rstart; 1958 for (i=0; i<ma; i++) { 1959 ncol = ai[i+1]-ai[i]; 1960 ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1961 row++; 1962 array += ncol; aj += ncol; 1963 } 1964 aj = Aloc->j; 1965 for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */ 1966 1967 /* copy over the B part */ 1968 ierr = PetscCalloc1(bi[mb],&cols);CHKERRQ(ierr); 1969 array = Bloc->a; 1970 row = A->rmap->rstart; 1971 for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]]; 1972 cols_tmp = cols; 1973 for (i=0; i<mb; i++) { 1974 ncol = bi[i+1]-bi[i]; 1975 ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1976 row++; 1977 array += ncol; cols_tmp += ncol; 1978 } 1979 ierr = PetscFree(cols);CHKERRQ(ierr); 1980 1981 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1982 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1983 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) { 1984 *matout = B; 1985 } else { 1986 ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr); 1987 } 1988 PetscFunctionReturn(0); 1989 } 1990 1991 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 1992 { 1993 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1994 Mat a = aij->A,b = aij->B; 1995 PetscErrorCode ierr; 1996 PetscInt s1,s2,s3; 1997 1998 PetscFunctionBegin; 1999 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 2000 if (rr) { 2001 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 2002 if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 2003 /* Overlap communication with computation. 
*/ 2004 ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2005 } 2006 if (ll) { 2007 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 2008 if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 2009 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 2010 } 2011 /* scale the diagonal block */ 2012 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 2013 2014 if (rr) { 2015 /* Do a scatter end and then right scale the off-diagonal block */ 2016 ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2017 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 2018 } 2019 PetscFunctionReturn(0); 2020 } 2021 2022 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 2023 { 2024 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2025 PetscErrorCode ierr; 2026 2027 PetscFunctionBegin; 2028 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 2029 PetscFunctionReturn(0); 2030 } 2031 2032 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool *flag) 2033 { 2034 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 2035 Mat a,b,c,d; 2036 PetscBool flg; 2037 PetscErrorCode ierr; 2038 2039 PetscFunctionBegin; 2040 a = matA->A; b = matA->B; 2041 c = matB->A; d = matB->B; 2042 2043 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 2044 if (flg) { 2045 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 2046 } 2047 ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2048 PetscFunctionReturn(0); 2049 } 2050 2051 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 2052 { 2053 PetscErrorCode ierr; 2054 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2055 Mat_MPIAIJ *b = (Mat_MPIAIJ*)B->data; 2056 2057 PetscFunctionBegin; 2058 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 2059 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 2060 /* because of the column compression in the off-processor part of the matrix a->B, 2061 the number of columns in a->B and b->B may be different, hence we cannot call 2062 the MatCopy() directly on the two parts. If need be, we can provide a more 2063 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 2064 then copying the submatrices */ 2065 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2066 } else { 2067 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 2068 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 2069 } 2070 ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr); 2071 PetscFunctionReturn(0); 2072 } 2073 2074 PetscErrorCode MatSetUp_MPIAIJ(Mat A) 2075 { 2076 PetscErrorCode ierr; 2077 2078 PetscFunctionBegin; 2079 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 2080 PetscFunctionReturn(0); 2081 } 2082 2083 /* 2084 Computes the number of nonzeros per row needed for preallocation when X and Y 2085 have different nonzero structure. 
2086 */ 2087 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz) 2088 { 2089 PetscInt i,j,k,nzx,nzy; 2090 2091 PetscFunctionBegin; 2092 /* Set the number of nonzeros in the new matrix */ 2093 for (i=0; i<m; i++) { 2094 const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i]; 2095 nzx = xi[i+1] - xi[i]; 2096 nzy = yi[i+1] - yi[i]; 2097 nnz[i] = 0; 2098 for (j=0,k=0; j<nzx; j++) { /* Point in X */ 2099 for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */ 2100 if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++; /* Skip duplicate */ 2101 nnz[i]++; 2102 } 2103 for (; k<nzy; k++) nnz[i]++; 2104 } 2105 PetscFunctionReturn(0); 2106 } 2107 2108 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */ 2109 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz) 2110 { 2111 PetscErrorCode ierr; 2112 PetscInt m = Y->rmap->N; 2113 Mat_SeqAIJ *x = (Mat_SeqAIJ*)X->data; 2114 Mat_SeqAIJ *y = (Mat_SeqAIJ*)Y->data; 2115 2116 PetscFunctionBegin; 2117 ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr); 2118 PetscFunctionReturn(0); 2119 } 2120 2121 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2122 { 2123 PetscErrorCode ierr; 2124 Mat_MPIAIJ *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data; 2125 PetscBLASInt bnz,one=1; 2126 Mat_SeqAIJ *x,*y; 2127 2128 PetscFunctionBegin; 2129 if (str == SAME_NONZERO_PATTERN) { 2130 PetscScalar alpha = a; 2131 x = (Mat_SeqAIJ*)xx->A->data; 2132 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 2133 y = (Mat_SeqAIJ*)yy->A->data; 2134 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2135 x = (Mat_SeqAIJ*)xx->B->data; 2136 y = (Mat_SeqAIJ*)yy->B->data; 2137 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 2138 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2139 ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr); 2140 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 2141 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 2142 } else { 2143 Mat B; 2144 PetscInt *nnz_d,*nnz_o; 2145 ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr); 2146 ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr); 2147 ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr); 2148 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 2149 ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr); 2150 ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr); 2151 ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr); 2152 ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr); 2153 ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr); 2154 ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr); 2155 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 2156 ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr); 2157 ierr = PetscFree(nnz_d);CHKERRQ(ierr); 2158 ierr = PetscFree(nnz_o);CHKERRQ(ierr); 2159 } 2160 PetscFunctionReturn(0); 2161 } 2162 2163 extern PetscErrorCode MatConjugate_SeqAIJ(Mat); 2164 2165 PetscErrorCode MatConjugate_MPIAIJ(Mat mat) 2166 { 2167 #if defined(PETSC_USE_COMPLEX) 2168 PetscErrorCode ierr; 2169 Mat_MPIAIJ *aij = 
(Mat_MPIAIJ*)mat->data; 2170 2171 PetscFunctionBegin; 2172 ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr); 2173 ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr); 2174 #else 2175 PetscFunctionBegin; 2176 #endif 2177 PetscFunctionReturn(0); 2178 } 2179 2180 PetscErrorCode MatRealPart_MPIAIJ(Mat A) 2181 { 2182 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2183 PetscErrorCode ierr; 2184 2185 PetscFunctionBegin; 2186 ierr = MatRealPart(a->A);CHKERRQ(ierr); 2187 ierr = MatRealPart(a->B);CHKERRQ(ierr); 2188 PetscFunctionReturn(0); 2189 } 2190 2191 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 2192 { 2193 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2194 PetscErrorCode ierr; 2195 2196 PetscFunctionBegin; 2197 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 2198 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 2199 PetscFunctionReturn(0); 2200 } 2201 2202 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2203 { 2204 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2205 PetscErrorCode ierr; 2206 PetscInt i,*idxb = 0; 2207 PetscScalar *va,*vb; 2208 Vec vtmp; 2209 2210 PetscFunctionBegin; 2211 ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr); 2212 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2213 if (idx) { 2214 for (i=0; i<A->rmap->n; i++) { 2215 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2216 } 2217 } 2218 2219 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2220 if (idx) { 2221 ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr); 2222 } 2223 ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2224 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2225 2226 for (i=0; i<A->rmap->n; i++) { 2227 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 2228 va[i] = vb[i]; 2229 if (idx) idx[i] = a->garray[idxb[i]]; 2230 } 2231 } 2232 2233 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2234 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2235 ierr = PetscFree(idxb);CHKERRQ(ierr); 2236 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 2237 PetscFunctionReturn(0); 2238 } 2239 2240 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2241 { 2242 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2243 PetscErrorCode ierr; 2244 PetscInt i,*idxb = 0; 2245 PetscScalar *va,*vb; 2246 Vec vtmp; 2247 2248 PetscFunctionBegin; 2249 ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr); 2250 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2251 if (idx) { 2252 for (i=0; i<A->cmap->n; i++) { 2253 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2254 } 2255 } 2256 2257 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2258 if (idx) { 2259 ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr); 2260 } 2261 ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2262 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2263 2264 for (i=0; i<A->rmap->n; i++) { 2265 if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) { 2266 va[i] = vb[i]; 2267 if (idx) idx[i] = a->garray[idxb[i]]; 2268 } 2269 } 2270 2271 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2272 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2273 ierr = PetscFree(idxb);CHKERRQ(ierr); 2274 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 2275 PetscFunctionReturn(0); 2276 } 2277 2278 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2279 { 2280 Mat_MPIAIJ *mat = (Mat_MPIAIJ*) A->data; 2281 PetscInt n = A->rmap->n; 2282 PetscInt cstart = A->cmap->rstart; 2283 PetscInt *cmap = mat->garray; 2284 PetscInt *diagIdx, *offdiagIdx; 2285 Vec diagV, offdiagV; 2286 PetscScalar *a, *diagA, *offdiagA; 2287 PetscInt r; 2288 PetscErrorCode ierr; 2289 2290 PetscFunctionBegin; 2291 
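  /* Compute row minima of the diagonal block A and the off-diagonal block B separately
     into two sequential work vectors, then merge them row by row, converting local
     column indices back to global ones (cstart offset for A, garray[] for B). */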
ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr); 2292 ierr = VecCreateSeq(PetscObjectComm((PetscObject)A), n, &diagV);CHKERRQ(ierr); 2293 ierr = VecCreateSeq(PetscObjectComm((PetscObject)A), n, &offdiagV);CHKERRQ(ierr); 2294 ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2295 ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2296 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2297 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2298 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2299 for (r = 0; r < n; ++r) { 2300 if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) { 2301 a[r] = diagA[r]; 2302 idx[r] = cstart + diagIdx[r]; 2303 } else { 2304 a[r] = offdiagA[r]; 2305 idx[r] = cmap[offdiagIdx[r]]; 2306 } 2307 } 2308 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2309 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2310 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2311 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2312 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 2313 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2314 PetscFunctionReturn(0); 2315 } 2316 2317 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2318 { 2319 Mat_MPIAIJ *mat = (Mat_MPIAIJ*) A->data; 2320 PetscInt n = A->rmap->n; 2321 PetscInt cstart = A->cmap->rstart; 2322 PetscInt *cmap = mat->garray; 2323 PetscInt *diagIdx, *offdiagIdx; 2324 Vec diagV, offdiagV; 2325 PetscScalar *a, *diagA, *offdiagA; 2326 PetscInt r; 2327 PetscErrorCode ierr; 2328 2329 PetscFunctionBegin; 2330 ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr); 2331 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr); 2332 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr); 2333 ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2334 ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2335 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2336 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2337 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2338 for (r = 0; r < n; ++r) { 2339 if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) { 2340 a[r] = diagA[r]; 2341 idx[r] = cstart + diagIdx[r]; 2342 } else { 2343 a[r] = offdiagA[r]; 2344 idx[r] = cmap[offdiagIdx[r]]; 2345 } 2346 } 2347 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2348 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2349 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2350 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2351 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 2352 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2353 PetscFunctionReturn(0); 2354 } 2355 2356 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat) 2357 { 2358 PetscErrorCode ierr; 2359 Mat *dummy; 2360 2361 PetscFunctionBegin; 2362 ierr = MatCreateSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr); 2363 *newmat = *dummy; 2364 ierr = PetscFree(dummy);CHKERRQ(ierr); 2365 PetscFunctionReturn(0); 2366 } 2367 2368 PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values) 2369 { 2370 Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 2371 PetscErrorCode ierr; 2372 2373 PetscFunctionBegin; 2374 ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr); 2375 A->factorerrortype = a->A->factorerrortype; 2376 PetscFunctionReturn(0); 2377 } 2378 2379 static PetscErrorCode MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx) 2380 { 2381 PetscErrorCode ierr; 2382 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)x->data; 2383 2384 PetscFunctionBegin; 2385 
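  /* Fill the preallocated nonzero locations of both the diagonal (A) and off-diagonal (B)
     blocks with random values, then reassemble the parallel matrix. */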
ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr); 2386 ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr); 2387 ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2388 ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2389 PetscFunctionReturn(0); 2390 } 2391 2392 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A,PetscBool sc) 2393 { 2394 PetscFunctionBegin; 2395 if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable; 2396 else A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ; 2397 PetscFunctionReturn(0); 2398 } 2399 2400 /*@ 2401 MatMPIAIJSetUseScalableIncreaseOverlap - Determine if the matrix uses a scalable algorithm to compute the overlap 2402 2403 Collective on Mat 2404 2405 Input Parameters: 2406 + A - the matrix 2407 - sc - PETSC_TRUE indicates use the scalable algorithm (default is not to use the scalable algorithm) 2408 2409 Level: advanced 2410 2411 @*/ 2412 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A,PetscBool sc) 2413 { 2414 PetscErrorCode ierr; 2415 2416 PetscFunctionBegin; 2417 ierr = PetscTryMethod(A,"MatMPIAIJSetUseScalableIncreaseOverlap_C",(Mat,PetscBool),(A,sc));CHKERRQ(ierr); 2418 PetscFunctionReturn(0); 2419 } 2420 2421 PetscErrorCode MatSetFromOptions_MPIAIJ(PetscOptionItems *PetscOptionsObject,Mat A) 2422 { 2423 PetscErrorCode ierr; 2424 PetscBool sc = PETSC_FALSE,flg; 2425 2426 PetscFunctionBegin; 2427 ierr = PetscOptionsHead(PetscOptionsObject,"MPIAIJ options");CHKERRQ(ierr); 2428 ierr = PetscObjectOptionsBegin((PetscObject)A); 2429 if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE; 2430 ierr = PetscOptionsBool("-mat_increase_overlap_scalable","Use a scalable algorithm to compute the overlap","MatIncreaseOverlap",sc,&sc,&flg);CHKERRQ(ierr); 2431 if (flg) { 2432 ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,sc);CHKERRQ(ierr); 2433 } 2434 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2435 PetscFunctionReturn(0); 2436 } 2437 2438 PetscErrorCode MatShift_MPIAIJ(Mat Y,PetscScalar a) 2439 { 2440 PetscErrorCode ierr; 2441 Mat_MPIAIJ *maij = (Mat_MPIAIJ*)Y->data; 2442 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)maij->A->data; 2443 2444 PetscFunctionBegin; 2445 if (!Y->preallocated) { 2446 ierr = MatMPIAIJSetPreallocation(Y,1,NULL,0,NULL);CHKERRQ(ierr); 2447 } else if (!aij->nz) { 2448 PetscInt nonew = aij->nonew; 2449 ierr = MatSeqAIJSetPreallocation(maij->A,1,NULL);CHKERRQ(ierr); 2450 aij->nonew = nonew; 2451 } 2452 ierr = MatShift_Basic(Y,a);CHKERRQ(ierr); 2453 PetscFunctionReturn(0); 2454 } 2455 2456 PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A,PetscBool *missing,PetscInt *d) 2457 { 2458 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2459 PetscErrorCode ierr; 2460 2461 PetscFunctionBegin; 2462 if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices"); 2463 ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr); 2464 if (d) { 2465 PetscInt rstart; 2466 ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr); 2467 *d += rstart; 2468 2469 } 2470 PetscFunctionReturn(0); 2471 } 2472 2473 2474 /* -------------------------------------------------------------------*/ 2475 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 2476 MatGetRow_MPIAIJ, 2477 MatRestoreRow_MPIAIJ, 2478 MatMult_MPIAIJ, 2479 /* 4*/ MatMultAdd_MPIAIJ, 2480 MatMultTranspose_MPIAIJ, 2481 MatMultTransposeAdd_MPIAIJ, 2482 0, 2483 0, 2484 0, 2485 /*10*/ 0, 2486 0, 2487 0, 2488 MatSOR_MPIAIJ, 2489 MatTranspose_MPIAIJ, 2490 /*15*/ MatGetInfo_MPIAIJ, 2491 MatEqual_MPIAIJ, 2492 
MatGetDiagonal_MPIAIJ, 2493 MatDiagonalScale_MPIAIJ, 2494 MatNorm_MPIAIJ, 2495 /*20*/ MatAssemblyBegin_MPIAIJ, 2496 MatAssemblyEnd_MPIAIJ, 2497 MatSetOption_MPIAIJ, 2498 MatZeroEntries_MPIAIJ, 2499 /*24*/ MatZeroRows_MPIAIJ, 2500 0, 2501 0, 2502 0, 2503 0, 2504 /*29*/ MatSetUp_MPIAIJ, 2505 0, 2506 0, 2507 MatGetDiagonalBlock_MPIAIJ, 2508 0, 2509 /*34*/ MatDuplicate_MPIAIJ, 2510 0, 2511 0, 2512 0, 2513 0, 2514 /*39*/ MatAXPY_MPIAIJ, 2515 MatCreateSubMatrices_MPIAIJ, 2516 MatIncreaseOverlap_MPIAIJ, 2517 MatGetValues_MPIAIJ, 2518 MatCopy_MPIAIJ, 2519 /*44*/ MatGetRowMax_MPIAIJ, 2520 MatScale_MPIAIJ, 2521 MatShift_MPIAIJ, 2522 MatDiagonalSet_MPIAIJ, 2523 MatZeroRowsColumns_MPIAIJ, 2524 /*49*/ MatSetRandom_MPIAIJ, 2525 0, 2526 0, 2527 0, 2528 0, 2529 /*54*/ MatFDColoringCreate_MPIXAIJ, 2530 0, 2531 MatSetUnfactored_MPIAIJ, 2532 MatPermute_MPIAIJ, 2533 0, 2534 /*59*/ MatCreateSubMatrix_MPIAIJ, 2535 MatDestroy_MPIAIJ, 2536 MatView_MPIAIJ, 2537 0, 2538 MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ, 2539 /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ, 2540 MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ, 2541 0, 2542 0, 2543 0, 2544 /*69*/ MatGetRowMaxAbs_MPIAIJ, 2545 MatGetRowMinAbs_MPIAIJ, 2546 0, 2547 0, 2548 0, 2549 0, 2550 /*75*/ MatFDColoringApply_AIJ, 2551 MatSetFromOptions_MPIAIJ, 2552 0, 2553 0, 2554 MatFindZeroDiagonals_MPIAIJ, 2555 /*80*/ 0, 2556 0, 2557 0, 2558 /*83*/ MatLoad_MPIAIJ, 2559 MatIsSymmetric_MPIAIJ, 2560 0, 2561 0, 2562 0, 2563 0, 2564 /*89*/ MatMatMult_MPIAIJ_MPIAIJ, 2565 MatMatMultSymbolic_MPIAIJ_MPIAIJ, 2566 MatMatMultNumeric_MPIAIJ_MPIAIJ, 2567 MatPtAP_MPIAIJ_MPIAIJ, 2568 MatPtAPSymbolic_MPIAIJ_MPIAIJ, 2569 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ, 2570 0, 2571 0, 2572 0, 2573 0, 2574 /*99*/ 0, 2575 0, 2576 0, 2577 MatConjugate_MPIAIJ, 2578 0, 2579 /*104*/MatSetValuesRow_MPIAIJ, 2580 MatRealPart_MPIAIJ, 2581 MatImaginaryPart_MPIAIJ, 2582 0, 2583 0, 2584 /*109*/0, 2585 0, 2586 MatGetRowMin_MPIAIJ, 2587 0, 2588 MatMissingDiagonal_MPIAIJ, 2589 /*114*/MatGetSeqNonzeroStructure_MPIAIJ, 2590 0, 2591 MatGetGhosts_MPIAIJ, 2592 0, 2593 0, 2594 /*119*/0, 2595 0, 2596 0, 2597 0, 2598 MatGetMultiProcBlock_MPIAIJ, 2599 /*124*/MatFindNonzeroRows_MPIAIJ, 2600 MatGetColumnNorms_MPIAIJ, 2601 MatInvertBlockDiagonal_MPIAIJ, 2602 0, 2603 MatCreateSubMatricesMPI_MPIAIJ, 2604 /*129*/0, 2605 MatTransposeMatMult_MPIAIJ_MPIAIJ, 2606 MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ, 2607 MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ, 2608 0, 2609 /*134*/0, 2610 0, 2611 MatRARt_MPIAIJ_MPIAIJ, 2612 0, 2613 0, 2614 /*139*/MatSetBlockSizes_MPIAIJ, 2615 0, 2616 0, 2617 MatFDColoringSetUp_MPIXAIJ, 2618 MatFindOffBlockDiagonalEntries_MPIAIJ, 2619 /*144*/MatCreateMPIMatConcatenateSeqMat_MPIAIJ 2620 }; 2621 2622 /* ----------------------------------------------------------------------------------------*/ 2623 2624 PetscErrorCode MatStoreValues_MPIAIJ(Mat mat) 2625 { 2626 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2627 PetscErrorCode ierr; 2628 2629 PetscFunctionBegin; 2630 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 2631 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 2632 PetscFunctionReturn(0); 2633 } 2634 2635 PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat) 2636 { 2637 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2638 PetscErrorCode ierr; 2639 2640 PetscFunctionBegin; 2641 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 2642 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 2643 PetscFunctionReturn(0); 2644 } 2645 2646 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt 
o_nnz[]) 2647 { 2648 Mat_MPIAIJ *b; 2649 PetscErrorCode ierr; 2650 2651 PetscFunctionBegin; 2652 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2653 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2654 b = (Mat_MPIAIJ*)B->data; 2655 2656 #if defined(PETSC_USE_CTABLE) 2657 ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr); 2658 #else 2659 ierr = PetscFree(b->colmap);CHKERRQ(ierr); 2660 #endif 2661 ierr = PetscFree(b->garray);CHKERRQ(ierr); 2662 ierr = VecDestroy(&b->lvec);CHKERRQ(ierr); 2663 ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr); 2664 2665 /* Because the B will have been resized we simply destroy it and create a new one each time */ 2666 ierr = MatDestroy(&b->B);CHKERRQ(ierr); 2667 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 2668 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 2669 ierr = MatSetBlockSizesFromMats(b->B,B,B);CHKERRQ(ierr); 2670 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 2671 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr); 2672 2673 if (!B->preallocated) { 2674 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 2675 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 2676 ierr = MatSetBlockSizesFromMats(b->A,B,B);CHKERRQ(ierr); 2677 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 2678 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr); 2679 } 2680 2681 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 2682 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 2683 B->preallocated = PETSC_TRUE; 2684 B->was_assembled = PETSC_FALSE; 2685 B->assembled = PETSC_FALSE;; 2686 PetscFunctionReturn(0); 2687 } 2688 2689 PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B) 2690 { 2691 Mat_MPIAIJ *b; 2692 PetscErrorCode ierr; 2693 2694 PetscFunctionBegin; 2695 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 2696 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2697 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2698 b = (Mat_MPIAIJ*)B->data; 2699 2700 #if defined(PETSC_USE_CTABLE) 2701 ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr); 2702 #else 2703 ierr = PetscFree(b->colmap);CHKERRQ(ierr); 2704 #endif 2705 ierr = PetscFree(b->garray);CHKERRQ(ierr); 2706 ierr = VecDestroy(&b->lvec);CHKERRQ(ierr); 2707 ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr); 2708 2709 ierr = MatResetPreallocation(b->A);CHKERRQ(ierr); 2710 ierr = MatResetPreallocation(b->B);CHKERRQ(ierr); 2711 B->preallocated = PETSC_TRUE; 2712 B->was_assembled = PETSC_FALSE; 2713 B->assembled = PETSC_FALSE; 2714 PetscFunctionReturn(0); 2715 } 2716 2717 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2718 { 2719 Mat mat; 2720 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 2721 PetscErrorCode ierr; 2722 2723 PetscFunctionBegin; 2724 *newmat = 0; 2725 ierr = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr); 2726 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 2727 ierr = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr); 2728 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 2729 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 2730 a = (Mat_MPIAIJ*)mat->data; 2731 2732 mat->factortype = matin->factortype; 2733 mat->assembled = PETSC_TRUE; 2734 mat->insertmode = NOT_SET_VALUES; 2735 mat->preallocated = PETSC_TRUE; 2736 2737 a->size = oldmat->size; 2738 a->rank = oldmat->rank; 2739 a->donotstash 
= oldmat->donotstash; 2740 a->roworiented = oldmat->roworiented; 2741 a->rowindices = 0; 2742 a->rowvalues = 0; 2743 a->getrowactive = PETSC_FALSE; 2744 2745 ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr); 2746 ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr); 2747 2748 if (oldmat->colmap) { 2749 #if defined(PETSC_USE_CTABLE) 2750 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 2751 #else 2752 ierr = PetscMalloc1(mat->cmap->N,&a->colmap);CHKERRQ(ierr); 2753 ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 2754 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 2755 #endif 2756 } else a->colmap = 0; 2757 if (oldmat->garray) { 2758 PetscInt len; 2759 len = oldmat->B->cmap->n; 2760 ierr = PetscMalloc1(len+1,&a->garray);CHKERRQ(ierr); 2761 ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr); 2762 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); } 2763 } else a->garray = 0; 2764 2765 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 2766 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr); 2767 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 2768 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr); 2769 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2770 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr); 2771 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 2772 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr); 2773 ierr = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 2774 *newmat = mat; 2775 PetscFunctionReturn(0); 2776 } 2777 2778 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer) 2779 { 2780 PetscScalar *vals,*svals; 2781 MPI_Comm comm; 2782 PetscErrorCode ierr; 2783 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 2784 PetscInt i,nz,j,rstart,rend,mmax,maxnz = 0; 2785 PetscInt header[4],*rowlengths = 0,M,N,m,*cols; 2786 PetscInt *ourlens = NULL,*procsnz = NULL,*offlens = NULL,jj,*mycols,*smycols; 2787 PetscInt cend,cstart,n,*rowners; 2788 int fd; 2789 PetscInt bs = newMat->rmap->bs; 2790 2791 PetscFunctionBegin; 2792 /* force binary viewer to load .info file if it has not yet done so */ 2793 ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr); 2794 ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr); 2795 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2796 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2797 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2798 if (!rank) { 2799 ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr); 2800 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 2801 if (header[3] < 0) SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk,cannot load as MATMPIAIJ"); 2802 } 2803 2804 ierr = PetscOptionsBegin(comm,NULL,"Options for loading MATMPIAIJ matrix","Mat");CHKERRQ(ierr); 2805 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr); 2806 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2807 if (bs < 0) bs = 1; 2808 2809 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 2810 M = header[1]; N = 
header[2]; 2811 2812 /* If global sizes are set, check if they are consistent with that given in the file */ 2813 if (newMat->rmap->N >= 0 && newMat->rmap->N != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of rows:Matrix in file has (%D) and input matrix has (%D)",newMat->rmap->N,M); 2814 if (newMat->cmap->N >=0 && newMat->cmap->N != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of cols:Matrix in file has (%D) and input matrix has (%D)",newMat->cmap->N,N); 2815 2816 /* determine ownership of all (block) rows */ 2817 if (M%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%d) and block size (%d)",M,bs); 2818 if (newMat->rmap->n < 0) m = bs*((M/bs)/size + (((M/bs) % size) > rank)); /* PETSC_DECIDE */ 2819 else m = newMat->rmap->n; /* Set by user */ 2820 2821 ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr); 2822 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 2823 2824 /* First process needs enough room for process with most rows */ 2825 if (!rank) { 2826 mmax = rowners[1]; 2827 for (i=2; i<=size; i++) { 2828 mmax = PetscMax(mmax, rowners[i]); 2829 } 2830 } else mmax = -1; /* unused, but compilers complain */ 2831 2832 rowners[0] = 0; 2833 for (i=2; i<=size; i++) { 2834 rowners[i] += rowners[i-1]; 2835 } 2836 rstart = rowners[rank]; 2837 rend = rowners[rank+1]; 2838 2839 /* distribute row lengths to all processors */ 2840 ierr = PetscMalloc2(m,&ourlens,m,&offlens);CHKERRQ(ierr); 2841 if (!rank) { 2842 ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr); 2843 ierr = PetscMalloc1(mmax,&rowlengths);CHKERRQ(ierr); 2844 ierr = PetscCalloc1(size,&procsnz);CHKERRQ(ierr); 2845 for (j=0; j<m; j++) { 2846 procsnz[0] += ourlens[j]; 2847 } 2848 for (i=1; i<size; i++) { 2849 ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr); 2850 /* calculate the number of nonzeros on each processor */ 2851 for (j=0; j<rowners[i+1]-rowners[i]; j++) { 2852 procsnz[i] += rowlengths[j]; 2853 } 2854 ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2855 } 2856 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2857 } else { 2858 ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 2859 } 2860 2861 if (!rank) { 2862 /* determine max buffer needed and allocate it */ 2863 maxnz = 0; 2864 for (i=0; i<size; i++) { 2865 maxnz = PetscMax(maxnz,procsnz[i]); 2866 } 2867 ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr); 2868 2869 /* read in my part of the matrix column indices */ 2870 nz = procsnz[0]; 2871 ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr); 2872 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 2873 2874 /* read in every one elses and ship off */ 2875 for (i=1; i<size; i++) { 2876 nz = procsnz[i]; 2877 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2878 ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2879 } 2880 ierr = PetscFree(cols);CHKERRQ(ierr); 2881 } else { 2882 /* determine buffer space needed for message */ 2883 nz = 0; 2884 for (i=0; i<m; i++) { 2885 nz += ourlens[i]; 2886 } 2887 ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr); 2888 2889 /* receive message of column indices*/ 2890 ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 2891 } 2892 2893 /* determine column ownership if matrix is not square */ 2894 if (N != M) { 2895 if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank); 2896 else n = newMat->cmap->n; 2897 ierr = 
MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 2898 cstart = cend - n; 2899 } else { 2900 cstart = rstart; 2901 cend = rend; 2902 n = cend - cstart; 2903 } 2904 2905 /* loop over local rows, determining number of off diagonal entries */ 2906 ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr); 2907 jj = 0; 2908 for (i=0; i<m; i++) { 2909 for (j=0; j<ourlens[i]; j++) { 2910 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 2911 jj++; 2912 } 2913 } 2914 2915 for (i=0; i<m; i++) { 2916 ourlens[i] -= offlens[i]; 2917 } 2918 ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr); 2919 2920 if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);} 2921 2922 ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr); 2923 2924 for (i=0; i<m; i++) { 2925 ourlens[i] += offlens[i]; 2926 } 2927 2928 if (!rank) { 2929 ierr = PetscMalloc1(maxnz+1,&vals);CHKERRQ(ierr); 2930 2931 /* read in my part of the matrix numerical values */ 2932 nz = procsnz[0]; 2933 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2934 2935 /* insert into matrix */ 2936 jj = rstart; 2937 smycols = mycols; 2938 svals = vals; 2939 for (i=0; i<m; i++) { 2940 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 2941 smycols += ourlens[i]; 2942 svals += ourlens[i]; 2943 jj++; 2944 } 2945 2946 /* read in other processors and ship out */ 2947 for (i=1; i<size; i++) { 2948 nz = procsnz[i]; 2949 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2950 ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 2951 } 2952 ierr = PetscFree(procsnz);CHKERRQ(ierr); 2953 } else { 2954 /* receive numeric values */ 2955 ierr = PetscMalloc1(nz+1,&vals);CHKERRQ(ierr); 2956 2957 /* receive message of values*/ 2958 ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 2959 2960 /* insert into matrix */ 2961 jj = rstart; 2962 smycols = mycols; 2963 svals = vals; 2964 for (i=0; i<m; i++) { 2965 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 2966 smycols += ourlens[i]; 2967 svals += ourlens[i]; 2968 jj++; 2969 } 2970 } 2971 ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr); 2972 ierr = PetscFree(vals);CHKERRQ(ierr); 2973 ierr = PetscFree(mycols);CHKERRQ(ierr); 2974 ierr = PetscFree(rowners);CHKERRQ(ierr); 2975 ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2976 ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2977 PetscFunctionReturn(0); 2978 } 2979 2980 /* Not scalable because of ISAllGather() unless getting all columns. 
*/ 2981 PetscErrorCode ISGetSeqIS_Private(Mat mat,IS iscol,IS *isseq) 2982 { 2983 PetscErrorCode ierr; 2984 IS iscol_local; 2985 PetscBool isstride; 2986 PetscMPIInt lisstride=0,gisstride; 2987 2988 PetscFunctionBegin; 2989 /* check if we are grabbing all columns*/ 2990 ierr = PetscObjectTypeCompare((PetscObject)iscol,ISSTRIDE,&isstride);CHKERRQ(ierr); 2991 2992 if (isstride) { 2993 PetscInt start,len,mstart,mlen; 2994 ierr = ISStrideGetInfo(iscol,&start,NULL);CHKERRQ(ierr); 2995 ierr = ISGetLocalSize(iscol,&len);CHKERRQ(ierr); 2996 ierr = MatGetOwnershipRangeColumn(mat,&mstart,&mlen);CHKERRQ(ierr); 2997 if (mstart == start && mlen-mstart == len) lisstride = 1; 2998 } 2999 3000 ierr = MPIU_Allreduce(&lisstride,&gisstride,1,MPI_INT,MPI_MIN,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 3001 if (gisstride) { 3002 PetscInt N; 3003 ierr = MatGetSize(mat,NULL,&N);CHKERRQ(ierr); 3004 ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),N,0,1,&iscol_local);CHKERRQ(ierr); 3005 ierr = ISSetIdentity(iscol_local);CHKERRQ(ierr); 3006 ierr = PetscInfo(mat,"Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n");CHKERRQ(ierr); 3007 } else { 3008 PetscInt cbs; 3009 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3010 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 3011 ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr); 3012 } 3013 3014 *isseq = iscol_local; 3015 PetscFunctionReturn(0); 3016 } 3017 3018 /* 3019 Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and global size of iscol_local 3020 (see MatCreateSubMatrix_MPIAIJ_nonscalable) 3021 3022 Input Parameters: 3023 mat - matrix 3024 isrow - parallel row index set; its local indices are a subset of local columns of mat, 3025 i.e., mat->rstart <= isrow[i] < mat->rend 3026 iscol - parallel column index set; its local indices are a subset of local columns of mat, 3027 i.e., mat->cstart <= iscol[i] < mat->cend 3028 Output Parameter: 3029 isrow_d,iscol_d - sequential row and column index sets for retrieving mat->A 3030 iscol_o - sequential column index set for retrieving mat->B 3031 garray - column map; garray[i] indicates global location of iscol_o[i] in iscol 3032 */ 3033 PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat,IS isrow,IS iscol,IS *isrow_d,IS *iscol_d,IS *iscol_o,const PetscInt *garray[]) 3034 { 3035 PetscErrorCode ierr; 3036 Vec x,cmap; 3037 const PetscInt *is_idx; 3038 PetscScalar *xarray,*cmaparray; 3039 PetscInt ncols,isstart,*idx,m,rstart,*cmap1,count; 3040 Mat_MPIAIJ *a=(Mat_MPIAIJ*)mat->data; 3041 Mat B=a->B; 3042 Vec lvec=a->lvec,lcmap; 3043 PetscInt i,cstart,cend,Bn=B->cmap->N; 3044 MPI_Comm comm; 3045 3046 PetscFunctionBegin; 3047 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3048 ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr); 3049 3050 /* (1) iscol is a sub-column vector of mat, pad it with '-1.' 
to form a full vector x */ 3051 ierr = MatCreateVecs(mat,&x,NULL);CHKERRQ(ierr); 3052 ierr = VecDuplicate(x,&cmap);CHKERRQ(ierr); 3053 ierr = VecSet(x,-1.0);CHKERRQ(ierr); 3054 3055 /* Get start indices */ 3056 ierr = MPI_Scan(&ncols,&isstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3057 isstart -= ncols; 3058 ierr = MatGetOwnershipRangeColumn(mat,&cstart,&cend);CHKERRQ(ierr); 3059 3060 ierr = ISGetIndices(iscol,&is_idx);CHKERRQ(ierr); 3061 ierr = VecGetArray(x,&xarray);CHKERRQ(ierr); 3062 ierr = VecGetArray(cmap,&cmaparray);CHKERRQ(ierr); 3063 ierr = PetscMalloc1(ncols,&idx);CHKERRQ(ierr); 3064 for (i=0; i<ncols; i++) { 3065 xarray[is_idx[i]-cstart] = (PetscScalar)is_idx[i]; 3066 cmaparray[is_idx[i]-cstart] = i + isstart; /* global index of iscol[i] */ 3067 idx[i] = is_idx[i]-cstart; /* local index of iscol[i] */ 3068 } 3069 ierr = VecRestoreArray(x,&xarray);CHKERRQ(ierr); 3070 ierr = VecRestoreArray(cmap,&cmaparray);CHKERRQ(ierr); 3071 ierr = ISRestoreIndices(iscol,&is_idx);CHKERRQ(ierr); 3072 3073 /* Get iscol_d */ 3074 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,iscol_d);CHKERRQ(ierr); 3075 ierr = ISGetBlockSize(iscol,&i);CHKERRQ(ierr); 3076 ierr = ISSetBlockSize(*iscol_d,i);CHKERRQ(ierr); 3077 3078 /* Get isrow_d */ 3079 ierr = ISGetLocalSize(isrow,&m);CHKERRQ(ierr); 3080 rstart = mat->rmap->rstart; 3081 ierr = PetscMalloc1(m,&idx);CHKERRQ(ierr); 3082 ierr = ISGetIndices(isrow,&is_idx);CHKERRQ(ierr); 3083 for (i=0; i<m; i++) idx[i] = is_idx[i]-rstart; 3084 ierr = ISRestoreIndices(isrow,&is_idx);CHKERRQ(ierr); 3085 3086 ierr = ISCreateGeneral(PETSC_COMM_SELF,m,idx,PETSC_OWN_POINTER,isrow_d);CHKERRQ(ierr); 3087 ierr = ISGetBlockSize(isrow,&i);CHKERRQ(ierr); 3088 ierr = ISSetBlockSize(*isrow_d,i);CHKERRQ(ierr); 3089 3090 /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */ 3091 ierr = VecScatterBegin(a->Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3092 3093 ierr = VecDuplicate(lvec,&lcmap);CHKERRQ(ierr); 3094 3095 ierr = VecScatterEnd(a->Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3096 ierr = VecScatterBegin(a->Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3097 ierr = VecScatterEnd(a->Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3098 3099 /* (3) create sequential iscol_o (a subset of iscol) and isgarray */ 3100 /* off-process column indices */ 3101 count = 0; 3102 ierr = PetscMalloc1(Bn,&idx);CHKERRQ(ierr); 3103 ierr = PetscMalloc1(Bn,&cmap1);CHKERRQ(ierr); 3104 3105 ierr = VecGetArray(lvec,&xarray);CHKERRQ(ierr); 3106 ierr = VecGetArray(lcmap,&cmaparray);CHKERRQ(ierr); 3107 for (i=0; i<Bn; i++) { 3108 if (PetscRealPart(xarray[i]) > -1.0) { 3109 idx[count] = i; /* local column index in off-diagonal part B */ 3110 cmap1[count++] = (PetscInt)PetscRealPart(cmaparray[i]); /* column index in submat */ 3111 } 3112 } 3113 ierr = VecRestoreArray(lvec,&xarray);CHKERRQ(ierr); 3114 ierr = VecRestoreArray(lcmap,&cmaparray);CHKERRQ(ierr); 3115 3116 ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_COPY_VALUES,iscol_o);CHKERRQ(ierr); 3117 /* cannot ensure iscol_o has same blocksize as iscol! 
*/ 3118 3119 ierr = PetscFree(idx);CHKERRQ(ierr); 3120 3121 *garray = cmap1; 3122 3123 ierr = VecDestroy(&x);CHKERRQ(ierr); 3124 ierr = VecDestroy(&cmap);CHKERRQ(ierr); 3125 ierr = VecDestroy(&lcmap);CHKERRQ(ierr); 3126 PetscFunctionReturn(0); 3127 } 3128 3129 /* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */ 3130 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *submat) 3131 { 3132 PetscErrorCode ierr; 3133 Mat_MPIAIJ *a = (Mat_MPIAIJ*)mat->data,*asub; 3134 Mat M = NULL; 3135 MPI_Comm comm; 3136 IS iscol_d,isrow_d,iscol_o; 3137 Mat Asub = NULL,Bsub = NULL; 3138 PetscInt n; 3139 3140 PetscFunctionBegin; 3141 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3142 3143 if (call == MAT_REUSE_MATRIX) { 3144 /* Retrieve isrow_d, iscol_d and iscol_o from submat */ 3145 ierr = PetscObjectQuery((PetscObject)*submat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr); 3146 if (!isrow_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"isrow_d passed in was not used before, cannot reuse"); 3147 3148 ierr = PetscObjectQuery((PetscObject)*submat,"iscol_d",(PetscObject*)&iscol_d);CHKERRQ(ierr); 3149 if (!iscol_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_d passed in was not used before, cannot reuse"); 3150 3151 ierr = PetscObjectQuery((PetscObject)*submat,"iscol_o",(PetscObject*)&iscol_o);CHKERRQ(ierr); 3152 if (!iscol_o) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_o passed in was not used before, cannot reuse"); 3153 3154 /* Update diagonal and off-diagonal portions of submat */ 3155 asub = (Mat_MPIAIJ*)(*submat)->data; 3156 ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->A);CHKERRQ(ierr); 3157 ierr = ISGetLocalSize(iscol_o,&n);CHKERRQ(ierr); 3158 if (n) { 3159 ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->B);CHKERRQ(ierr); 3160 } 3161 ierr = MatAssemblyBegin(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3162 ierr = MatAssemblyEnd(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3163 3164 } else { /* call == MAT_INITIAL_MATRIX) */ 3165 const PetscInt *garray; 3166 PetscInt BsubN; 3167 3168 /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) 
*/ 3169 ierr = ISGetSeqIS_SameColDist_Private(mat,isrow,iscol,&isrow_d,&iscol_d,&iscol_o,&garray);CHKERRQ(ierr); 3170 3171 /* Create local submatrices Asub and Bsub */ 3172 ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Asub);CHKERRQ(ierr); 3173 ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Bsub);CHKERRQ(ierr); 3174 3175 /* Create submatrix M */ 3176 ierr = MatCreateMPIAIJWithSeqAIJ(comm,Asub,Bsub,garray,&M);CHKERRQ(ierr); 3177 3178 /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */ 3179 asub = (Mat_MPIAIJ*)M->data; 3180 3181 ierr = ISGetLocalSize(iscol_o,&BsubN);CHKERRQ(ierr); 3182 n = asub->B->cmap->N; 3183 if (BsubN > n) { 3184 /* This case can be tested using ~petsc/src/tao/bound/examples/tutorials/runplate2_3 */ 3185 const PetscInt *idx; 3186 PetscInt i,j,*idx_new,*subgarray = asub->garray; 3187 ierr = PetscInfo2(M,"submatrix Bn %D != BsubN %D, update iscol_o\n",n,BsubN);CHKERRQ(ierr); 3188 3189 ierr = PetscMalloc1(n,&idx_new);CHKERRQ(ierr); 3190 j = 0; 3191 ierr = ISGetIndices(iscol_o,&idx);CHKERRQ(ierr); 3192 for (i=0; i<n; i++) { 3193 if (j >= BsubN) break; 3194 while (subgarray[i] > garray[j]) j++; 3195 3196 if (subgarray[i] == garray[j]) { 3197 idx_new[i] = idx[j++]; 3198 } else SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"subgarray[%D]=%D cannot < garray[%D]=%D",i,subgarray[i],j,garray[j]); 3199 } 3200 ierr = ISRestoreIndices(iscol_o,&idx);CHKERRQ(ierr); 3201 3202 ierr = ISDestroy(&iscol_o);CHKERRQ(ierr); 3203 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idx_new,PETSC_OWN_POINTER,&iscol_o);CHKERRQ(ierr); 3204 3205 } else if (BsubN < n) { 3206 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Columns of Bsub cannot be smaller than B's",BsubN,asub->B->cmap->N); 3207 } 3208 3209 ierr = PetscFree(garray);CHKERRQ(ierr); 3210 *submat = M; 3211 3212 /* Save isrow_d, iscol_d and iscol_o used in processor for next request */ 3213 ierr = PetscObjectCompose((PetscObject)M,"isrow_d",(PetscObject)isrow_d);CHKERRQ(ierr); 3214 ierr = ISDestroy(&isrow_d);CHKERRQ(ierr); 3215 3216 ierr = PetscObjectCompose((PetscObject)M,"iscol_d",(PetscObject)iscol_d);CHKERRQ(ierr); 3217 ierr = ISDestroy(&iscol_d);CHKERRQ(ierr); 3218 3219 ierr = PetscObjectCompose((PetscObject)M,"iscol_o",(PetscObject)iscol_o);CHKERRQ(ierr); 3220 ierr = ISDestroy(&iscol_o);CHKERRQ(ierr); 3221 } 3222 PetscFunctionReturn(0); 3223 } 3224 3225 PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 3226 { 3227 PetscErrorCode ierr; 3228 IS iscol_local=NULL,isrow_d; 3229 PetscInt csize; 3230 PetscInt n,i,j,start,end; 3231 PetscBool sameRowDist=PETSC_FALSE,sameDist[2],tsameDist[2]; 3232 MPI_Comm comm; 3233 3234 PetscFunctionBegin; 3235 /* If isrow has same processor distribution as mat, 3236 call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */ 3237 if (call == MAT_REUSE_MATRIX) { 3238 ierr = PetscObjectQuery((PetscObject)*newmat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr); 3239 if (isrow_d) { 3240 sameRowDist = PETSC_TRUE; 3241 tsameDist[1] = PETSC_TRUE; /* sameColDist */ 3242 } else { 3243 ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3244 if (iscol_local) { 3245 sameRowDist = PETSC_TRUE; 3246 tsameDist[1] = PETSC_FALSE; /* !sameColDist */ 3247 } 3248 } 3249 } else { 3250 /* Check if isrow has same processor distribution as mat */ 3251 sameDist[0] 
= PETSC_FALSE; 3252 ierr = ISGetLocalSize(isrow,&n);CHKERRQ(ierr); 3253 if (!n) { 3254 sameDist[0] = PETSC_TRUE; 3255 } else { 3256 ierr = ISGetMinMax(isrow,&i,&j);CHKERRQ(ierr); 3257 ierr = MatGetOwnershipRange(mat,&start,&end);CHKERRQ(ierr); 3258 if (i >= start && j < end) { 3259 sameDist[0] = PETSC_TRUE; 3260 } 3261 } 3262 3263 /* Check if iscol has same processor distribution as mat */ 3264 sameDist[1] = PETSC_FALSE; 3265 ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr); 3266 if (!n) { 3267 sameDist[1] = PETSC_TRUE; 3268 } else { 3269 ierr = ISGetMinMax(iscol,&i,&j);CHKERRQ(ierr); 3270 ierr = MatGetOwnershipRangeColumn(mat,&start,&end);CHKERRQ(ierr); 3271 if (i >= start && j < end) sameDist[1] = PETSC_TRUE; 3272 } 3273 3274 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3275 ierr = MPIU_Allreduce(&sameDist,&tsameDist,2,MPIU_BOOL,MPI_LAND,comm);CHKERRQ(ierr); 3276 sameRowDist = tsameDist[0]; 3277 } 3278 3279 if (sameRowDist) { 3280 if (tsameDist[1]) { /* sameRowDist & sameColDist */ 3281 /* isrow and iscol have same processor distribution as mat */ 3282 ierr = MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat,isrow,iscol,call,newmat);CHKERRQ(ierr); 3283 PetscFunctionReturn(0); 3284 } else { /* sameRowDist */ 3285 /* isrow has same processor distribution as mat */ 3286 if (call == MAT_INITIAL_MATRIX) { 3287 PetscBool sorted; 3288 ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr); 3289 ierr = ISGetLocalSize(iscol_local,&n);CHKERRQ(ierr); /* local size of iscol_local = global columns of newmat */ 3290 ierr = ISGetSize(iscol,&i);CHKERRQ(ierr); 3291 if (n != i) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"n %d != size of iscol %d",n,i); 3292 3293 ierr = ISSorted(iscol_local,&sorted);CHKERRQ(ierr); 3294 if (sorted) { 3295 /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local be sorted; it can have duplicate indices */ 3296 ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,iscol_local,MAT_INITIAL_MATRIX,newmat);CHKERRQ(ierr); 3297 PetscFunctionReturn(0); 3298 } 3299 } else { /* call == MAT_REUSE_MATRIX */ 3300 IS iscol_sub; 3301 ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr); 3302 if (iscol_sub) { 3303 ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,NULL,call,newmat);CHKERRQ(ierr); 3304 PetscFunctionReturn(0); 3305 } 3306 } 3307 } 3308 } 3309 3310 /* General case: iscol -> iscol_local which has global size of iscol */ 3311 if (call == MAT_REUSE_MATRIX) { 3312 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3313 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3314 } else { 3315 if (!iscol_local) { 3316 ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr); 3317 } 3318 } 3319 3320 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3321 ierr = MatCreateSubMatrix_MPIAIJ_nonscalable(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 3322 3323 if (call == MAT_INITIAL_MATRIX) { 3324 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3325 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 3326 } 3327 PetscFunctionReturn(0); 3328 } 3329 3330 /*@C 3331 MatCreateMPIAIJWithSeqAIJ - creates a MPIAIJ matrix using SeqAIJ matrices that contain the "diagonal" 3332 and "off-diagonal" part of the matrix in CSR format. 3333 3334 Collective on MPI_Comm 3335 3336 Input Parameters: 3337 + comm - MPI communicator 3338 . 
A - "diagonal" portion of matrix 3339 . B - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine 3340 - garray - global index of B columns 3341 3342 Output Parameter: 3343 . mat - the matrix, with input A as its local diagonal matrix 3344 Level: advanced 3345 3346 Notes: 3347 See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix. 3348 A becomes part of output mat, B is destroyed by this routine. The user cannot use A and B anymore. 3349 3350 .seealso: MatCreateMPIAIJWithSplitArrays() 3351 @*/ 3352 PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm,Mat A,Mat B,const PetscInt garray[],Mat *mat) 3353 { 3354 PetscErrorCode ierr; 3355 Mat_MPIAIJ *maij; 3356 Mat_SeqAIJ *b=(Mat_SeqAIJ*)B->data,*bnew; 3357 PetscInt *oi=b->i,*oj=b->j,i,nz,col; 3358 PetscScalar *oa=b->a; 3359 Mat Bnew; 3360 PetscInt m,n,N; 3361 3362 PetscFunctionBegin; 3363 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3364 ierr = MatGetSize(A,&m,&n);CHKERRQ(ierr); 3365 if (m != B->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Am %D != Bm %D",m,B->rmap->N); 3366 if (A->rmap->bs != B->rmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A row bs %D != B row bs %D",A->rmap->bs,B->rmap->bs); 3367 /* remove check below; When B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be same as A */ 3368 /* if (A->cmap->bs != B->cmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %D != B column bs %D",A->cmap->bs,B->cmap->bs); */ 3369 3370 /* Get global columns of mat */ 3371 ierr = MPIU_Allreduce(&n,&N,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3372 3373 ierr = MatSetSizes(*mat,m,n,PETSC_DECIDE,N);CHKERRQ(ierr); 3374 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 3375 ierr = MatSetBlockSizes(*mat,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr); 3376 maij = (Mat_MPIAIJ*)(*mat)->data; 3377 3378 (*mat)->preallocated = PETSC_TRUE; 3379 3380 ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr); 3381 ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr); 3382 3383 /* Set A as diagonal portion of *mat */ 3384 maij->A = A; 3385 3386 nz = oi[m]; 3387 for (i=0; i<nz; i++) { 3388 col = oj[i]; 3389 oj[i] = garray[col]; 3390 } 3391 3392 /* Set Bnew as off-diagonal portion of *mat */ 3393 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,N,oi,oj,oa,&Bnew);CHKERRQ(ierr); 3394 bnew = (Mat_SeqAIJ*)Bnew->data; 3395 bnew->maxnz = b->maxnz; /* allocated nonzeros of B */ 3396 maij->B = Bnew; 3397 3398 if (B->rmap->N != Bnew->rmap->N) SETERRQ2(PETSC_COMM_SELF,0,"BN %d != BnewN %d",B->rmap->N,Bnew->rmap->N); 3399 3400 b->singlemalloc = PETSC_FALSE; /* B arrays are shared by Bnew */ 3401 b->free_a = PETSC_FALSE; 3402 b->free_ij = PETSC_FALSE; 3403 ierr = MatDestroy(&B);CHKERRQ(ierr); 3404 3405 bnew->singlemalloc = PETSC_TRUE; /* arrays will be freed by MatDestroy(&Bnew) */ 3406 bnew->free_a = PETSC_TRUE; 3407 bnew->free_ij = PETSC_TRUE; 3408 3409 /* condense columns of maij->B */ 3410 ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr); 3411 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3412 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3413 ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr); 3414 ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3415 PetscFunctionReturn(0); 3416 } 3417 3418 extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool,Mat*); 3419 
3420 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat,IS isrow,IS iscol,IS iscol_local,MatReuse call,Mat *newmat) 3421 { 3422 PetscErrorCode ierr; 3423 PetscInt i,m,n,rstart,row,rend,nz,j,bs,cbs; 3424 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 3425 Mat_MPIAIJ *a=(Mat_MPIAIJ*)mat->data; 3426 Mat M,Msub,B=a->B; 3427 MatScalar *aa; 3428 Mat_SeqAIJ *aij; 3429 PetscInt *garray = a->garray,*colsub,Ncols; 3430 PetscInt count,Bn=B->cmap->N,cstart=mat->cmap->rstart,cend=mat->cmap->rend; 3431 IS iscol_sub,iscmap; 3432 const PetscInt *is_idx,*cmap; 3433 PetscBool allcolumns=PETSC_FALSE; 3434 MPI_Comm comm; 3435 3436 PetscFunctionBegin; 3437 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3438 3439 if (call == MAT_REUSE_MATRIX) { 3440 ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr); 3441 if (!iscol_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"SubIScol passed in was not used before, cannot reuse"); 3442 ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr); 3443 3444 ierr = PetscObjectQuery((PetscObject)*newmat,"Subcmap",(PetscObject*)&iscmap);CHKERRQ(ierr); 3445 if (!iscmap) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Subcmap passed in was not used before, cannot reuse"); 3446 3447 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Msub);CHKERRQ(ierr); 3448 if (!Msub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3449 3450 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_REUSE_MATRIX,PETSC_FALSE,&Msub);CHKERRQ(ierr); 3451 3452 } else { /* call == MAT_INITIAL_MATRIX) */ 3453 PetscBool flg; 3454 3455 ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr); 3456 ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr); 3457 3458 /* (1) iscol -> nonscalable iscol_local */ 3459 /* Check for special case: each processor gets entire matrix columns */ 3460 ierr = ISIdentity(iscol_local,&flg);CHKERRQ(ierr); 3461 if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE; 3462 if (allcolumns) { 3463 iscol_sub = iscol_local; 3464 ierr = PetscObjectReference((PetscObject)iscol_local);CHKERRQ(ierr); 3465 ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&iscmap);CHKERRQ(ierr); 3466 3467 } else { 3468 /* (2) iscol_local -> iscol_sub and iscmap. 
Implementation below requires iscol_local be sorted, it can have duplicate indices */ 3469 PetscInt *idx,*cmap1,k; 3470 ierr = PetscMalloc1(Ncols,&idx);CHKERRQ(ierr); 3471 ierr = PetscMalloc1(Ncols,&cmap1);CHKERRQ(ierr); 3472 ierr = ISGetIndices(iscol_local,&is_idx);CHKERRQ(ierr); 3473 count = 0; 3474 k = 0; 3475 for (i=0; i<Ncols; i++) { 3476 j = is_idx[i]; 3477 if (j >= cstart && j < cend) { 3478 /* diagonal part of mat */ 3479 idx[count] = j; 3480 cmap1[count++] = i; /* column index in submat */ 3481 } else if (Bn) { 3482 /* off-diagonal part of mat */ 3483 if (j == garray[k]) { 3484 idx[count] = j; 3485 cmap1[count++] = i; /* column index in submat */ 3486 } else if (j > garray[k]) { 3487 while (j > garray[k] && k < Bn-1) k++; 3488 if (j == garray[k]) { 3489 idx[count] = j; 3490 cmap1[count++] = i; /* column index in submat */ 3491 } 3492 } 3493 } 3494 } 3495 ierr = ISRestoreIndices(iscol_local,&is_idx);CHKERRQ(ierr); 3496 3497 ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_OWN_POINTER,&iscol_sub);CHKERRQ(ierr); 3498 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3499 ierr = ISSetBlockSize(iscol_sub,cbs);CHKERRQ(ierr); 3500 3501 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local),count,cmap1,PETSC_OWN_POINTER,&iscmap);CHKERRQ(ierr); 3502 } 3503 3504 /* (3) Create sequential Msub */ 3505 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_INITIAL_MATRIX,allcolumns,&Msub);CHKERRQ(ierr); 3506 } 3507 3508 ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr); 3509 aij = (Mat_SeqAIJ*)(Msub)->data; 3510 ii = aij->i; 3511 ierr = ISGetIndices(iscmap,&cmap);CHKERRQ(ierr); 3512 3513 /* 3514 m - number of local rows 3515 Ncols - number of columns (same on all processors) 3516 rstart - first row in new global matrix generated 3517 */ 3518 ierr = MatGetSize(Msub,&m,NULL);CHKERRQ(ierr); 3519 3520 if (call == MAT_INITIAL_MATRIX) { 3521 /* (4) Create parallel newmat */ 3522 PetscMPIInt rank,size; 3523 PetscInt csize; 3524 3525 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3526 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3527 3528 /* 3529 Determine the number of non-zeros in the diagonal and off-diagonal 3530 portions of the matrix in order to do correct preallocation 3531 */ 3532 3533 /* first get start and end of "diagonal" columns */ 3534 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3535 if (csize == PETSC_DECIDE) { 3536 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3537 if (mglobal == Ncols) { /* square matrix */ 3538 nlocal = m; 3539 } else { 3540 nlocal = Ncols/size + ((Ncols % size) > rank); 3541 } 3542 } else { 3543 nlocal = csize; 3544 } 3545 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3546 rstart = rend - nlocal; 3547 if (rank == size - 1 && rend != Ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,Ncols); 3548 3549 /* next, compute all the lengths */ 3550 jj = aij->j; 3551 ierr = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr); 3552 olens = dlens + m; 3553 for (i=0; i<m; i++) { 3554 jend = ii[i+1] - ii[i]; 3555 olen = 0; 3556 dlen = 0; 3557 for (j=0; j<jend; j++) { 3558 if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++; 3559 else dlen++; 3560 jj++; 3561 } 3562 olens[i] = olen; 3563 dlens[i] = dlen; 3564 } 3565 3566 ierr = ISGetBlockSize(isrow,&bs);CHKERRQ(ierr); 3567 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3568 3569 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3570 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,Ncols);CHKERRQ(ierr); 
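/* preallocate the new parallel matrix exactly, using the diagonal/off-diagonal row lengths (dlens/olens) counted above */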
3571 ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); 3572 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3573 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3574 ierr = PetscFree(dlens);CHKERRQ(ierr); 3575 3576 } else { /* call == MAT_REUSE_MATRIX */ 3577 M = *newmat; 3578 ierr = MatGetLocalSize(M,&i,NULL);CHKERRQ(ierr); 3579 if (i != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3580 ierr = MatZeroEntries(M);CHKERRQ(ierr); 3581 /* 3582 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3583 rather than the slower MatSetValues(). 3584 */ 3585 M->was_assembled = PETSC_TRUE; 3586 M->assembled = PETSC_FALSE; 3587 } 3588 3589 /* (5) Set values of Msub to *newmat */ 3590 ierr = PetscMalloc1(count,&colsub);CHKERRQ(ierr); 3591 ierr = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr); 3592 3593 jj = aij->j; 3594 aa = aij->a; 3595 for (i=0; i<m; i++) { 3596 row = rstart + i; 3597 nz = ii[i+1] - ii[i]; 3598 for (j=0; j<nz; j++) colsub[j] = cmap[jj[j]]; 3599 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,colsub,aa,INSERT_VALUES);CHKERRQ(ierr); 3600 jj += nz; aa += nz; 3601 } 3602 ierr = ISRestoreIndices(iscmap,&cmap);CHKERRQ(ierr); 3603 3604 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3605 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3606 3607 ierr = PetscFree(colsub);CHKERRQ(ierr); 3608 3609 /* save Msub, iscol_sub and iscmap used in processor for next request */ 3610 if (call == MAT_INITIAL_MATRIX) { 3611 *newmat = M; 3612 ierr = PetscObjectCompose((PetscObject)(*newmat),"SubMatrix",(PetscObject)Msub);CHKERRQ(ierr); 3613 ierr = MatDestroy(&Msub);CHKERRQ(ierr); 3614 3615 ierr = PetscObjectCompose((PetscObject)(*newmat),"SubIScol",(PetscObject)iscol_sub);CHKERRQ(ierr); 3616 ierr = ISDestroy(&iscol_sub);CHKERRQ(ierr); 3617 3618 ierr = PetscObjectCompose((PetscObject)(*newmat),"Subcmap",(PetscObject)iscmap);CHKERRQ(ierr); 3619 ierr = ISDestroy(&iscmap);CHKERRQ(ierr); 3620 3621 if (iscol_local) { 3622 ierr = PetscObjectCompose((PetscObject)(*newmat),"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3623 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 3624 } 3625 } 3626 PetscFunctionReturn(0); 3627 } 3628 3629 /* 3630 Not great since it makes two copies of the submatrix, first an SeqAIJ 3631 in local and then by concatenating the local matrices the end result. 3632 Writing it directly would be much like MatCreateSubMatrices_MPIAIJ() 3633 3634 Note: This requires a sequential iscol with all indices. 
3635 */ 3636 PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 3637 { 3638 PetscErrorCode ierr; 3639 PetscMPIInt rank,size; 3640 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs; 3641 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 3642 Mat M,Mreuse; 3643 MatScalar *aa,*vwork; 3644 MPI_Comm comm; 3645 Mat_SeqAIJ *aij; 3646 PetscBool colflag,allcolumns=PETSC_FALSE; 3647 3648 PetscFunctionBegin; 3649 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3650 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3651 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3652 3653 /* Check for special case: each processor gets entire matrix columns */ 3654 ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr); 3655 ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr); 3656 if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE; 3657 3658 if (call == MAT_REUSE_MATRIX) { 3659 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr); 3660 if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3661 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr); 3662 } else { 3663 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr); 3664 } 3665 3666 /* 3667 m - number of local rows 3668 n - number of columns (same on all processors) 3669 rstart - first row in new global matrix generated 3670 */ 3671 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 3672 ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr); 3673 if (call == MAT_INITIAL_MATRIX) { 3674 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3675 ii = aij->i; 3676 jj = aij->j; 3677 3678 /* 3679 Determine the number of non-zeros in the diagonal and off-diagonal 3680 portions of the matrix in order to do correct preallocation 3681 */ 3682 3683 /* first get start and end of "diagonal" columns */ 3684 if (csize == PETSC_DECIDE) { 3685 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3686 if (mglobal == n) { /* square matrix */ 3687 nlocal = m; 3688 } else { 3689 nlocal = n/size + ((n % size) > rank); 3690 } 3691 } else { 3692 nlocal = csize; 3693 } 3694 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3695 rstart = rend - nlocal; 3696 if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 3697 3698 /* next, compute all the lengths */ 3699 ierr = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr); 3700 olens = dlens + m; 3701 for (i=0; i<m; i++) { 3702 jend = ii[i+1] - ii[i]; 3703 olen = 0; 3704 dlen = 0; 3705 for (j=0; j<jend; j++) { 3706 if (*jj < rstart || *jj >= rend) olen++; 3707 else dlen++; 3708 jj++; 3709 } 3710 olens[i] = olen; 3711 dlens[i] = dlen; 3712 } 3713 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3714 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr); 3715 ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); 3716 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3717 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3718 ierr = PetscFree(dlens);CHKERRQ(ierr); 3719 } else { 3720 PetscInt ml,nl; 3721 3722 M = *newmat; 3723 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 3724 if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3725 ierr = 
MatZeroEntries(M);CHKERRQ(ierr); 3726 /* 3727 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3728 rather than the slower MatSetValues(). 3729 */ 3730 M->was_assembled = PETSC_TRUE; 3731 M->assembled = PETSC_FALSE; 3732 } 3733 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 3734 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3735 ii = aij->i; 3736 jj = aij->j; 3737 aa = aij->a; 3738 for (i=0; i<m; i++) { 3739 row = rstart + i; 3740 nz = ii[i+1] - ii[i]; 3741 cwork = jj; jj += nz; 3742 vwork = aa; aa += nz; 3743 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 3744 } 3745 3746 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3747 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3748 *newmat = M; 3749 3750 /* save submatrix used in processor for next request */ 3751 if (call == MAT_INITIAL_MATRIX) { 3752 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 3753 ierr = MatDestroy(&Mreuse);CHKERRQ(ierr); 3754 } 3755 PetscFunctionReturn(0); 3756 } 3757 3758 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[]) 3759 { 3760 PetscInt m,cstart, cend,j,nnz,i,d; 3761 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart,ii; 3762 const PetscInt *JJ; 3763 PetscScalar *values; 3764 PetscErrorCode ierr; 3765 PetscBool nooffprocentries; 3766 3767 PetscFunctionBegin; 3768 if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]); 3769 3770 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3771 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3772 m = B->rmap->n; 3773 cstart = B->cmap->rstart; 3774 cend = B->cmap->rend; 3775 rstart = B->rmap->rstart; 3776 3777 ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr); 3778 3779 #if defined(PETSC_USE_DEBUG) 3780 for (i=0; i<m; i++) { 3781 nnz = Ii[i+1]- Ii[i]; 3782 JJ = J + Ii[i]; 3783 if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz); 3784 if (nnz && (JJ[0] < 0)) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i); 3785 if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N); 3786 } 3787 #endif 3788 3789 for (i=0; i<m; i++) { 3790 nnz = Ii[i+1]- Ii[i]; 3791 JJ = J + Ii[i]; 3792 nnz_max = PetscMax(nnz_max,nnz); 3793 d = 0; 3794 for (j=0; j<nnz; j++) { 3795 if (cstart <= JJ[j] && JJ[j] < cend) d++; 3796 } 3797 d_nnz[i] = d; 3798 o_nnz[i] = nnz - d; 3799 } 3800 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 3801 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 3802 3803 if (v) values = (PetscScalar*)v; 3804 else { 3805 ierr = PetscCalloc1(nnz_max+1,&values);CHKERRQ(ierr); 3806 } 3807 3808 for (i=0; i<m; i++) { 3809 ii = i + rstart; 3810 nnz = Ii[i+1]- Ii[i]; 3811 ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ?
Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr); 3812 } 3813 nooffprocentries = B->nooffprocentries; 3814 B->nooffprocentries = PETSC_TRUE; 3815 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3816 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3817 B->nooffprocentries = nooffprocentries; 3818 3819 if (!v) { 3820 ierr = PetscFree(values);CHKERRQ(ierr); 3821 } 3822 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3823 PetscFunctionReturn(0); 3824 } 3825 3826 /*@ 3827 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 3828 (the default parallel PETSc format). 3829 3830 Collective on MPI_Comm 3831 3832 Input Parameters: 3833 + B - the matrix 3834 . i - the indices into j for the start of each local row (starts with zero) 3835 . j - the column indices for each local row (starts with zero) 3836 - v - optional values in the matrix 3837 3838 Level: developer 3839 3840 Notes: 3841 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 3842 thus you CANNOT change the matrix entries by changing the values of a[] after you have 3843 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3844 3845 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 3846 3847 The format which is used for the sparse matrix input, is equivalent to a 3848 row-major ordering.. i.e for the following matrix, the input data expected is 3849 as shown 3850 3851 $ 1 0 0 3852 $ 2 0 3 P0 3853 $ ------- 3854 $ 4 5 6 P1 3855 $ 3856 $ Process0 [P0]: rows_owned=[0,1] 3857 $ i = {0,1,3} [size = nrow+1 = 2+1] 3858 $ j = {0,0,2} [size = 3] 3859 $ v = {1,2,3} [size = 3] 3860 $ 3861 $ Process1 [P1]: rows_owned=[2] 3862 $ i = {0,3} [size = nrow+1 = 1+1] 3863 $ j = {0,1,2} [size = 3] 3864 $ v = {4,5,6} [size = 3] 3865 3866 .keywords: matrix, aij, compressed row, sparse, parallel 3867 3868 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MATMPIAIJ, 3869 MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays() 3870 @*/ 3871 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3872 { 3873 PetscErrorCode ierr; 3874 3875 PetscFunctionBegin; 3876 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr); 3877 PetscFunctionReturn(0); 3878 } 3879 3880 /*@C 3881 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format 3882 (the default parallel PETSc format). For good matrix assembly performance 3883 the user should preallocate the matrix storage by setting the parameters 3884 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3885 performance can be increased by more than a factor of 50. 3886 3887 Collective on MPI_Comm 3888 3889 Input Parameters: 3890 + B - the matrix 3891 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 3892 (same value is used for all local rows) 3893 . d_nnz - array containing the number of nonzeros in the various rows of the 3894 DIAGONAL portion of the local submatrix (possibly different for each row) 3895 or NULL (PETSC_NULL_INTEGER in Fortran), if d_nz is used to specify the nonzero structure. 3896 The size of this array is equal to the number of local rows, i.e 'm'. 
3897 For matrices that will be factored, you must leave room for (and set) 3898 the diagonal entry even if it is zero. 3899 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 3900 submatrix (same value is used for all local rows). 3901 - o_nnz - array containing the number of nonzeros in the various rows of the 3902 OFF-DIAGONAL portion of the local submatrix (possibly different for 3903 each row) or NULL (PETSC_NULL_INTEGER in Fortran), if o_nz is used to specify the nonzero 3904 structure. The size of this array is equal to the number 3905 of local rows, i.e 'm'. 3906 3907 If the *_nnz parameter is given then the *_nz parameter is ignored 3908 3909 The AIJ format (also called the Yale sparse matrix format or 3910 compressed row storage (CSR)), is fully compatible with standard Fortran 77 3911 storage. The stored row and column indices begin with zero. 3912 See Users-Manual: ch_mat for details. 3913 3914 The parallel matrix is partitioned such that the first m0 rows belong to 3915 process 0, the next m1 rows belong to process 1, the next m2 rows belong 3916 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 3917 3918 The DIAGONAL portion of the local submatrix of a processor can be defined 3919 as the submatrix which is obtained by extraction the part corresponding to 3920 the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the 3921 first row that belongs to the processor, r2 is the last row belonging to 3922 the this processor, and c1-c2 is range of indices of the local part of a 3923 vector suitable for applying the matrix to. This is an mxn matrix. In the 3924 common case of a square matrix, the row and column ranges are the same and 3925 the DIAGONAL part is also square. The remaining portion of the local 3926 submatrix (mxN) constitute the OFF-DIAGONAL portion. 3927 3928 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 3929 3930 You can call MatGetInfo() to get information on how effective the preallocation was; 3931 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3932 You can also run with the option -info and look for messages with the string 3933 malloc in them to see if additional memory allocation was needed. 3934 3935 Example usage: 3936 3937 Consider the following 8x8 matrix with 34 non-zero values, that is 3938 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 3939 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 3940 as follows: 3941 3942 .vb 3943 1 2 0 | 0 3 0 | 0 4 3944 Proc0 0 5 6 | 7 0 0 | 8 0 3945 9 0 10 | 11 0 0 | 12 0 3946 ------------------------------------- 3947 13 0 14 | 15 16 17 | 0 0 3948 Proc1 0 18 0 | 19 20 21 | 0 0 3949 0 0 0 | 22 23 0 | 24 0 3950 ------------------------------------- 3951 Proc2 25 26 27 | 0 0 28 | 29 0 3952 30 0 0 | 31 32 33 | 0 34 3953 .ve 3954 3955 This can be represented as a collection of submatrices as: 3956 3957 .vb 3958 A B C 3959 D E F 3960 G H I 3961 .ve 3962 3963 Where the submatrices A,B,C are owned by proc0, D,E,F are 3964 owned by proc1, G,H,I are owned by proc2. 3965 3966 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3967 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3968 The 'M','N' parameters are 8,8, and have the same values on all procs. 3969 3970 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 3971 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 3972 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 
3973 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 3974 part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ 3975 matrix, and [DF] as another SeqAIJ matrix. 3976 3977 When d_nz, o_nz parameters are specified, d_nz storage elements are 3978 allocated for every row of the local diagonal submatrix, and o_nz 3979 storage locations are allocated for every row of the OFF-DIAGONAL submat. 3980 One way to choose d_nz and o_nz is to use the maximum number of nonzeros per local 3981 row for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 3982 In this case, the values of d_nz,o_nz are: 3983 .vb 3984 proc0 : dnz = 2, o_nz = 2 3985 proc1 : dnz = 3, o_nz = 2 3986 proc2 : dnz = 1, o_nz = 4 3987 .ve 3988 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 3989 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 3990 for proc2, i.e. we are using 12+15+10=37 storage locations to store 3991 34 values. 3992 3993 When d_nnz, o_nnz parameters are specified, the storage is specified 3994 for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 3995 In the above case the values for d_nnz,o_nnz are: 3996 .vb 3997 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 3998 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 3999 proc2: d_nnz = [1,1] and o_nnz = [4,4] 4000 .ve 4001 Here the space allocated is the sum of all the above values, i.e. 34, and 4002 hence pre-allocation is perfect. 4003 4004 Level: intermediate 4005 4006 .keywords: matrix, aij, compressed row, sparse, parallel 4007 4008 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(), 4009 MATMPIAIJ, MatGetInfo(), PetscSplitOwnership() 4010 @*/ 4011 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 4012 { 4013 PetscErrorCode ierr; 4014 4015 PetscFunctionBegin; 4016 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 4017 PetscValidType(B,1); 4018 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr); 4019 PetscFunctionReturn(0); 4020 } 4021 4022 /*@ 4023 MatCreateMPIAIJWithArrays - creates an MPI AIJ matrix using arrays that contain the local rows in standard 4024 CSR format. 4025 4026 Collective on MPI_Comm 4027 4028 Input Parameters: 4029 + comm - MPI communicator 4030 . m - number of local rows (Cannot be PETSC_DECIDE) 4031 . n - This value should be the same as the local size used in creating the 4032 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4033 calculated if N is given) For square matrices n is almost always m. 4034 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4035 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4036 . i - row indices 4037 . j - column indices 4038 - a - matrix values 4039 4040 Output Parameter: 4041 . mat - the matrix 4042 4043 Level: intermediate 4044 4045 Notes: 4046 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 4047 thus you CANNOT change the matrix entries by changing the values of a[] after you have 4048 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 4049 4050 The i and j indices are 0-based, and the i indices are offsets into the local j array.
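 As an illustrative sketch (not a complete program; every process makes the same collective call with its own local data), process 0 of the example shown below could create the matrix with
$      PetscInt    i[] = {0,1,3}, j[] = {0,0,2};
$      PetscScalar v[] = {1.0,2.0,3.0};
$      Mat         A;
$      MatCreateMPIAIJWithArrays(PETSC_COMM_WORLD,2,PETSC_DECIDE,PETSC_DETERMINE,3,i,j,v,&A);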
4051 4052 The format which is used for the sparse matrix input, is equivalent to a 4053 row-major ordering.. i.e for the following matrix, the input data expected is 4054 as shown 4055 4056 $ 1 0 0 4057 $ 2 0 3 P0 4058 $ ------- 4059 $ 4 5 6 P1 4060 $ 4061 $ Process0 [P0]: rows_owned=[0,1] 4062 $ i = {0,1,3} [size = nrow+1 = 2+1] 4063 $ j = {0,0,2} [size = 3] 4064 $ v = {1,2,3} [size = 3] 4065 $ 4066 $ Process1 [P1]: rows_owned=[2] 4067 $ i = {0,3} [size = nrow+1 = 1+1] 4068 $ j = {0,1,2} [size = 3] 4069 $ v = {4,5,6} [size = 3] 4070 4071 .keywords: matrix, aij, compressed row, sparse, parallel 4072 4073 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4074 MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays() 4075 @*/ 4076 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 4077 { 4078 PetscErrorCode ierr; 4079 4080 PetscFunctionBegin; 4081 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 4082 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4083 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 4084 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 4085 /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */ 4086 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 4087 ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr); 4088 PetscFunctionReturn(0); 4089 } 4090 4091 /*@C 4092 MatCreateAIJ - Creates a sparse parallel matrix in AIJ format 4093 (the default parallel PETSc format). For good matrix assembly performance 4094 the user should preallocate the matrix storage by setting the parameters 4095 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 4096 performance can be increased by more than a factor of 50. 4097 4098 Collective on MPI_Comm 4099 4100 Input Parameters: 4101 + comm - MPI communicator 4102 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 4103 This value should be the same as the local size used in creating the 4104 y vector for the matrix-vector product y = Ax. 4105 . n - This value should be the same as the local size used in creating the 4106 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4107 calculated if N is given) For square matrices n is almost always m. 4108 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4109 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4110 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 4111 (same value is used for all local rows) 4112 . d_nnz - array containing the number of nonzeros in the various rows of the 4113 DIAGONAL portion of the local submatrix (possibly different for each row) 4114 or NULL, if d_nz is used to specify the nonzero structure. 4115 The size of this array is equal to the number of local rows, i.e 'm'. 4116 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 4117 submatrix (same value is used for all local rows). 4118 - o_nnz - array containing the number of nonzeros in the various rows of the 4119 OFF-DIAGONAL portion of the local submatrix (possibly different for 4120 each row) or NULL, if o_nz is used to specify the nonzero 4121 structure. The size of this array is equal to the number 4122 of local rows, i.e 'm'. 
4123 4124 Output Parameter: 4125 . A - the matrix 4126 4127 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 4128 MatXXXXSetPreallocation() paradgm instead of this routine directly. 4129 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 4130 4131 Notes: 4132 If the *_nnz parameter is given then the *_nz parameter is ignored 4133 4134 m,n,M,N parameters specify the size of the matrix, and its partitioning across 4135 processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 4136 storage requirements for this matrix. 4137 4138 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 4139 processor than it must be used on all processors that share the object for 4140 that argument. 4141 4142 The user MUST specify either the local or global matrix dimensions 4143 (possibly both). 4144 4145 The parallel matrix is partitioned across processors such that the 4146 first m0 rows belong to process 0, the next m1 rows belong to 4147 process 1, the next m2 rows belong to process 2 etc.. where 4148 m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores 4149 values corresponding to [m x N] submatrix. 4150 4151 The columns are logically partitioned with the n0 columns belonging 4152 to 0th partition, the next n1 columns belonging to the next 4153 partition etc.. where n0,n1,n2... are the input parameter 'n'. 4154 4155 The DIAGONAL portion of the local submatrix on any given processor 4156 is the submatrix corresponding to the rows and columns m,n 4157 corresponding to the given processor. i.e diagonal matrix on 4158 process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1] 4159 etc. The remaining portion of the local submatrix [m x (N-n)] 4160 constitute the OFF-DIAGONAL portion. The example below better 4161 illustrates this concept. 4162 4163 For a square global matrix we define each processor's diagonal portion 4164 to be its local rows and the corresponding columns (a square submatrix); 4165 each processor's off-diagonal portion encompasses the remainder of the 4166 local matrix (a rectangular submatrix). 4167 4168 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 4169 4170 When calling this routine with a single process communicator, a matrix of 4171 type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this 4172 type of communicator, use the construction mechanism 4173 .vb 4174 MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...); 4175 .ve 4176 4177 $ MatCreate(...,&A); 4178 $ MatSetType(A,MATMPIAIJ); 4179 $ MatSetSizes(A, m,n,M,N); 4180 $ MatMPIAIJSetPreallocation(A,...); 4181 4182 By default, this format uses inodes (identical nodes) when possible. 4183 We search for consecutive rows with the same nonzero structure, thereby 4184 reusing matrix information to achieve increased efficiency. 4185 4186 Options Database Keys: 4187 + -mat_no_inode - Do not use inodes 4188 . -mat_inode_limit <limit> - Sets inode limit (max limit=5) 4189 - -mat_aij_oneindex - Internally use indexing starting at 1 4190 rather than 0. Note that when calling MatSetValues(), 4191 the user still MUST index entries starting at 0! 4192 4193 4194 Example usage: 4195 4196 Consider the following 8x8 matrix with 34 non-zero values, that is 4197 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 4198 proc1 owns 3 rows, proc2 owns 2 rows. 
This division can be shown 4199 as follows 4200 4201 .vb 4202 1 2 0 | 0 3 0 | 0 4 4203 Proc0 0 5 6 | 7 0 0 | 8 0 4204 9 0 10 | 11 0 0 | 12 0 4205 ------------------------------------- 4206 13 0 14 | 15 16 17 | 0 0 4207 Proc1 0 18 0 | 19 20 21 | 0 0 4208 0 0 0 | 22 23 0 | 24 0 4209 ------------------------------------- 4210 Proc2 25 26 27 | 0 0 28 | 29 0 4211 30 0 0 | 31 32 33 | 0 34 4212 .ve 4213 4214 This can be represented as a collection of submatrices as 4215 4216 .vb 4217 A B C 4218 D E F 4219 G H I 4220 .ve 4221 4222 Where the submatrices A,B,C are owned by proc0, D,E,F are 4223 owned by proc1, G,H,I are owned by proc2. 4224 4225 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4226 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4227 The 'M','N' parameters are 8,8, and have the same values on all procs. 4228 4229 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 4230 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 4231 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 4232 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 4233 part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ 4234 matrix, and [DF] as another SeqAIJ matrix. 4235 4236 When d_nz, o_nz parameters are specified, d_nz storage elements are 4237 allocated for every row of the local diagonal submatrix, and o_nz 4238 storage locations are allocated for every row of the OFF-DIAGONAL submat. 4239 One way to choose d_nz and o_nz is to use the maximum number of nonzeros per local 4240 row for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 4241 In this case, the values of d_nz,o_nz are 4242 .vb 4243 proc0 : dnz = 2, o_nz = 2 4244 proc1 : dnz = 3, o_nz = 2 4245 proc2 : dnz = 1, o_nz = 4 4246 .ve 4247 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 4248 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 4249 for proc2, i.e. we are using 12+15+10=37 storage locations to store 4250 34 values. 4251 4252 When d_nnz, o_nnz parameters are specified, the storage is specified 4253 for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 4254 In the above case the values for d_nnz,o_nnz are 4255 .vb 4256 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 4257 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 4258 proc2: d_nnz = [1,1] and o_nnz = [4,4] 4259 .ve 4260 Here the space allocated is the sum of all the above values, i.e. 34, and 4261 hence pre-allocation is perfect.
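 As an illustrative sketch (every process makes the same collective call with its own local sizes and counts), proc1 in the example above could create the matrix with exact preallocation as
.vb
      PetscInt d_nnz[] = {3,3,2}, o_nnz[] = {2,1,1};
      Mat      A;
      MatCreateAIJ(PETSC_COMM_WORLD,3,3,8,8,0,d_nnz,0,o_nnz,&A);
.ve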
4262 4263 Level: intermediate 4264 4265 .keywords: matrix, aij, compressed row, sparse, parallel 4266 4267 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4268 MATMPIAIJ, MatCreateMPIAIJWithArrays() 4269 @*/ 4270 PetscErrorCode MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 4271 { 4272 PetscErrorCode ierr; 4273 PetscMPIInt size; 4274 4275 PetscFunctionBegin; 4276 ierr = MatCreate(comm,A);CHKERRQ(ierr); 4277 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 4278 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4279 if (size > 1) { 4280 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 4281 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 4282 } else { 4283 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 4284 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 4285 } 4286 PetscFunctionReturn(0); 4287 } 4288 4289 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[]) 4290 { 4291 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4292 PetscBool flg; 4293 PetscErrorCode ierr; 4294 4295 PetscFunctionBegin; 4296 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&flg);CHKERRQ(ierr); 4297 if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MATMPIAIJ matrix as input"); 4298 if (Ad) *Ad = a->A; 4299 if (Ao) *Ao = a->B; 4300 if (colmap) *colmap = a->garray; 4301 PetscFunctionReturn(0); 4302 } 4303 4304 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 4305 { 4306 PetscErrorCode ierr; 4307 PetscInt m,N,i,rstart,nnz,Ii; 4308 PetscInt *indx; 4309 PetscScalar *values; 4310 4311 PetscFunctionBegin; 4312 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 4313 if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */ 4314 PetscInt *dnz,*onz,sum,bs,cbs; 4315 4316 if (n == PETSC_DECIDE) { 4317 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 4318 } 4319 /* Check sum(n) = N */ 4320 ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4321 if (sum != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %D != global columns %D",sum,N); 4322 4323 ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4324 rstart -= m; 4325 4326 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4327 for (i=0; i<m; i++) { 4328 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 4329 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 4330 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 4331 } 4332 4333 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 4334 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4335 ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr); 4336 ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr); 4337 ierr = MatSetType(*outmat,MATAIJ);CHKERRQ(ierr); 4338 ierr = MatSeqAIJSetPreallocation(*outmat,0,dnz);CHKERRQ(ierr); 4339 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 4340 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4341 } 4342 4343 /* numeric phase */ 4344 ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr); 4345 for (i=0; i<m; i++) { 4346 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4347 Ii = i + rstart; 4348 ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 
4349 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4350 } 4351 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4352 ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4353 PetscFunctionReturn(0); 4354 } 4355 4356 PetscErrorCode MatFileSplit(Mat A,char *outfile) 4357 { 4358 PetscErrorCode ierr; 4359 PetscMPIInt rank; 4360 PetscInt m,N,i,rstart,nnz; 4361 size_t len; 4362 const PetscInt *indx; 4363 PetscViewer out; 4364 char *name; 4365 Mat B; 4366 const PetscScalar *values; 4367 4368 PetscFunctionBegin; 4369 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 4370 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 4371 /* Should this be the type of the diagonal block of A? */ 4372 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 4373 ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr); 4374 ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr); 4375 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 4376 ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr); 4377 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 4378 for (i=0; i<m; i++) { 4379 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4380 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4381 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4382 } 4383 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4384 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4385 4386 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr); 4387 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 4388 ierr = PetscMalloc1(len+5,&name);CHKERRQ(ierr); 4389 sprintf(name,"%s.%d",outfile,rank); 4390 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr); 4391 ierr = PetscFree(name);CHKERRQ(ierr); 4392 ierr = MatView(B,out);CHKERRQ(ierr); 4393 ierr = PetscViewerDestroy(&out);CHKERRQ(ierr); 4394 ierr = MatDestroy(&B);CHKERRQ(ierr); 4395 PetscFunctionReturn(0); 4396 } 4397 4398 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A) 4399 { 4400 PetscErrorCode ierr; 4401 Mat_Merge_SeqsToMPI *merge; 4402 PetscContainer container; 4403 4404 PetscFunctionBegin; 4405 ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr); 4406 if (container) { 4407 ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr); 4408 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 4409 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 4410 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 4411 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 4412 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 4413 ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr); 4414 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 4415 ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr); 4416 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 4417 ierr = PetscFree(merge->coi);CHKERRQ(ierr); 4418 ierr = PetscFree(merge->coj);CHKERRQ(ierr); 4419 ierr = PetscFree(merge->owners_co);CHKERRQ(ierr); 4420 ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr); 4421 ierr = PetscFree(merge);CHKERRQ(ierr); 4422 ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr); 4423 } 4424 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 4425 PetscFunctionReturn(0); 4426 } 4427 4428 #include <../src/mat/utils/freespace.h> 4429 #include <petscbt.h> 4430 4431 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat) 4432 { 4433 PetscErrorCode ierr; 4434 MPI_Comm comm; 4435 Mat_SeqAIJ *a =(Mat_SeqAIJ*)seqmat->data; 4436 PetscMPIInt 
size,rank,taga,*len_s; 4437 PetscInt N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj; 4438 PetscInt proc,m; 4439 PetscInt **buf_ri,**buf_rj; 4440 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 4441 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 4442 MPI_Request *s_waits,*r_waits; 4443 MPI_Status *status; 4444 MatScalar *aa=a->a; 4445 MatScalar **abuf_r,*ba_i; 4446 Mat_Merge_SeqsToMPI *merge; 4447 PetscContainer container; 4448 4449 PetscFunctionBegin; 4450 ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr); 4451 ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4452 4453 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4454 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4455 4456 ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr); 4457 ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr); 4458 4459 bi = merge->bi; 4460 bj = merge->bj; 4461 buf_ri = merge->buf_ri; 4462 buf_rj = merge->buf_rj; 4463 4464 ierr = PetscMalloc1(size,&status);CHKERRQ(ierr); 4465 owners = merge->rowmap->range; 4466 len_s = merge->len_s; 4467 4468 /* send and recv matrix values */ 4469 /*-----------------------------*/ 4470 ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr); 4471 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 4472 4473 ierr = PetscMalloc1(merge->nsend+1,&s_waits);CHKERRQ(ierr); 4474 for (proc=0,k=0; proc<size; proc++) { 4475 if (!len_s[proc]) continue; 4476 i = owners[proc]; 4477 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr); 4478 k++; 4479 } 4480 4481 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);} 4482 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);} 4483 ierr = PetscFree(status);CHKERRQ(ierr); 4484 4485 ierr = PetscFree(s_waits);CHKERRQ(ierr); 4486 ierr = PetscFree(r_waits);CHKERRQ(ierr); 4487 4488 /* insert mat values of mpimat */ 4489 /*----------------------------*/ 4490 ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr); 4491 ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr); 4492 4493 for (k=0; k<merge->nrecv; k++) { 4494 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4495 nrows = *(buf_ri_k[k]); 4496 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */ 4497 nextai[k] = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure */ 4498 } 4499 4500 /* set values of ba */ 4501 m = merge->rowmap->n; 4502 for (i=0; i<m; i++) { 4503 arow = owners[rank] + i; 4504 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 4505 bnzi = bi[i+1] - bi[i]; 4506 ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr); 4507 4508 /* add local non-zero vals of this proc's seqmat into ba */ 4509 anzi = ai[arow+1] - ai[arow]; 4510 aj = a->j + ai[arow]; 4511 aa = a->a + ai[arow]; 4512 nextaj = 0; 4513 for (j=0; nextaj<anzi; j++) { 4514 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4515 ba_i[j] += aa[nextaj++]; 4516 } 4517 } 4518 4519 /* add received vals into ba */ 4520 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 4521 /* i-th row */ 4522 if (i == *nextrow[k]) { 4523 anzi = *(nextai[k]+1) - *nextai[k]; 4524 aj = buf_rj[k] + *(nextai[k]); 4525 aa = abuf_r[k] + *(nextai[k]); 4526 nextaj = 0; 4527 for (j=0; nextaj<anzi; j++) { 4528 if (*(bj_i + j) == aj[nextaj]) { /* bcol 
== acol */ 4529 ba_i[j] += aa[nextaj++]; 4530 } 4531 } 4532 nextrow[k]++; nextai[k]++; 4533 } 4534 } 4535 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 4536 } 4537 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4538 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4539 4540 ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr); 4541 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 4542 ierr = PetscFree(ba_i);CHKERRQ(ierr); 4543 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4544 ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4545 PetscFunctionReturn(0); 4546 } 4547 4548 PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 4549 { 4550 PetscErrorCode ierr; 4551 Mat B_mpi; 4552 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4553 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 4554 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 4555 PetscInt M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j; 4556 PetscInt len,proc,*dnz,*onz,bs,cbs; 4557 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 4558 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 4559 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 4560 MPI_Status *status; 4561 PetscFreeSpaceList free_space=NULL,current_space=NULL; 4562 PetscBT lnkbt; 4563 Mat_Merge_SeqsToMPI *merge; 4564 PetscContainer container; 4565 4566 PetscFunctionBegin; 4567 ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4568 4569 /* make sure it is a PETSc comm */ 4570 ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr); 4571 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4572 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4573 4574 ierr = PetscNew(&merge);CHKERRQ(ierr); 4575 ierr = PetscMalloc1(size,&status);CHKERRQ(ierr); 4576 4577 /* determine row ownership */ 4578 /*---------------------------------------------------------*/ 4579 ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr); 4580 ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr); 4581 ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr); 4582 ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr); 4583 ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr); 4584 ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr); 4585 ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr); 4586 4587 m = merge->rowmap->n; 4588 owners = merge->rowmap->range; 4589 4590 /* determine the number of messages to send, their lengths */ 4591 /*---------------------------------------------------------*/ 4592 len_s = merge->len_s; 4593 4594 len = 0; /* length of buf_si[] */ 4595 merge->nsend = 0; 4596 for (proc=0; proc<size; proc++) { 4597 len_si[proc] = 0; 4598 if (proc == rank) { 4599 len_s[proc] = 0; 4600 } else { 4601 len_si[proc] = owners[proc+1] - owners[proc] + 1; 4602 len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 4603 } 4604 if (len_s[proc]) { 4605 merge->nsend++; 4606 nrows = 0; 4607 for (i=owners[proc]; i<owners[proc+1]; i++) { 4608 if (ai[i+1] > ai[i]) nrows++; 4609 } 4610 len_si[proc] = 2*(nrows+1); 4611 len += len_si[proc]; 4612 } 4613 } 4614 4615 /* determine the number and length of messages to receive for ij-structure */ 4616 /*-------------------------------------------------------------------------*/ 4617 ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 4618 ierr = 
PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 4619 4620 /* post the Irecv of j-structure */ 4621 /*-------------------------------*/ 4622 ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr); 4623 ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 4624 4625 /* post the Isend of j-structure */ 4626 /*--------------------------------*/ 4627 ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr); 4628 4629 for (proc=0, k=0; proc<size; proc++) { 4630 if (!len_s[proc]) continue; 4631 i = owners[proc]; 4632 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr); 4633 k++; 4634 } 4635 4636 /* receives and sends of j-structure are complete */ 4637 /*------------------------------------------------*/ 4638 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);} 4639 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);} 4640 4641 /* send and recv i-structure */ 4642 /*---------------------------*/ 4643 ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr); 4644 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 4645 4646 ierr = PetscMalloc1(len+1,&buf_s);CHKERRQ(ierr); 4647 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4648 for (proc=0,k=0; proc<size; proc++) { 4649 if (!len_s[proc]) continue; 4650 /* form outgoing message for i-structure: 4651 buf_si[0]: nrows to be sent 4652 [1:nrows]: row index (global) 4653 [nrows+1:2*nrows+1]: i-structure index 4654 */ 4655 /*-------------------------------------------*/ 4656 nrows = len_si[proc]/2 - 1; 4657 buf_si_i = buf_si + nrows+1; 4658 buf_si[0] = nrows; 4659 buf_si_i[0] = 0; 4660 nrows = 0; 4661 for (i=owners[proc]; i<owners[proc+1]; i++) { 4662 anzi = ai[i+1] - ai[i]; 4663 if (anzi) { 4664 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 4665 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 4666 nrows++; 4667 } 4668 } 4669 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr); 4670 k++; 4671 buf_si += len_si[proc]; 4672 } 4673 4674 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);} 4675 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);} 4676 4677 ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 4678 for (i=0; i<merge->nrecv; i++) { 4679 ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 4680 } 4681 4682 ierr = PetscFree(len_si);CHKERRQ(ierr); 4683 ierr = PetscFree(len_ri);CHKERRQ(ierr); 4684 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 4685 ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr); 4686 ierr = PetscFree(ri_waits);CHKERRQ(ierr); 4687 ierr = PetscFree(buf_s);CHKERRQ(ierr); 4688 ierr = PetscFree(status);CHKERRQ(ierr); 4689 4690 /* compute a local seq matrix in each processor */ 4691 /*----------------------------------------------*/ 4692 /* allocate bi array and free space for accumulating nonzero column info */ 4693 ierr = PetscMalloc1(m+1,&bi);CHKERRQ(ierr); 4694 bi[0] = 0; 4695 4696 /* create and initialize a linked list */ 4697 nlnk = N+1; 4698 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4699 4700 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 4701 len = ai[owners[rank+1]] - 
ai[owners[rank]]; 4702 ierr = PetscFreeSpaceGet(PetscIntMultTruncate(2,len)+1,&free_space);CHKERRQ(ierr); 4703 4704 current_space = free_space; 4705 4706 /* determine symbolic info for each local row */ 4707 ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr); 4708 4709 for (k=0; k<merge->nrecv; k++) { 4710 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4711 nrows = *buf_ri_k[k]; 4712 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 4713 nextai[k] = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure */ 4714 } 4715 4716 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4717 len = 0; 4718 for (i=0; i<m; i++) { 4719 bnzi = 0; 4720 /* add local non-zero cols of this proc's seqmat into lnk */ 4721 arow = owners[rank] + i; 4722 anzi = ai[arow+1] - ai[arow]; 4723 aj = a->j + ai[arow]; 4724 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4725 bnzi += nlnk; 4726 /* add received col data into lnk */ 4727 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 4728 if (i == *nextrow[k]) { /* i-th row */ 4729 anzi = *(nextai[k]+1) - *nextai[k]; 4730 aj = buf_rj[k] + *nextai[k]; 4731 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4732 bnzi += nlnk; 4733 nextrow[k]++; nextai[k]++; 4734 } 4735 } 4736 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 4737 4738 /* if free space is not available, make more free space */ 4739 if (current_space->local_remaining<bnzi) { 4740 ierr = PetscFreeSpaceGet(PetscIntSumTruncate(bnzi,current_space->total_array_size),&current_space);CHKERRQ(ierr); 4741 nspacedouble++; 4742 } 4743 /* copy data into free space, then initialize lnk */ 4744 ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 4745 ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr); 4746 4747 current_space->array += bnzi; 4748 current_space->local_used += bnzi; 4749 current_space->local_remaining -= bnzi; 4750 4751 bi[i+1] = bi[i] + bnzi; 4752 } 4753 4754 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4755 4756 ierr = PetscMalloc1(bi[m]+1,&bj);CHKERRQ(ierr); 4757 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 4758 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 4759 4760 /* create symbolic parallel matrix B_mpi */ 4761 /*---------------------------------------*/ 4762 ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr); 4763 ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr); 4764 if (n==PETSC_DECIDE) { 4765 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr); 4766 } else { 4767 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4768 } 4769 ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr); 4770 ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr); 4771 ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr); 4772 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4773 ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 4774 4775 /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */ 4776 B_mpi->assembled = PETSC_FALSE; 4777 B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI; 4778 merge->bi = bi; 4779 merge->bj = bj; 4780 merge->buf_ri = buf_ri; 4781 merge->buf_rj = buf_rj; 4782 merge->coi = NULL; 4783 merge->coj = NULL; 4784 merge->owners_co = NULL; 4785 4786 ierr = PetscCommDestroy(&comm);CHKERRQ(ierr); 4787 4788 /* attach the 
supporting struct to B_mpi for reuse */ 4789 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 4790 ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr); 4791 ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr); 4792 ierr = PetscContainerDestroy(&container);CHKERRQ(ierr); 4793 *mpimat = B_mpi; 4794 4795 ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4796 PetscFunctionReturn(0); 4797 } 4798 4799 /*@C 4800 MatCreateMPIAIJSumSeqAIJ - Creates a MATMPIAIJ matrix by adding sequential 4801 matrices from each processor 4802 4803 Collective on MPI_Comm 4804 4805 Input Parameters: 4806 + comm - the communicator the parallel matrix will live on 4807 . seqmat - the input sequential matrix on each process 4808 . m - number of local rows (or PETSC_DECIDE) 4809 . n - number of local columns (or PETSC_DECIDE) 4810 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4811 4812 Output Parameter: 4813 . mpimat - the parallel matrix generated 4814 4815 Level: advanced 4816 4817 Notes: 4818 The dimensions of the sequential matrix in each processor MUST be the same. 4819 The input seqmat is placed in the container "Mat_Merge_SeqsToMPI", and will be 4820 destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat. 4821 @*/ 4822 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat) 4823 { 4824 PetscErrorCode ierr; 4825 PetscMPIInt size; 4826 4827 PetscFunctionBegin; 4828 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4829 if (size == 1) { 4830 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4831 if (scall == MAT_INITIAL_MATRIX) { 4832 ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr); 4833 } else { 4834 ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 4835 } 4836 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4837 PetscFunctionReturn(0); 4838 } 4839 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4840 if (scall == MAT_INITIAL_MATRIX) { 4841 ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr); 4842 } 4843 ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr); 4844 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4845 PetscFunctionReturn(0); 4846 } 4847 4848 /*@ 4849 MatMPIAIJGetLocalMat - Creates a SeqAIJ matrix from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with 4850 mlocal rows and n columns, where mlocal is the local row count obtained with MatGetLocalSize() and n is the global column count obtained 4851 with MatGetSize() 4852 4853 Not Collective 4854 4855 Input Parameters: 4856 + A - the matrix 4857 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4858 4859 Output Parameter: 4860 . 
A_loc - the local sequential matrix generated 4861 4862 Level: developer 4863 4864 .seealso: MatGetOwnerShipRange(), MatMPIAIJGetLocalMatCondensed() 4865 4866 @*/ 4867 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc) 4868 { 4869 PetscErrorCode ierr; 4870 Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data; 4871 Mat_SeqAIJ *mat,*a,*b; 4872 PetscInt *ai,*aj,*bi,*bj,*cmap=mpimat->garray; 4873 MatScalar *aa,*ba,*cam; 4874 PetscScalar *ca; 4875 PetscInt am=A->rmap->n,i,j,k,cstart=A->cmap->rstart; 4876 PetscInt *ci,*cj,col,ncols_d,ncols_o,jo; 4877 PetscBool match; 4878 MPI_Comm comm; 4879 PetscMPIInt size; 4880 4881 PetscFunctionBegin; 4882 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 4883 if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input"); 4884 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 4885 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4886 if (size == 1 && scall == MAT_REUSE_MATRIX) PetscFunctionReturn(0); 4887 4888 ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 4889 a = (Mat_SeqAIJ*)(mpimat->A)->data; 4890 b = (Mat_SeqAIJ*)(mpimat->B)->data; 4891 ai = a->i; aj = a->j; bi = b->i; bj = b->j; 4892 aa = a->a; ba = b->a; 4893 if (scall == MAT_INITIAL_MATRIX) { 4894 if (size == 1) { 4895 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ai,aj,aa,A_loc);CHKERRQ(ierr); 4896 PetscFunctionReturn(0); 4897 } 4898 4899 ierr = PetscMalloc1(1+am,&ci);CHKERRQ(ierr); 4900 ci[0] = 0; 4901 for (i=0; i<am; i++) { 4902 ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]); 4903 } 4904 ierr = PetscMalloc1(1+ci[am],&cj);CHKERRQ(ierr); 4905 ierr = PetscMalloc1(1+ci[am],&ca);CHKERRQ(ierr); 4906 k = 0; 4907 for (i=0; i<am; i++) { 4908 ncols_o = bi[i+1] - bi[i]; 4909 ncols_d = ai[i+1] - ai[i]; 4910 /* off-diagonal portion of A */ 4911 for (jo=0; jo<ncols_o; jo++) { 4912 col = cmap[*bj]; 4913 if (col >= cstart) break; 4914 cj[k] = col; bj++; 4915 ca[k++] = *ba++; 4916 } 4917 /* diagonal portion of A */ 4918 for (j=0; j<ncols_d; j++) { 4919 cj[k] = cstart + *aj++; 4920 ca[k++] = *aa++; 4921 } 4922 /* off-diagonal portion of A */ 4923 for (j=jo; j<ncols_o; j++) { 4924 cj[k] = cmap[*bj++]; 4925 ca[k++] = *ba++; 4926 } 4927 } 4928 /* put together the new matrix */ 4929 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr); 4930 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 4931 /* Since these are PETSc arrays, change flags to free them as necessary. 
*/ 4932 mat = (Mat_SeqAIJ*)(*A_loc)->data; 4933 mat->free_a = PETSC_TRUE; 4934 mat->free_ij = PETSC_TRUE; 4935 mat->nonew = 0; 4936 } else if (scall == MAT_REUSE_MATRIX) { 4937 mat=(Mat_SeqAIJ*)(*A_loc)->data; 4938 ci = mat->i; cj = mat->j; cam = mat->a; 4939 for (i=0; i<am; i++) { 4940 /* off-diagonal portion of A */ 4941 ncols_o = bi[i+1] - bi[i]; 4942 for (jo=0; jo<ncols_o; jo++) { 4943 col = cmap[*bj]; 4944 if (col >= cstart) break; 4945 *cam++ = *ba++; bj++; 4946 } 4947 /* diagonal portion of A */ 4948 ncols_d = ai[i+1] - ai[i]; 4949 for (j=0; j<ncols_d; j++) *cam++ = *aa++; 4950 /* off-diagonal portion of A */ 4951 for (j=jo; j<ncols_o; j++) { 4952 *cam++ = *ba++; bj++; 4953 } 4954 } 4955 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 4956 ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 4957 PetscFunctionReturn(0); 4958 } 4959 4960 /*@C 4961 MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MATMPIAIJ matrix by taking all its local rows and NON-ZERO columns 4962 4963 Not Collective 4964 4965 Input Parameters: 4966 + A - the matrix 4967 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4968 - row, col - index sets of rows and columns to extract (or NULL) 4969 4970 Output Parameter: 4971 . A_loc - the local sequential matrix generated 4972 4973 Level: developer 4974 4975 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat() 4976 4977 @*/ 4978 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc) 4979 { 4980 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 4981 PetscErrorCode ierr; 4982 PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx; 4983 IS isrowa,iscola; 4984 Mat *aloc; 4985 PetscBool match; 4986 4987 PetscFunctionBegin; 4988 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 4989 if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input"); 4990 ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 4991 if (!row) { 4992 start = A->rmap->rstart; end = A->rmap->rend; 4993 ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr); 4994 } else { 4995 isrowa = *row; 4996 } 4997 if (!col) { 4998 start = A->cmap->rstart; 4999 cmap = a->garray; 5000 nzA = a->A->cmap->n; 5001 nzB = a->B->cmap->n; 5002 ierr = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr); 5003 ncols = 0; 5004 for (i=0; i<nzB; i++) { 5005 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5006 else break; 5007 } 5008 imark = i; 5009 for (i=0; i<nzA; i++) idx[ncols++] = start + i; 5010 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; 5011 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr); 5012 } else { 5013 iscola = *col; 5014 } 5015 if (scall != MAT_INITIAL_MATRIX) { 5016 ierr = PetscMalloc1(1,&aloc);CHKERRQ(ierr); 5017 aloc[0] = *A_loc; 5018 } 5019 ierr = MatCreateSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr); 5020 *A_loc = aloc[0]; 5021 ierr = PetscFree(aloc);CHKERRQ(ierr); 5022 if (!row) { 5023 ierr = ISDestroy(&isrowa);CHKERRQ(ierr); 5024 } 5025 if (!col) { 5026 ierr = ISDestroy(&iscola);CHKERRQ(ierr); 5027 } 5028 ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 5029 PetscFunctionReturn(0); 5030 } 5031 5032 /*@C 5033 MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 5034 5035 Collective on Mat 5036 5037 Input Parameters: 5038 + A,B - the matrices in mpiaij format 5039 . 
scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5040 - rowb, colb - index sets of rows and columns of B to extract (or NULL) 5041 5042 Output Parameter: 5043 + rowb, colb - index sets of rows and columns of B to extract 5044 - B_seq - the sequential matrix generated 5045 5046 Level: developer 5047 5048 @*/ 5049 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq) 5050 { 5051 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5052 PetscErrorCode ierr; 5053 PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark; 5054 IS isrowb,iscolb; 5055 Mat *bseq=NULL; 5056 5057 PetscFunctionBegin; 5058 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 5059 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5060 } 5061 ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5062 5063 if (scall == MAT_INITIAL_MATRIX) { 5064 start = A->cmap->rstart; 5065 cmap = a->garray; 5066 nzA = a->A->cmap->n; 5067 nzB = a->B->cmap->n; 5068 ierr = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr); 5069 ncols = 0; 5070 for (i=0; i<nzB; i++) { /* row < local row index */ 5071 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5072 else break; 5073 } 5074 imark = i; 5075 for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */ 5076 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 5077 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr); 5078 ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr); 5079 } else { 5080 if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 5081 isrowb = *rowb; iscolb = *colb; 5082 ierr = PetscMalloc1(1,&bseq);CHKERRQ(ierr); 5083 bseq[0] = *B_seq; 5084 } 5085 ierr = MatCreateSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr); 5086 *B_seq = bseq[0]; 5087 ierr = PetscFree(bseq);CHKERRQ(ierr); 5088 if (!rowb) { 5089 ierr = ISDestroy(&isrowb);CHKERRQ(ierr); 5090 } else { 5091 *rowb = isrowb; 5092 } 5093 if (!colb) { 5094 ierr = ISDestroy(&iscolb);CHKERRQ(ierr); 5095 } else { 5096 *colb = iscolb; 5097 } 5098 ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5099 PetscFunctionReturn(0); 5100 } 5101 5102 /* 5103 MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns 5104 of the OFF-DIAGONAL portion of local A 5105 5106 Collective on Mat 5107 5108 Input Parameters: 5109 + A,B - the matrices in mpiaij format 5110 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5111 5112 Output Parameter: 5113 + startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL) 5114 . startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL) 5115 . 
bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL) 5116 - B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N 5117 5118 Level: developer 5119 5120 */ 5121 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth) 5122 { 5123 VecScatter_MPI_General *gen_to,*gen_from; 5124 PetscErrorCode ierr; 5125 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5126 Mat_SeqAIJ *b_oth; 5127 VecScatter ctx =a->Mvctx; 5128 MPI_Comm comm; 5129 PetscMPIInt *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank; 5130 PetscInt *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj; 5131 PetscInt *rvalues,*svalues; 5132 MatScalar *b_otha,*bufa,*bufA; 5133 PetscInt i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len; 5134 MPI_Request *rwaits = NULL,*swaits = NULL; 5135 MPI_Status *sstatus,rstatus; 5136 PetscMPIInt jj,size; 5137 PetscInt *cols,sbs,rbs; 5138 PetscScalar *vals; 5139 5140 PetscFunctionBegin; 5141 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 5142 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 5143 5144 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 5145 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5146 } 5147 ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5148 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 5149 5150 if (size == 1) { 5151 startsj_s = NULL; 5152 bufa_ptr = NULL; 5153 *B_oth = NULL; 5154 PetscFunctionReturn(0); 5155 } 5156 5157 gen_to = (VecScatter_MPI_General*)ctx->todata; 5158 gen_from = (VecScatter_MPI_General*)ctx->fromdata; 5159 nrecvs = gen_from->n; 5160 nsends = gen_to->n; 5161 5162 ierr = PetscMalloc2(nrecvs,&rwaits,nsends,&swaits);CHKERRQ(ierr); 5163 srow = gen_to->indices; /* local row index to be sent */ 5164 sstarts = gen_to->starts; 5165 sprocs = gen_to->procs; 5166 sstatus = gen_to->sstatus; 5167 sbs = gen_to->bs; 5168 rstarts = gen_from->starts; 5169 rprocs = gen_from->procs; 5170 rbs = gen_from->bs; 5171 5172 if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 5173 if (scall == MAT_INITIAL_MATRIX) { 5174 /* i-array */ 5175 /*---------*/ 5176 /* post receives */ 5177 ierr = PetscMalloc1(rbs*(rstarts[nrecvs] - rstarts[0]),&rvalues);CHKERRQ(ierr); 5178 for (i=0; i<nrecvs; i++) { 5179 rowlen = rvalues + rstarts[i]*rbs; 5180 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */ 5181 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5182 } 5183 5184 /* pack the outgoing message */ 5185 ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr); 5186 5187 sstartsj[0] = 0; 5188 rstartsj[0] = 0; 5189 len = 0; /* total length of j or a array to be sent */ 5190 k = 0; 5191 ierr = PetscMalloc1(sbs*(sstarts[nsends] - sstarts[0]),&svalues);CHKERRQ(ierr); 5192 for (i=0; i<nsends; i++) { 5193 rowlen = svalues + sstarts[i]*sbs; 5194 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5195 for (j=0; j<nrows; j++) { 5196 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 5197 for (l=0; l<sbs; l++) { 5198 ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */ 5199 5200 rowlen[j*sbs+l] = ncols; 5201 5202 len += ncols; 5203 ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); 5204 } 5205 k++; 5206 } 5207 ierr = 
MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5208 5209 sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */ 5210 } 5211 /* recvs and sends of i-array are completed */ 5212 i = nrecvs; 5213 while (i--) { 5214 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5215 } 5216 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5217 ierr = PetscFree(svalues);CHKERRQ(ierr); 5218 5219 /* allocate buffers for sending j and a arrays */ 5220 ierr = PetscMalloc1(len+1,&bufj);CHKERRQ(ierr); 5221 ierr = PetscMalloc1(len+1,&bufa);CHKERRQ(ierr); 5222 5223 /* create i-array of B_oth */ 5224 ierr = PetscMalloc1(aBn+2,&b_othi);CHKERRQ(ierr); 5225 5226 b_othi[0] = 0; 5227 len = 0; /* total length of j or a array to be received */ 5228 k = 0; 5229 for (i=0; i<nrecvs; i++) { 5230 rowlen = rvalues + rstarts[i]*rbs; 5231 nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */ 5232 for (j=0; j<nrows; j++) { 5233 b_othi[k+1] = b_othi[k] + rowlen[j]; 5234 ierr = PetscIntSumError(rowlen[j],len,&len);CHKERRQ(ierr); 5235 k++; 5236 } 5237 rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 5238 } 5239 ierr = PetscFree(rvalues);CHKERRQ(ierr); 5240 5241 /* allocate space for j and a arrrays of B_oth */ 5242 ierr = PetscMalloc1(b_othi[aBn]+1,&b_othj);CHKERRQ(ierr); 5243 ierr = PetscMalloc1(b_othi[aBn]+1,&b_otha);CHKERRQ(ierr); 5244 5245 /* j-array */ 5246 /*---------*/ 5247 /* post receives of j-array */ 5248 for (i=0; i<nrecvs; i++) { 5249 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5250 ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5251 } 5252 5253 /* pack the outgoing message j-array */ 5254 k = 0; 5255 for (i=0; i<nsends; i++) { 5256 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5257 bufJ = bufj+sstartsj[i]; 5258 for (j=0; j<nrows; j++) { 5259 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5260 for (ll=0; ll<sbs; ll++) { 5261 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr); 5262 for (l=0; l<ncols; l++) { 5263 *bufJ++ = cols[l]; 5264 } 5265 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr); 5266 } 5267 } 5268 ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5269 } 5270 5271 /* recvs and sends of j-array are completed */ 5272 i = nrecvs; 5273 while (i--) { 5274 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5275 } 5276 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5277 } else if (scall == MAT_REUSE_MATRIX) { 5278 sstartsj = *startsj_s; 5279 rstartsj = *startsj_r; 5280 bufa = *bufa_ptr; 5281 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5282 b_otha = b_oth->a; 5283 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container"); 5284 5285 /* a-array */ 5286 /*---------*/ 5287 /* post receives of a-array */ 5288 for (i=0; i<nrecvs; i++) { 5289 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5290 ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5291 } 5292 5293 /* pack the outgoing message a-array */ 5294 k = 0; 5295 for (i=0; i<nsends; i++) { 5296 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5297 bufA = bufa+sstartsj[i]; 5298 for (j=0; j<nrows; j++) { 5299 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5300 for (ll=0; 
ll<sbs; ll++) { 5301 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr); 5302 for (l=0; l<ncols; l++) { 5303 *bufA++ = vals[l]; 5304 } 5305 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr); 5306 } 5307 } 5308 ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5309 } 5310 /* recvs and sends of a-array are completed */ 5311 i = nrecvs; 5312 while (i--) { 5313 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5314 } 5315 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5316 ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr); 5317 5318 if (scall == MAT_INITIAL_MATRIX) { 5319 /* put together the new matrix */ 5320 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr); 5321 5322 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5323 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5324 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5325 b_oth->free_a = PETSC_TRUE; 5326 b_oth->free_ij = PETSC_TRUE; 5327 b_oth->nonew = 0; 5328 5329 ierr = PetscFree(bufj);CHKERRQ(ierr); 5330 if (!startsj_s || !bufa_ptr) { 5331 ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr); 5332 ierr = PetscFree(bufa_ptr);CHKERRQ(ierr); 5333 } else { 5334 *startsj_s = sstartsj; 5335 *startsj_r = rstartsj; 5336 *bufa_ptr = bufa; 5337 } 5338 } 5339 ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5340 PetscFunctionReturn(0); 5341 } 5342 5343 /*@C 5344 MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication. 5345 5346 Not Collective 5347 5348 Input Parameters: 5349 . A - The matrix in mpiaij format 5350 5351 Output Parameter: 5352 + lvec - The local vector holding off-process values from the argument to a matrix-vector product 5353 . 
colmap - A map from global column index to local index into lvec 5354 - multScatter - A scatter from the argument of a matrix-vector product to lvec 5355 5356 Level: developer 5357 5358 @*/ 5359 #if defined(PETSC_USE_CTABLE) 5360 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter) 5361 #else 5362 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter) 5363 #endif 5364 { 5365 Mat_MPIAIJ *a; 5366 5367 PetscFunctionBegin; 5368 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 5369 PetscValidPointer(lvec, 2); 5370 PetscValidPointer(colmap, 3); 5371 PetscValidPointer(multScatter, 4); 5372 a = (Mat_MPIAIJ*) A->data; 5373 if (lvec) *lvec = a->lvec; 5374 if (colmap) *colmap = a->colmap; 5375 if (multScatter) *multScatter = a->Mvctx; 5376 PetscFunctionReturn(0); 5377 } 5378 5379 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*); 5380 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*); 5381 #if defined(PETSC_HAVE_MKL_SPARSE) 5382 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat,MatType,MatReuse,Mat*); 5383 #endif 5384 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*); 5385 #if defined(PETSC_HAVE_ELEMENTAL) 5386 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat,MatType,MatReuse,Mat*); 5387 #endif 5388 #if defined(PETSC_HAVE_HYPRE) 5389 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*); 5390 PETSC_INTERN PetscErrorCode MatMatMatMult_Transpose_AIJ_AIJ(Mat,Mat,Mat,MatReuse,PetscReal,Mat*); 5391 #endif 5392 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_IS(Mat,MatType,MatReuse,Mat*); 5393 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat,MatType,MatReuse,Mat*); 5394 5395 /* 5396 Computes (B'*A')' since computing B*A directly is untenable 5397 5398 n p p 5399 ( ) ( ) ( ) 5400 m ( A ) * n ( B ) = m ( C ) 5401 ( ) ( ) ( ) 5402 5403 */ 5404 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C) 5405 { 5406 PetscErrorCode ierr; 5407 Mat At,Bt,Ct; 5408 5409 PetscFunctionBegin; 5410 ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr); 5411 ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr); 5412 ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr); 5413 ierr = MatDestroy(&At);CHKERRQ(ierr); 5414 ierr = MatDestroy(&Bt);CHKERRQ(ierr); 5415 ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr); 5416 ierr = MatDestroy(&Ct);CHKERRQ(ierr); 5417 PetscFunctionReturn(0); 5418 } 5419 5420 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C) 5421 { 5422 PetscErrorCode ierr; 5423 PetscInt m=A->rmap->n,n=B->cmap->n; 5424 Mat Cmat; 5425 5426 PetscFunctionBegin; 5427 if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n); 5428 ierr = MatCreate(PetscObjectComm((PetscObject)A),&Cmat);CHKERRQ(ierr); 5429 ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 5430 ierr = MatSetBlockSizesFromMats(Cmat,A,B);CHKERRQ(ierr); 5431 ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr); 5432 ierr = MatMPIDenseSetPreallocation(Cmat,NULL);CHKERRQ(ierr); 5433 ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5434 ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5435 5436 Cmat->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ; 5437 5438 *C = Cmat; 5439 PetscFunctionReturn(0); 5440 } 5441 5442 /* 
----------------------------------------------------------------*/ 5443 PETSC_INTERN PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) 5444 { 5445 PetscErrorCode ierr; 5446 5447 PetscFunctionBegin; 5448 if (scall == MAT_INITIAL_MATRIX) { 5449 ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr); 5450 ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr); 5451 ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr); 5452 } 5453 ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr); 5454 ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr); 5455 ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr); 5456 PetscFunctionReturn(0); 5457 } 5458 5459 /*MC 5460 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 5461 5462 Options Database Keys: 5463 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 5464 5465 Level: beginner 5466 5467 .seealso: MatCreateAIJ() 5468 M*/ 5469 5470 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B) 5471 { 5472 Mat_MPIAIJ *b; 5473 PetscErrorCode ierr; 5474 PetscMPIInt size; 5475 5476 PetscFunctionBegin; 5477 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr); 5478 5479 ierr = PetscNewLog(B,&b);CHKERRQ(ierr); 5480 B->data = (void*)b; 5481 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 5482 B->assembled = PETSC_FALSE; 5483 B->insertmode = NOT_SET_VALUES; 5484 b->size = size; 5485 5486 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr); 5487 5488 /* build cache for off array entries formed */ 5489 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr); 5490 5491 b->donotstash = PETSC_FALSE; 5492 b->colmap = 0; 5493 b->garray = 0; 5494 b->roworiented = PETSC_TRUE; 5495 5496 /* stuff used for matrix vector multiply */ 5497 b->lvec = NULL; 5498 b->Mvctx = NULL; 5499 5500 /* stuff for MatGetRow() */ 5501 b->rowindices = 0; 5502 b->rowvalues = 0; 5503 b->getrowactive = PETSC_FALSE; 5504 5505 /* flexible pointer used in CUSP/CUSPARSE classes */ 5506 b->spptr = NULL; 5507 5508 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetUseScalableIncreaseOverlap_C",MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ);CHKERRQ(ierr); 5509 ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr); 5510 ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 5511 ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr); 5512 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr); 5513 ierr = PetscObjectComposeFunction((PetscObject)B,"MatResetPreallocation_C",MatResetPreallocation_MPIAIJ);CHKERRQ(ierr); 5514 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr); 5515 ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr); 5516 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr); 5517 #if defined(PETSC_HAVE_MKL_SPARSE) 5518 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijmkl_C",MatConvert_MPIAIJ_MPIAIJMKL);CHKERRQ(ierr); 
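 /* The aijmkl conversion above is registered only when PETSc was configured with MKL sparse support; like the other MatConvert_mpiaij_* routines composed in this constructor, it is reached by the user through MatConvert() */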
5519 #endif 5520 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr); 5521 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr); 5522 #if defined(PETSC_HAVE_ELEMENTAL) 5523 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_elemental_C",MatConvert_MPIAIJ_Elemental);CHKERRQ(ierr); 5524 #endif 5525 #if defined(PETSC_HAVE_HYPRE) 5526 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr); 5527 #endif 5528 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_is_C",MatConvert_MPIAIJ_IS);CHKERRQ(ierr); 5529 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisell_C",MatConvert_MPIAIJ_MPISELL);CHKERRQ(ierr); 5530 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr); 5531 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr); 5532 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr); 5533 #if defined(PETSC_HAVE_HYPRE) 5534 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMatMult_transpose_mpiaij_mpiaij_C",MatMatMatMult_Transpose_AIJ_AIJ);CHKERRQ(ierr); 5535 #endif 5536 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr); 5537 PetscFunctionReturn(0); 5538 } 5539 5540 /*@C 5541 MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal" 5542 and "off-diagonal" part of the matrix in CSR format. 5543 5544 Collective on MPI_Comm 5545 5546 Input Parameters: 5547 + comm - MPI communicator 5548 . m - number of local rows (Cannot be PETSC_DECIDE) 5549 . n - This value should be the same as the local size used in creating the 5550 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 5551 calculated if N is given) For square matrices n is almost always m. 5552 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 5553 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 5554 . i - row indices for "diagonal" portion of matrix 5555 . j - column indices 5556 . a - matrix values 5557 . oi - row indices for "off-diagonal" portion of matrix 5558 . oj - column indices 5559 - oa - matrix values 5560 5561 Output Parameter: 5562 . mat - the matrix 5563 5564 Level: advanced 5565 5566 Notes: 5567 The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user 5568 must free the arrays once the matrix has been destroyed and not before. 5569 5570 The i and j indices are 0 based 5571 5572 See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix 5573 5574 This sets local rows and cannot be used to set off-processor values. 5575 5576 Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a 5577 legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does 5578 not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because 5579 the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to 5580 keep track of the underlying array. 
Use MatSetOption(A,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all 5581 communication if it is known that only local entries will be set. 5582 5583 .keywords: matrix, aij, compressed row, sparse, parallel 5584 5585 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 5586 MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays() 5587 @*/ 5588 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat) 5589 { 5590 PetscErrorCode ierr; 5591 Mat_MPIAIJ *maij; 5592 5593 PetscFunctionBegin; 5594 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 5595 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 5596 if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0"); 5597 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 5598 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 5599 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 5600 maij = (Mat_MPIAIJ*) (*mat)->data; 5601 5602 (*mat)->preallocated = PETSC_TRUE; 5603 5604 ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr); 5605 ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr); 5606 5607 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr); 5608 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr); 5609 5610 ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5611 ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5612 ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5613 ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5614 5615 ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr); 5616 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5617 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5618 ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr); 5619 ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 5620 PetscFunctionReturn(0); 5621 } 5622 5623 /* 5624 Special version for direct calls from Fortran 5625 */ 5626 #include <petsc/private/fortranimpl.h> 5627 5628 /* Change these macros so can be used in void function */ 5629 #undef CHKERRQ 5630 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr) 5631 #undef SETERRQ2 5632 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr) 5633 #undef SETERRQ3 5634 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr) 5635 #undef SETERRQ 5636 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr) 5637 5638 #if defined(PETSC_HAVE_FORTRAN_CAPS) 5639 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ 5640 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 5641 #define matsetvaluesmpiaij_ matsetvaluesmpiaij 5642 #else 5643 #endif 5644 PETSC_EXTERN void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr) 5645 { 5646 Mat mat = *mmat; 5647 PetscInt m = *mm, n = *mn; 5648 InsertMode addv = *maddv; 5649 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 5650 PetscScalar value; 5651 PetscErrorCode ierr; 5652 5653 MatCheckPreallocated(mat,1); 5654 if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv; 5655 5656 #if 
defined(PETSC_USE_DEBUG) 5657 else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 5658 #endif 5659 { 5660 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 5661 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 5662 PetscBool roworiented = aij->roworiented; 5663 5664 /* Some Variables required in the macro */ 5665 Mat A = aij->A; 5666 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 5667 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 5668 MatScalar *aa = a->a; 5669 PetscBool ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE); 5670 Mat B = aij->B; 5671 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 5672 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 5673 MatScalar *ba = b->a; 5674 5675 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 5676 PetscInt nonew = a->nonew; 5677 MatScalar *ap1,*ap2; 5678 5679 PetscFunctionBegin; 5680 for (i=0; i<m; i++) { 5681 if (im[i] < 0) continue; 5682 #if defined(PETSC_USE_DEBUG) 5683 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 5684 #endif 5685 if (im[i] >= rstart && im[i] < rend) { 5686 row = im[i] - rstart; 5687 lastcol1 = -1; 5688 rp1 = aj + ai[row]; 5689 ap1 = aa + ai[row]; 5690 rmax1 = aimax[row]; 5691 nrow1 = ailen[row]; 5692 low1 = 0; 5693 high1 = nrow1; 5694 lastcol2 = -1; 5695 rp2 = bj + bi[row]; 5696 ap2 = ba + bi[row]; 5697 rmax2 = bimax[row]; 5698 nrow2 = bilen[row]; 5699 low2 = 0; 5700 high2 = nrow2; 5701 5702 for (j=0; j<n; j++) { 5703 if (roworiented) value = v[i*n+j]; 5704 else value = v[i+j*m]; 5705 if (in[j] >= cstart && in[j] < cend) { 5706 col = in[j] - cstart; 5707 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue; 5708 MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]); 5709 } else if (in[j] < 0) continue; 5710 #if defined(PETSC_USE_DEBUG) 5711 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 5712 #endif 5713 else { 5714 if (mat->was_assembled) { 5715 if (!aij->colmap) { 5716 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 5717 } 5718 #if defined(PETSC_USE_CTABLE) 5719 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 5720 col--; 5721 #else 5722 col = aij->colmap[in[j]] - 1; 5723 #endif 5724 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue; 5725 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 5726 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 5727 col = in[j]; 5728 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 5729 B = aij->B; 5730 b = (Mat_SeqAIJ*)B->data; 5731 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 5732 rp2 = bj + bi[row]; 5733 ap2 = ba + bi[row]; 5734 rmax2 = bimax[row]; 5735 nrow2 = bilen[row]; 5736 low2 = 0; 5737 high2 = nrow2; 5738 bm = aij->B->rmap->n; 5739 ba = b->a; 5740 } 5741 } else col = in[j]; 5742 MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]); 5743 } 5744 } 5745 } else if (!aij->donotstash) { 5746 if (roworiented) { 5747 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5748 } else { 5749 ierr = 
MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5750 } 5751 } 5752 } 5753 } 5754 PetscFunctionReturnVoid(); 5755 } 5756 5757
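/*
   Illustrative usage sketch (not part of the original source): how the MatCreateAIJ()
   interface documented earlier in this file is typically driven from application code.
   The matrix name A, the global size 100, and the preallocation counts below are
   assumptions made only for this example.

     Mat            A;
     PetscInt       row,rstart,rend;
     PetscErrorCode ierr;

     ierr = MatCreateAIJ(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,100,100,3,NULL,2,NULL,&A);CHKERRQ(ierr);
     ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
     for (row=rstart; row<rend; row++) {
       ierr = MatSetValue(A,row,row,2.0,INSERT_VALUES);CHKERRQ(ierr);
       if (row+1 < 100) {ierr = MatSetValue(A,row,row+1,-1.0,INSERT_VALUES);CHKERRQ(ierr);}
     }
     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatDestroy(&A);CHKERRQ(ierr);

   The d_nz and o_nz arguments are given here as simple per-row upper bounds for the
   diagonal and off-diagonal blocks; see the MatCreateAIJ() man page above for the exact
   meaning of the two blocks and of the d_nnz/o_nnz array forms.
*/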