#include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/
#include <petsc/private/vecimpl.h>
#include <petsc/private/sfimpl.h>
#include <petsc/private/isimpl.h>
#include <petscblaslapack.h>
#include <petscsf.h>
#include <petsc/private/hashmapi.h>

/*MC
   MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
   and MATMPIAIJ otherwise. As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes. It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()

  Developer Notes:
    Subclasses include MATAIJCUSPARSE, MATAIJPERM, MATAIJSELL, MATAIJMKL, MATAIJCRL, and also automatically switches over to use inodes when
   enough exist.

  Level: beginner

.seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
M*/

/*MC
   MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
   and MATMPIAIJCRL otherwise. As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes. It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()

  Level: beginner

.seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
M*/

/* Bind (or unbind) the matrix to the CPU so subsequent operations do (or may not) run on the host. */
static PetscErrorCode MatBindToCPU_MPIAIJ(Mat A,PetscBool flg)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
#if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_VIENNACL)
  A->boundtocpu = flg;
#endif
  /* forward the request to the diagonal (A) and off-diagonal (B) blocks, when they already exist */
  if (a->A) {
    PetscCall(MatBindToCPU(a->A,flg));
  }
  if (a->B) {
    PetscCall(MatBindToCPU(a->B,flg));
  }

  /* In addition to binding the diagonal and off-diagonal matrices, bind the local vectors used for matrix-vector products.
   * This maybe seems a little odd for a MatBindToCPU() call to do, but it makes no sense for the binding of these vectors
   * to differ from the parent matrix. */
  if (a->lvec) {
    PetscCall(VecBindToCPU(a->lvec,flg));
  }
  if (a->diag) {
    PetscCall(VecBindToCPU(a->diag,flg));
  }

  PetscFunctionReturn(0);
}

/* Set row/column block sizes on the sub-blocks; the off-diagonal block B always keeps
   column block size 1 because its columns are a compressed subset of the global columns. */
PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ*)M->data;

  PetscFunctionBegin;
  if (mat->A) {
    PetscCall(MatSetBlockSizes(mat->A,rbs,cbs));
    PetscCall(MatSetBlockSizes(mat->B,rbs,1));
  }
  PetscFunctionReturn(0);
}

/* Build an IS (global numbering) of the locally owned rows that contain at least one
   numerically nonzero stored entry.  If every row on every rank is kept, *keptrows stays NULL. */
PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
{
  Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
  Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
  Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
  const PetscInt  *ia,*ib;
  const MatScalar *aa,*bb,*aav,*bav;
  PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
  PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;

  PetscFunctionBegin;
  *keptrows = NULL;

  ia = a->i;
  ib = b->i;
  PetscCall(MatSeqAIJGetArrayRead(mat->A,&aav));
  PetscCall(MatSeqAIJGetArrayRead(mat->B,&bav));
  /* first pass: count the locally "zero" rows (structurally empty, or all stored values zero) */
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) {
      cnt++;
      goto ok1;
    }
    /* any nonzero value in the diagonal (A) or off-diagonal (B) block keeps the row */
    aa = aav + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) goto ok1;
    }
    bb = bav + ib[i];
    for (j=0; j <nb; j++) {
      if (bb[j] != 0.0) goto ok1;
    }
    cnt++;
ok1:;
  }
  /* if no rank found a zero row there is nothing to do and *keptrows stays NULL */
  PetscCall(MPIU_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M)));
  if (!n0rows) {
    PetscCall(MatSeqAIJRestoreArrayRead(mat->A,&aav));
    PetscCall(MatSeqAIJRestoreArrayRead(mat->B,&bav));
    PetscFunctionReturn(0);
  }
  PetscCall(PetscMalloc1(M->rmap->n-cnt,&rows));
  cnt = 0;
  /* second pass: record the global indices of the rows that are kept */
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) continue;
    aa = aav + ia[i];
    for (j=0; j<na;j++) {
      if (aa[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
    bb = bav + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
ok2:;
  }
  PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows));
  PetscCall(MatSeqAIJRestoreArrayRead(mat->A,&aav));
  PetscCall(MatSeqAIJRestoreArrayRead(mat->B,&bav));
  PetscFunctionReturn(0);
}

/* Insert/add the vector D onto the diagonal of Y. */
PetscErrorCode MatDiagonalSet_MPIAIJ(Mat Y,Vec D,InsertMode is)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*) Y->data;
  PetscBool  cong;

  PetscFunctionBegin;
  PetscCall(MatHasCongruentLayouts(Y,&cong));
  if (Y->assembled && cong) {
    /* row/column layouts match, so the whole diagonal lives in the local diagonal block */
    PetscCall(MatDiagonalSet(aij->A,D,is));
  } else {
    PetscCall(MatDiagonalSet_Default(Y,D,is));
  }
  PetscFunctionReturn(0);
}

/* Create an IS (global numbering) of the locally owned rows whose diagonal entry is missing or zero. */
PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)M->data;
  PetscInt   i,rstart,nrows,*rows;

  PetscFunctionBegin;
  *zrows = NULL;
  PetscCall(MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows));
  PetscCall(MatGetOwnershipRange(M,&rstart,NULL));
  /* shift the local row indices returned by the SeqAIJ helper to global numbering */
  for (i=0; i<nrows; i++) rows[i] += rstart;
  PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows));
  PetscFunctionReturn(0);
}

/* Compute a per-column reduction (1/2/inf norm, or sum/mean of real/imaginary parts) over the
   whole parallel matrix.  reductions[] must have length n = global number of columns. */
PetscErrorCode MatGetColumnReductions_MPIAIJ(Mat A,PetscInt type,PetscReal *reductions)
{
  Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)A->data;
  PetscInt          i,m,n,*garray = aij->garray;
  Mat_SeqAIJ        *a_aij = (Mat_SeqAIJ*) aij->A->data;
  Mat_SeqAIJ        *b_aij = (Mat_SeqAIJ*) aij->B->data;
  PetscReal         *work;
  const PetscScalar *dummy;

  PetscFunctionBegin;
  PetscCall(MatGetSize(A,&m,&n));
  PetscCall(PetscCalloc1(n,&work));
  /* NOTE(review): the get/restore pairs with a dummy pointer appear intended to sync any
     device-side values to the host before a_aij->a / b_aij->a are read directly — confirm */
  PetscCall(MatSeqAIJGetArrayRead(aij->A,&dummy));
  PetscCall(MatSeqAIJRestoreArrayRead(aij->A,&dummy));
  PetscCall(MatSeqAIJGetArrayRead(aij->B,&dummy));
  PetscCall(MatSeqAIJRestoreArrayRead(aij->B,&dummy));
  if (type == NORM_2) {
    /* accumulate |a_ij|^2 per global column; the square root is applied after the reduction */
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      /* garray maps B's compressed column indices back to global columns */
      work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
    }
  } else if (type == NORM_1) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
    }
  } else if (type == NORM_INFINITY) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
    }
  } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] += PetscRealPart(a_aij->a[i]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] += PetscRealPart(b_aij->a[i]);
    }
  } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] += PetscImaginaryPart(a_aij->a[i]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] += PetscImaginaryPart(b_aij->a[i]);
    }
  } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown reduction type");
  /* combine the local contributions: max for the infinity norm, sum for everything else */
  if (type == NORM_INFINITY) {
    PetscCall(MPIU_Allreduce(work,reductions,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A)));
  } else {
    PetscCall(MPIU_Allreduce(work,reductions,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A)));
  }
  PetscCall(PetscFree(work));
  if (type == NORM_2) {
    for (i=0; i<n; i++) reductions[i] = PetscSqrtReal(reductions[i]);
  } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) {
    /* mean variants divide the column sums by the global number of rows */
    for (i=0; i<n; i++) reductions[i] /= m;
  }
  PetscFunctionReturn(0);
}

/* Create an IS of the local rows that have entries outside the (block-size) block diagonal. */
PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A,IS *is)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  IS             sis,gis;
  const PetscInt *isis,*igis;
  PetscInt       n,*iis,nsis,ngis,rstart,i;

  PetscFunctionBegin;
  /* rows off the block diagonal inside the diagonal block, plus every nonempty row of B */
  PetscCall(MatFindOffBlockDiagonalEntries(a->A,&sis));
  PetscCall(MatFindNonzeroRows(a->B,&gis));
  PetscCall(ISGetSize(gis,&ngis));
  PetscCall(ISGetSize(sis,&nsis));
  PetscCall(ISGetIndices(sis,&isis));
  PetscCall(ISGetIndices(gis,&igis));

  /* merge the two local-numbering index lists, then sort and drop duplicates */
  PetscCall(PetscMalloc1(ngis+nsis,&iis));
  PetscCall(PetscArraycpy(iis,igis,ngis));
  PetscCall(PetscArraycpy(iis+ngis,isis,nsis));
  n = ngis + nsis;
  PetscCall(PetscSortRemoveDupsInt(&n,iis));
  PetscCall(MatGetOwnershipRange(A,&rstart,NULL));
  /* shift to global row numbering */
  for (i=0; i<n; i++) iis[i] += rstart;
  PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)A),n,iis,PETSC_OWN_POINTER,is));

  PetscCall(ISRestoreIndices(sis,&isis));
  PetscCall(ISRestoreIndices(gis,&igis));
  PetscCall(ISDestroy(&sis));
  PetscCall(ISDestroy(&gis));
  PetscFunctionReturn(0);
}

/*
    Local utility routine that creates a mapping from the global column
  number to the local number in the off-diagonal part of the local
  storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
  a slightly higher hash table cost; without it it is not scalable (each processor
  has an order N integer array but is fast to access.
*/
PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
  PetscInt   n = aij->B->cmap->n,i;

  PetscFunctionBegin;
  PetscCheckFalse(n && !aij->garray,PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
#if defined(PETSC_USE_CTABLE)
  /* table maps (global column + 1) -> (local column + 1); the +1 keeps 0 free as "absent" */
  PetscCall(PetscTableCreate(n,mat->cmap->N+1,&aij->colmap));
  for (i=0; i<n; i++) {
    PetscCall(PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES));
  }
#else
  /* dense array of length N+1: colmap[gcol] = lcol+1, 0 meaning "column not present in B" */
  PetscCall(PetscCalloc1(mat->cmap->N+1,&aij->colmap));
  PetscCall(PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt)));
  for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
#endif
  PetscFunctionReturn(0);
}

/* Insert/add one value into the diagonal block A.  Does a bisection-then-linear search in the
   (sorted) row rp1/ap1; on a miss it either ignores the entry, errors, or reallocates and shifts
   the row tail to make room.  All of rp1,ap1,nrow1,low1,high1,lastcol1,... are locals of the caller. */
#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv,orow,ocol) \
  { \
    if (col <= lastcol1) low1 = 0; \
    else high1 = nrow1; \
    lastcol1 = col;\
    while (high1-low1 > 5) { \
      t = (low1+high1)/2; \
      if (rp1[t] > col) high1 = t; \
      else low1 = t; \
    } \
    for (_i=low1; _i<high1; _i++) { \
      if (rp1[_i] > col) break; \
      if (rp1[_i] == col) { \
        if (addv == ADD_VALUES) { \
          ap1[_i] += value; \
          /* Not sure LogFlops will slow dow the code or not */ \
          (void)PetscLogFlops(1.0); \
        } \
        else ap1[_i] = value; \
        goto a_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries && row != col) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
    if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \
    PetscCheckFalse(nonew == -1,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
    N = nrow1++ - 1; a->nz++; high1++; \
    /* shift up all the later entries in this row */ \
    PetscCall(PetscArraymove(rp1+_i+1,rp1+_i,N-_i+1));\
    PetscCall(PetscArraymove(ap1+_i+1,ap1+_i,N-_i+1));\
    rp1[_i] = col; \
    ap1[_i] = value; \
    A->nonzerostate++;\
    a_noinsert: ; \
    ailen[row] = nrow1; \
  }

/* Same as the A variant above, but for the off-diagonal block B (rp2/ap2/... locals). */
#define MatSetValues_SeqAIJ_B_Private(row,col,value,addv,orow,ocol) \
  { \
    if (col <= lastcol2) low2 = 0; \
    else high2 = nrow2; \
    lastcol2 = col; \
    while (high2-low2 > 5) { \
      t = (low2+high2)/2; \
      if (rp2[t] > col) high2 = t; \
      else low2 = t; \
    } \
    for (_i=low2; _i<high2; _i++) { \
      if (rp2[_i] > col) break; \
      if (rp2[_i] == col) { \
        if (addv == ADD_VALUES) { \
          ap2[_i] += value; \
          (void)PetscLogFlops(1.0); \
        } \
        else ap2[_i] = value; \
        goto b_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
    if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
    PetscCheckFalse(nonew == -1,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
    N = nrow2++ - 1; b->nz++; high2++; \
    /* shift up all the later entries in this row */ \
    PetscCall(PetscArraymove(rp2+_i+1,rp2+_i,N-_i+1));\
    PetscCall(PetscArraymove(ap2+_i+1,ap2+_i,N-_i+1));\
    rp2[_i] = col; \
    ap2[_i] = value; \
    B->nonzerostate++; \
b_noinsert: ; \ 379 bilen[row] = nrow2; \ 380 } 381 382 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[]) 383 { 384 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 385 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data; 386 PetscInt l,*garray = mat->garray,diag; 387 PetscScalar *aa,*ba; 388 389 PetscFunctionBegin; 390 /* code only works for square matrices A */ 391 392 /* find size of row to the left of the diagonal part */ 393 PetscCall(MatGetOwnershipRange(A,&diag,NULL)); 394 row = row - diag; 395 for (l=0; l<b->i[row+1]-b->i[row]; l++) { 396 if (garray[b->j[b->i[row]+l]] > diag) break; 397 } 398 if (l) { 399 PetscCall(MatSeqAIJGetArray(mat->B,&ba)); 400 PetscCall(PetscArraycpy(ba+b->i[row],v,l)); 401 PetscCall(MatSeqAIJRestoreArray(mat->B,&ba)); 402 } 403 404 /* diagonal part */ 405 if (a->i[row+1]-a->i[row]) { 406 PetscCall(MatSeqAIJGetArray(mat->A,&aa)); 407 PetscCall(PetscArraycpy(aa+a->i[row],v+l,(a->i[row+1]-a->i[row]))); 408 PetscCall(MatSeqAIJRestoreArray(mat->A,&aa)); 409 } 410 411 /* right of diagonal part */ 412 if (b->i[row+1]-b->i[row]-l) { 413 PetscCall(MatSeqAIJGetArray(mat->B,&ba)); 414 PetscCall(PetscArraycpy(ba+b->i[row]+l,v+l+a->i[row+1]-a->i[row],b->i[row+1]-b->i[row]-l)); 415 PetscCall(MatSeqAIJRestoreArray(mat->B,&ba)); 416 } 417 PetscFunctionReturn(0); 418 } 419 420 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 421 { 422 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 423 PetscScalar value = 0.0; 424 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 425 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 426 PetscBool roworiented = aij->roworiented; 427 428 /* Some Variables required in the macro */ 429 Mat A = aij->A; 430 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 431 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 432 PetscBool ignorezeroentries = 
a->ignorezeroentries; 433 Mat B = aij->B; 434 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 435 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 436 MatScalar *aa,*ba; 437 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 438 PetscInt nonew; 439 MatScalar *ap1,*ap2; 440 441 PetscFunctionBegin; 442 PetscCall(MatSeqAIJGetArray(A,&aa)); 443 PetscCall(MatSeqAIJGetArray(B,&ba)); 444 for (i=0; i<m; i++) { 445 if (im[i] < 0) continue; 446 PetscCheckFalse(im[i] >= mat->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT,im[i],mat->rmap->N-1); 447 if (im[i] >= rstart && im[i] < rend) { 448 row = im[i] - rstart; 449 lastcol1 = -1; 450 rp1 = aj + ai[row]; 451 ap1 = aa + ai[row]; 452 rmax1 = aimax[row]; 453 nrow1 = ailen[row]; 454 low1 = 0; 455 high1 = nrow1; 456 lastcol2 = -1; 457 rp2 = bj + bi[row]; 458 ap2 = ba + bi[row]; 459 rmax2 = bimax[row]; 460 nrow2 = bilen[row]; 461 low2 = 0; 462 high2 = nrow2; 463 464 for (j=0; j<n; j++) { 465 if (v) value = roworiented ? 
v[i*n+j] : v[i+j*m]; 466 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue; 467 if (in[j] >= cstart && in[j] < cend) { 468 col = in[j] - cstart; 469 nonew = a->nonew; 470 MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]); 471 } else if (in[j] < 0) continue; 472 else PetscCheckFalse(in[j] >= mat->cmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT,in[j],mat->cmap->N-1); 473 else { 474 if (mat->was_assembled) { 475 if (!aij->colmap) { 476 PetscCall(MatCreateColmap_MPIAIJ_Private(mat)); 477 } 478 #if defined(PETSC_USE_CTABLE) 479 PetscCall(PetscTableFind(aij->colmap,in[j]+1,&col)); /* map global col ids to local ones */ 480 col--; 481 #else 482 col = aij->colmap[in[j]] - 1; 483 #endif 484 if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) { /* col < 0 means in[j] is a new col for B */ 485 PetscCall(MatDisAssemble_MPIAIJ(mat)); /* Change aij->B from reduced/local format to expanded/global format */ 486 col = in[j]; 487 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 488 B = aij->B; 489 b = (Mat_SeqAIJ*)B->data; 490 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a; 491 rp2 = bj + bi[row]; 492 ap2 = ba + bi[row]; 493 rmax2 = bimax[row]; 494 nrow2 = bilen[row]; 495 low2 = 0; 496 high2 = nrow2; 497 bm = aij->B->rmap->n; 498 ba = b->a; 499 } else if (col < 0 && !(ignorezeroentries && value == 0.0)) { 500 if (1 == ((Mat_SeqAIJ*)(aij->B->data))->nonew) { 501 PetscCall(PetscInfo(mat,"Skipping of insertion of new nonzero location in off-diagonal portion of matrix %g(%" PetscInt_FMT ",%" PetscInt_FMT ")\n",(double)PetscRealPart(value),im[i],in[j])); 502 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", im[i], in[j]); 503 } 504 } else col = in[j]; 505 nonew = b->nonew; 506 
MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]); 507 } 508 } 509 } else { 510 PetscCheck(!mat->nooffprocentries,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 511 if (!aij->donotstash) { 512 mat->assembled = PETSC_FALSE; 513 if (roworiented) { 514 PetscCall(MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)))); 515 } else { 516 PetscCall(MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)))); 517 } 518 } 519 } 520 } 521 PetscCall(MatSeqAIJRestoreArray(A,&aa)); 522 PetscCall(MatSeqAIJRestoreArray(B,&ba)); 523 PetscFunctionReturn(0); 524 } 525 526 /* 527 This function sets the j and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix. 528 The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like). 529 No off-processor parts off the matrix are allowed here and mat->was_assembled has to be PETSC_FALSE. 
*/
PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat_Symbolic(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[])
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
  Mat        A = aij->A; /* diagonal part of the matrix */
  Mat        B = aij->B; /* offdiagonal part of the matrix */
  Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
  Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data;
  PetscInt   cstart = mat->cmap->rstart,cend = mat->cmap->rend,col;
  PetscInt   *ailen = a->ilen,*aj = a->j;
  PetscInt   *bilen = b->ilen,*bj = b->j;
  PetscInt   am = aij->A->rmap->n,j;
  PetscInt   diag_so_far = 0,dnz;
  PetscInt   offd_so_far = 0,onz;

  PetscFunctionBegin;
  /* Iterate over all rows of the matrix */
  for (j=0; j<am; j++) {
    dnz = onz = 0;
    /* Iterate over all non-zero columns of the current row */
    for (col=mat_i[j]; col<mat_i[j+1]; col++) {
      /* If column is in the diagonal */
      if (mat_j[col] >= cstart && mat_j[col] < cend) {
        aj[diag_so_far++] = mat_j[col] - cstart; /* diagonal block stores local column indices */
        dnz++;
      } else { /* off-diagonal entries */
        bj[offd_so_far++] = mat_j[col];          /* off-diagonal block keeps global indices at this stage */
        onz++;
      }
    }
    ailen[j] = dnz;
    bilen[j] = onz;
  }
  PetscFunctionReturn(0);
}

/*
    This function sets the local j, a and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
    The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
    No off-processor parts off the matrix are allowed here, they are set at a later point by MatSetValues_MPIAIJ.
    Also, mat->was_assembled has to be false, otherwise the statement aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
    would not be true and the more complex MatSetValues_MPIAIJ has to be used.
*/
PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[],const PetscScalar mat_a[])
{
  Mat_MPIAIJ  *aij = (Mat_MPIAIJ*)mat->data;
  Mat         A = aij->A; /* diagonal part of the matrix */
  Mat         B = aij->B; /* offdiagonal part of the matrix */
  Mat_SeqAIJ  *aijd =(Mat_SeqAIJ*)(aij->A)->data,*aijo=(Mat_SeqAIJ*)(aij->B)->data;
  Mat_SeqAIJ  *a = (Mat_SeqAIJ*)A->data;
  Mat_SeqAIJ  *b = (Mat_SeqAIJ*)B->data;
  PetscInt    cstart = mat->cmap->rstart,cend = mat->cmap->rend;
  PetscInt    *ailen = a->ilen,*aj = a->j;
  PetscInt    *bilen = b->ilen,*bj = b->j;
  PetscInt    am = aij->A->rmap->n,j;
  PetscInt    *full_diag_i=aijd->i,*full_offd_i=aijo->i; /* These variables can also include non-local elements, which are set at a later point. */
  PetscInt    col,dnz_row,onz_row,rowstart_diag,rowstart_offd;
  PetscScalar *aa = a->a,*ba = b->a;

  PetscFunctionBegin;
  /* Iterate over all rows of the matrix */
  for (j=0; j<am; j++) {
    dnz_row = onz_row = 0;
    rowstart_offd = full_offd_i[j];
    rowstart_diag = full_diag_i[j];
    /* Iterate over all non-zero columns of the current row */
    for (col=mat_i[j]; col<mat_i[j+1]; col++) {
      /* If column is in the diagonal */
      if (mat_j[col] >= cstart && mat_j[col] < cend) {
        aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
        aa[rowstart_diag+dnz_row] = mat_a[col];
        dnz_row++;
      } else { /* off-diagonal entries */
        bj[rowstart_offd+onz_row] = mat_j[col];
        ba[rowstart_offd+onz_row] = mat_a[col];
        onz_row++;
      }
    }
    ailen[j] = dnz_row;
    bilen[j] = onz_row;
  }
  PetscFunctionReturn(0);
}

/* Retrieve an m-by-n block of values at global indices into v[] (row-major).
   Only locally owned rows are supported; absent off-diagonal entries read as 0.0. */
PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
  PetscInt   i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt   cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* negative row */
    PetscCheck(idxm[i] < mat->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT,idxm[i],mat->rmap->N-1);
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* negative column */
        PetscCheck(idxn[j] < mat->cmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT,idxn[j],mat->cmap->N-1);
        if (idxn[j] >= cstart && idxn[j] < cend) {
          /* locally owned column -> diagonal block */
          col = idxn[j] - cstart;
          PetscCall(MatGetValues(aij->A,1,&row,1,&col,v+i*n+j));
        } else {
          if (!aij->colmap) {
            PetscCall(MatCreateColmap_MPIAIJ_Private(mat));
          }
#if defined(PETSC_USE_CTABLE)
          PetscCall(PetscTableFind(aij->colmap,idxn[j]+1,&col));
          col--;
#else
          col = aij->colmap[idxn[j]] - 1;
#endif
          /* a column absent from the off-diagonal block is an (unstored) zero */
          if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
          else {
            PetscCall(MatGetValues(aij->B,1,&row,1,&col,v+i*n+j));
          }
        }
      }
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
  }
  PetscFunctionReturn(0);
}

/* Begin assembly: start sending stashed off-process entries to their owning ranks. */
PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
  PetscInt   nstash,reallocs;

  PetscFunctionBegin;
  if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);

  PetscCall(MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range));
  PetscCall(MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs));
  PetscCall(PetscInfo(aij->A,"Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n",nstash,reallocs));
  PetscFunctionReturn(0);
}

/* Finish assembly: drain the stash into local blocks, handle collective disassembly,
   assemble the sequential sub-blocks, and update the collective nonzero state. */
PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ  *aij = (Mat_MPIAIJ*)mat->data;
  PetscMPIInt n;
  PetscInt    i,j,rstart,ncols,flg;
  PetscInt    *row,*col;
  PetscBool   other_disassembled;
  PetscScalar *val;

  /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */

  PetscFunctionBegin;
  if (!aij->donotstash && !mat->nooffprocentries) {
    /* receive stashed entries and insert them via MatSetValues, one run of equal rows at a time */
    while (1) {
      PetscCall(MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg));
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else ncols = n-i;
        /* Now assemble all these values with a single function call */
        PetscCall(MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode));
        i = j;
      }
    }
    PetscCall(MatStashScatterEnd_Private(&mat->stash));
  }
#if defined(PETSC_HAVE_DEVICE)
  if (mat->offloadmask == PETSC_OFFLOAD_CPU) aij->A->offloadmask = PETSC_OFFLOAD_CPU;
  /* We call MatBindToCPU() on aij->A and aij->B here, because if MatBindToCPU_MPIAIJ() is called before assembly, it cannot bind these. */
  if (mat->boundtocpu) {
    PetscCall(MatBindToCPU(aij->A,PETSC_TRUE));
    PetscCall(MatBindToCPU(aij->B,PETSC_TRUE));
  }
#endif
  PetscCall(MatAssemblyBegin(aij->A,mode));
  PetscCall(MatAssemblyEnd(aij->A,mode));

  /* determine if any processor has disassembled, if so we must
     also disassemble ourself, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
    PetscCall(MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat)));
    if (mat->was_assembled && !other_disassembled) { /* mat on this rank has reduced off-diag B with local col ids, but globaly it does not */
      PetscCall(MatDisAssemble_MPIAIJ(mat));
    }
  }
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    PetscCall(MatSetUpMultiply_MPIAIJ(mat));
  }
  PetscCall(MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE));
#if defined(PETSC_HAVE_DEVICE)
  if (mat->offloadmask == PETSC_OFFLOAD_CPU && aij->B->offloadmask != PETSC_OFFLOAD_UNALLOCATED) aij->B->offloadmask = PETSC_OFFLOAD_CPU;
#endif
  PetscCall(MatAssemblyBegin(aij->B,mode));
  PetscCall(MatAssemblyEnd(aij->B,mode));

  /* cached row data and the cached diagonal are stale after assembly */
  PetscCall(PetscFree2(aij->rowvalues,aij->rowindices));

  aij->rowvalues = NULL;

  PetscCall(VecDestroy(&aij->diag));

  /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
  if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
    PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
    PetscCall(MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat)));
  }
#if defined(PETSC_HAVE_DEVICE)
  mat->offloadmask = PETSC_OFFLOAD_BOTH;
#endif
  PetscFunctionReturn(0);
}

/* Set every stored entry to zero; the nonzero pattern is unchanged. */
PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
{
  Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  PetscCall(MatZeroEntries(l->A));
  PetscCall(MatZeroEntries(l->B));
  PetscFunctionReturn(0);
}

/* Zero the given global rows, optionally placing diag on the diagonal and fixing up b = diag*x
   for those rows.  Collective. */
PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIAIJ       *mat = (Mat_MPIAIJ *) A->data;
  PetscObjectState sA, sB;
  PetscInt         *lrows;
  PetscInt         r, len;
  PetscBool        cong, lch, gch;

  PetscFunctionBegin;
  /* get locally owned rows */
  PetscCall(MatZeroRowsMapLocal_Private(A,N,rows,&len,&lrows));
  PetscCall(MatHasCongruentLayouts(A,&cong));
  /* fix right hand side if needed */
  if (x && b) {
    const PetscScalar *xx;
    PetscScalar       *bb;

    PetscCheck(cong,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Need matching row/col layout");
    PetscCall(VecGetArrayRead(x, &xx));
    PetscCall(VecGetArray(b, &bb));
    for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
    PetscCall(VecRestoreArrayRead(x, &xx));
    PetscCall(VecRestoreArray(b, &bb));
  }

  /* remember the blocks' nonzero states so we can detect (collectively) a pattern change below */
  sA = mat->A->nonzerostate;
  sB = mat->B->nonzerostate;

  if (diag != 0.0 && cong) {
    /* congruent layouts: the diagonal entry lives in the diagonal block, so delegate directly */
    PetscCall(MatZeroRows(mat->A, len, lrows, diag, NULL, NULL));
    PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL));
  } else if (diag != 0.0) { /* non-square or non congruent layouts -> if keepnonzeropattern is false, we allow for new insertion */
    Mat_SeqAIJ *aijA = (Mat_SeqAIJ*)mat->A->data;
    Mat_SeqAIJ *aijB = (Mat_SeqAIJ*)mat->B->data;
    PetscInt   nnwA, nnwB;
    PetscBool  nnzA, nnzB;

    /* temporarily allow new nonzeros (nonew = 0) so MatSetValues can create the diagonal entries */
    nnwA = aijA->nonew;
    nnwB = aijB->nonew;
    nnzA = aijA->keepnonzeropattern;
    nnzB = aijB->keepnonzeropattern;
    if (!nnzA) {
      PetscCall(PetscInfo(mat->A,"Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on diagonal block.\n"));
      aijA->nonew = 0;
    }
    if (!nnzB) {
      PetscCall(PetscInfo(mat->B,"Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on off-diagonal block.\n"));
      aijB->nonew = 0;
    }
    /* Must zero here before the next loop */
    PetscCall(MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL));
    PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL));
    for (r = 0; r < len; ++r) {
      const PetscInt row = lrows[r] + A->rmap->rstart;
      if (row >= A->cmap->N) continue; /* no diagonal position exists for rows beyond the column range */
      PetscCall(MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES));
    }
    aijA->nonew = nnwA;
    aijB->nonew = nnwB;
  } else {
    PetscCall(MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL));
    PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL));
  }
  PetscCall(PetscFree(lrows));
  PetscCall(MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY));

  /* reduce nonzerostate */
  lch = (PetscBool)(sA != mat->A->nonzerostate || sB != mat->B->nonzerostate);
  PetscCall(MPIU_Allreduce(&lch,&gch,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)A)));
  if (gch) A->nonzerostate++;
  PetscFunctionReturn(0);
}

/* Zero the given global rows AND columns, optionally placing diag on the diagonal and
   updating b to keep the system consistent with the eliminated unknowns.  Collective. */
PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
  PetscMPIInt       n = A->rmap->n;
  PetscInt          i,j,r,m,len = 0;
  PetscInt          *lrows,*owners = A->rmap->range;
  PetscMPIInt       p = 0;
  PetscSFNode       *rrows;
  PetscSF           sf;
  const PetscScalar *xx;
  PetscScalar       *bb,*mask,*aij_a;
  Vec               xmask,lmask;
  Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
  const PetscInt    *aj, *ii,*ridx;
  PetscScalar       *aa;

  PetscFunctionBegin;
  /* Create SF where leaves are input rows and roots are owned rows */
  PetscCall(PetscMalloc1(n, &lrows));
  for (r = 0; r < n; ++r) lrows[r] = -1;
  PetscCall(PetscMalloc1(N, &rrows));
  for (r = 0; r < N; ++r) {
    const PetscInt idx = rows[r];
    PetscCheckFalse(idx < 0 || A->rmap->N <= idx,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %" PetscInt_FMT " out of range [0,%" PetscInt_FMT ")",idx,A->rmap->N);
    if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
      PetscCall(PetscLayoutFindOwner(A->rmap,idx,&p));
    }
    rrows[r].rank  = p;
    rrows[r].index = rows[r] - owners[p];
  }
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject) A), &sf));
  PetscCall(PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER));
  /* Collect flags for rows to be zeroed */
  PetscCall(PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR));
  PetscCall(PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR));
  PetscCall(PetscSFDestroy(&sf));
  /* Compress and put in row numbers */
  for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
  /* zero diagonal part of matrix */
  PetscCall(MatZeroRowsColumns(l->A,len,lrows,diag,x,b));
  /* handle off diagonal part of matrix */
  PetscCall(MatCreateVecs(A,&xmask,NULL));
  PetscCall(VecDuplicate(l->lvec,&lmask));
  /* build a ghosted 0/1 mask marking the zeroed columns as seen by this rank's B block */
  PetscCall(VecGetArray(xmask,&bb));
  for (i=0; i<len; i++) bb[lrows[i]] = 1;
  PetscCall(VecRestoreArray(xmask,&bb));
  PetscCall(VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD));
  PetscCall(VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD));
  PetscCall(VecDestroy(&xmask));
  if (x && b) { /* this code is buggy when the row and column layout don't match */
    PetscBool cong;

    PetscCall(MatHasCongruentLayouts(A,&cong));
    PetscCheck(cong,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Need matching row/col layout");
    PetscCall(VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD));
    PetscCall(VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD));
    PetscCall(VecGetArrayRead(l->lvec,&xx));
    PetscCall(VecGetArray(b,&bb));
  }
  PetscCall(VecGetArray(lmask,&mask));
  /* remove zeroed rows of off diagonal matrix */
  PetscCall(MatSeqAIJGetArray(l->B,&aij_a));
  ii = aij->i;
  for (i=0; i<len; i++) {
    PetscCall(PetscArrayzero(aij_a + ii[lrows[i]],ii[lrows[i]+1] - ii[lrows[i]]));
  }
  /* loop over all elements of off process part of matrix zeroing removed columns*/
  if (aij->compressedrow.use) {
    m    = aij->compressedrow.nrows;
    ii   = aij->compressedrow.i;
    ridx = aij->compressedrow.rindex;
    for (i=0; i<m; i++) {
      n  = ii[i+1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij_a + ii[i];

      for (j=0; j<n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          /* column is eliminated: move its contribution to the rhs, then zero the entry */
          if (b) bb[*ridx] -= *aa*xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
      ridx++;
    }
  } else { /* do not use compressed row format */
    m = l->B->rmap->n;
    for (i=0; i<m; i++) {
      n  = ii[i+1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij_a + ii[i];
      for (j=0; j<n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          if (b) bb[i] -= *aa*xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
    }
  }
  if (x && b) {
    PetscCall(VecRestoreArray(b,&bb));
    PetscCall(VecRestoreArrayRead(l->lvec,&xx));
  }
  PetscCall(MatSeqAIJRestoreArray(l->B,&aij_a));
  PetscCall(VecRestoreArray(lmask,&mask));
  PetscCall(VecDestroy(&lmask));
  PetscCall(PetscFree(lrows));

  /* only change matrix nonzero state if pattern was allowed to be changed */
  if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) {
    PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
    PetscCall(MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A)));
  }
  PetscFunctionReturn(0);
}

/* y = A*x: start the halo scatter of x, overlap it with the diagonal-block product,
   then add the off-diagonal-block product on the gathered ghost values. */
PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
  PetscInt   nt;
  VecScatter Mvctx = a->Mvctx;

  PetscFunctionBegin;
  PetscCall(VecGetLocalSize(xx,&nt));
  PetscCheckFalse(nt != A->cmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%" PetscInt_FMT ") and xx (%" PetscInt_FMT ")",A->cmap->n,nt);
  PetscCall(VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD));
  PetscCall((*a->A->ops->mult)(a->A,xx,yy));
  PetscCall(VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD));
  PetscCall((*a->B->ops->multadd)(a->B,a->lvec,yy,yy));
  PetscFunctionReturn(0);
}

/* Applies only the local (diagonal) block of A to bb, storing the result in xx. */
PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  PetscCall(MatMultDiagonalBlock(a->A,bb,xx));
  PetscFunctionReturn(0);
}

/* z = y + A*x; same overlap pattern as MatMult_MPIAIJ. */
PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
  VecScatter Mvctx = a->Mvctx;

  PetscFunctionBegin;
  PetscCall(VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD));
  PetscCall((*a->A->ops->multadd)(a->A,xx,yy,zz));
  PetscCall(VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD));
  PetscCall((*a->B->ops->multadd)(a->B,a->lvec,zz,zz));
  PetscFunctionReturn(0);
}

/* y = A^T*x: local transpose multiplies, then a reverse scatter sums the ghost contributions. */
PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  /* do nondiagonal part */
  PetscCall((*a->B->ops->multtranspose)(a->B,xx,a->lvec));
  /* do local part */
  PetscCall((*a->A->ops->multtranspose)(a->A,xx,yy));
  /* add partial results together */
  PetscCall(VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE));
  PetscCall(VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE));
  PetscFunctionReturn(0);
}

/*
   Tests whether Bmat equals Amat^T (to tolerance tol). First a cheap collective
   test on the diagonal blocks; only if that passes is the expensive off-diagonal
   comparison done via submatrix extraction.
*/
PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool *f)
{
  MPI_Comm    comm;
  Mat_MPIAIJ  *Aij = (Mat_MPIAIJ*) Amat->data, *Bij;
  Mat         Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
  IS          Me,Notme;
  PetscInt    M,N,first,last,*notme,i;
  PetscBool   lf;
  PetscMPIInt size;

  PetscFunctionBegin;
  /* Easy test: symmetric diagonal block */
  Bij = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A;
  PetscCall(MatIsTranspose(Adia,Bdia,tol,&lf));
  PetscCall(MPIU_Allreduce(&lf,f,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)Amat)));
  if (!*f) PetscFunctionReturn(0);
  PetscCall(PetscObjectGetComm((PetscObject)Amat,&comm));
  PetscCallMPI(MPI_Comm_size(comm,&size));
  if (size == 1) PetscFunctionReturn(0);

  /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. */
  PetscCall(MatGetSize(Amat,&M,&N));
  PetscCall(MatGetOwnershipRange(Amat,&first,&last));
  /* notme = all global indices outside this rank's ownership range [first,last) */
  PetscCall(PetscMalloc1(N-last+first,&notme));
  for (i=0; i<first; i++) notme[i] = i;
  for (i=last; i<M; i++) notme[i-last+first] = i;
  PetscCall(ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme));
  PetscCall(ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me));
  PetscCall(MatCreateSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs));
  Aoff = Aoffs[0];
  /* note the swapped index sets: B(Notme,Me) must match A(Me,Notme)^T */
  PetscCall(MatCreateSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs));
  Boff = Boffs[0];
  PetscCall(MatIsTranspose(Aoff,Boff,tol,f));
  PetscCall(MatDestroyMatrices(1,&Aoffs));
  PetscCall(MatDestroyMatrices(1,&Boffs));
  PetscCall(ISDestroy(&Me));
  PetscCall(ISDestroy(&Notme));
  PetscCall(PetscFree(notme));
  PetscFunctionReturn(0);
}

/* A is symmetric iff A equals its own transpose (to tolerance tol). */
PetscErrorCode MatIsSymmetric_MPIAIJ(Mat A,PetscReal tol,PetscBool *f)
{
  PetscFunctionBegin;
  PetscCall(MatIsTranspose_MPIAIJ(A,A,tol,f));
  PetscFunctionReturn(0);
}

/* z = y + A^T*x; mirrors MatMultTranspose_MPIAIJ with an add. */
PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  /* do nondiagonal part */
  PetscCall((*a->B->ops->multtranspose)(a->B,xx,a->lvec));
  /* do local part */
  PetscCall((*a->A->ops->multtransposeadd)(a->A,xx,yy,zz));
  /* add partial results together */
  PetscCall(VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE));
  PetscCall(VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE));
  PetscFunctionReturn(0);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
PetscErrorCode
MatGetDiagonal_MPIAIJ(Mat A,Vec v)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  /* diagonal entries live entirely in the local diagonal block only when row and column layouts match */
  PetscCheckFalse(A->rmap->N != A->cmap->N,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
  PetscCheckFalse(A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
  PetscCall(MatGetDiagonal(a->A,v));
  PetscFunctionReturn(0);
}

/* Scales both the diagonal and off-diagonal blocks by aa. */
PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  PetscCall(MatScale(a->A,aa));
  PetscCall(MatScale(a->B,aa));
  PetscFunctionReturn(0);
}

/* Free COO stuff; must match allocation methods in MatSetPreallocationCOO_MPIAIJ() */
PETSC_INTERN PetscErrorCode MatResetPreallocationCOO_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;

  PetscFunctionBegin;
  PetscCall(PetscSFDestroy(&aij->coo_sf));
  PetscCall(PetscFree4(aij->Aperm1,aij->Bperm1,aij->Ajmap1,aij->Bjmap1));
  PetscCall(PetscFree4(aij->Aperm2,aij->Bperm2,aij->Ajmap2,aij->Bjmap2));
  PetscCall(PetscFree4(aij->Aimap1,aij->Bimap1,aij->Aimap2,aij->Bimap2));
  PetscCall(PetscFree2(aij->sendbuf,aij->recvbuf));
  PetscCall(PetscFree(aij->Cperm1));
  PetscFunctionReturn(0);
}

/*
   Destroys all resources owned by the MPIAIJ matrix: sequential blocks,
   scatter context, ghost arrays, COO state, and all composed methods.
*/
PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT,mat->rmap->N,mat->cmap->N);
#endif
  PetscCall(MatStashDestroy_Private(&mat->stash));
  PetscCall(VecDestroy(&aij->diag));
  PetscCall(MatDestroy(&aij->A));
  PetscCall(MatDestroy(&aij->B));
#if defined(PETSC_USE_CTABLE)
  PetscCall(PetscTableDestroy(&aij->colmap));
#else
  PetscCall(PetscFree(aij->colmap));
#endif
  PetscCall(PetscFree(aij->garray));
  PetscCall(VecDestroy(&aij->lvec));
  PetscCall(VecScatterDestroy(&aij->Mvctx));
  PetscCall(PetscFree2(aij->rowvalues,aij->rowindices));
  PetscCall(PetscFree(aij->ld));

  /* Free COO */
  PetscCall(MatResetPreallocationCOO_MPIAIJ(mat));

  PetscCall(PetscFree(mat->data));

  /* may be created by MatCreateMPIAIJSumSeqAIJSymbolic */
  PetscCall(PetscObjectCompose((PetscObject)mat,"MatMergeSeqsToMPI",NULL));

  PetscCall(PetscObjectChangeTypeName((PetscObject)mat,NULL));
  /* detach every method composed onto this object by the MPIAIJ implementation */
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatResetPreallocation_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpibaij_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL));
#if defined(PETSC_HAVE_CUDA)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijcusparse_C",NULL));
#endif
#if defined(PETSC_HAVE_KOKKOS_KERNELS)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijkokkos_C",NULL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpidense_C",NULL));
#if defined(PETSC_HAVE_ELEMENTAL)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_elemental_C",NULL));
#endif
#if defined(PETSC_HAVE_SCALAPACK)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_scalapack_C",NULL));
#endif
#if defined(PETSC_HAVE_HYPRE)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_hypre_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_transpose_mpiaij_mpiaij_C",NULL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_is_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_is_mpiaij_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaij_mpiaij_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetUseScalableIncreaseOverlap_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijperm_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijsell_C",NULL));
#if defined(PETSC_HAVE_MKL_SPARSE)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijmkl_C",NULL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijcrl_C",NULL));
  /* NOTE(review): "MatConvert_mpiaij_is_C" is detached a second time here (also above) — harmless but redundant */
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_is_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisell_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatSetPreallocationCOO_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatSetValuesCOO_C",NULL));
  PetscFunctionReturn(0);
}

/*
   Writes the parallel matrix to a binary viewer in PETSc's native format:
   header, row lengths, global column indices, then values. Column indices
   within each row are emitted in ascending global order by interleaving the
   off-diagonal (B, via garray) entries before/after the diagonal (A) entries.
*/
PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ        *A = (Mat_SeqAIJ*)aij->A->data;
  Mat_SeqAIJ        *B = (Mat_SeqAIJ*)aij->B->data;
  const PetscInt    *garray = aij->garray; /* maps local B column indices to global columns */
  const PetscScalar *aa,*ba;
  PetscInt          header[4],M,N,m,rs,cs,nz,cnt,i,ja,jb;
  PetscInt          *rowlens;
  PetscInt          *colidxs;
  PetscScalar       *matvals;

  PetscFunctionBegin;
  PetscCall(PetscViewerSetUp(viewer));

  M  = mat->rmap->N;
  N  = mat->cmap->N;
  m  = mat->rmap->n;
  rs = mat->rmap->rstart;
  cs = mat->cmap->rstart;
  nz = A->nz + B->nz; /* local nonzero count; the global sum is reduced into header[3] below */

  /* write matrix header */
  header[0] = MAT_FILE_CLASSID;
  header[1] = M; header[2] = N; header[3] = nz;
  PetscCallMPI(MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat)));
  PetscCall(PetscViewerBinaryWrite(viewer,header,4,PETSC_INT));

  /* fill in and store row lengths */
  PetscCall(PetscMalloc1(m,&rowlens));
  for (i=0; i<m; i++) rowlens[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
  PetscCall(PetscViewerBinaryWriteAll(viewer,rowlens,m,rs,M,PETSC_INT));
  PetscCall(PetscFree(rowlens));

  /* fill in and store column indices */
  PetscCall(PetscMalloc1(nz,&colidxs));
  for (cnt=0, i=0; i<m; i++) {
    /* B entries with global column < diagonal-block start come first */
    for (jb=B->i[i]; jb<B->i[i+1]; jb++) {
      if (garray[B->j[jb]] > cs) break;
      colidxs[cnt++] = garray[B->j[jb]];
    }
    for (ja=A->i[i]; ja<A->i[i+1]; ja++)
      colidxs[cnt++] = A->j[ja] + cs;
    /* remaining B entries (global column beyond the diagonal block) */
    for (; jb<B->i[i+1]; jb++)
      colidxs[cnt++] = garray[B->j[jb]];
  }
  PetscCheckFalse(cnt != nz,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt_FMT,cnt,nz);
  PetscCall(PetscViewerBinaryWriteAll(viewer,colidxs,nz,PETSC_DETERMINE,PETSC_DETERMINE,PETSC_INT));
  PetscCall(PetscFree(colidxs));

  /* fill in and store nonzero values, in the same interleaved order as the column indices */
  PetscCall(MatSeqAIJGetArrayRead(aij->A,&aa));
  PetscCall(MatSeqAIJGetArrayRead(aij->B,&ba));
  PetscCall(PetscMalloc1(nz,&matvals));
  for (cnt=0, i=0; i<m; i++) {
    for (jb=B->i[i]; jb<B->i[i+1]; jb++) {
      if (garray[B->j[jb]] > cs) break;
      matvals[cnt++] = ba[jb];
    }
    for (ja=A->i[i]; ja<A->i[i+1];
ja++) 1242 matvals[cnt++] = aa[ja]; 1243 for (; jb<B->i[i+1]; jb++) 1244 matvals[cnt++] = ba[jb]; 1245 } 1246 PetscCall(MatSeqAIJRestoreArrayRead(aij->A,&aa)); 1247 PetscCall(MatSeqAIJRestoreArrayRead(aij->B,&ba)); 1248 PetscCheckFalse(cnt != nz,PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt_FMT,cnt,nz); 1249 PetscCall(PetscViewerBinaryWriteAll(viewer,matvals,nz,PETSC_DETERMINE,PETSC_DETERMINE,PETSC_SCALAR)); 1250 PetscCall(PetscFree(matvals)); 1251 1252 /* write block size option to the viewer's .info file */ 1253 PetscCall(MatView_Binary_BlockSizes(mat,viewer)); 1254 PetscFunctionReturn(0); 1255 } 1256 1257 #include <petscdraw.h> 1258 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 1259 { 1260 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1261 PetscMPIInt rank = aij->rank,size = aij->size; 1262 PetscBool isdraw,iascii,isbinary; 1263 PetscViewer sviewer; 1264 PetscViewerFormat format; 1265 1266 PetscFunctionBegin; 1267 PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw)); 1268 PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii)); 1269 PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary)); 1270 if (iascii) { 1271 PetscCall(PetscViewerGetFormat(viewer,&format)); 1272 if (format == PETSC_VIEWER_LOAD_BALANCE) { 1273 PetscInt i,nmax = 0,nmin = PETSC_MAX_INT,navg = 0,*nz,nzlocal = ((Mat_SeqAIJ*) (aij->A->data))->nz + ((Mat_SeqAIJ*) (aij->B->data))->nz; 1274 PetscCall(PetscMalloc1(size,&nz)); 1275 PetscCallMPI(MPI_Allgather(&nzlocal,1,MPIU_INT,nz,1,MPIU_INT,PetscObjectComm((PetscObject)mat))); 1276 for (i=0; i<(PetscInt)size; i++) { 1277 nmax = PetscMax(nmax,nz[i]); 1278 nmin = PetscMin(nmin,nz[i]); 1279 navg += nz[i]; 1280 } 1281 PetscCall(PetscFree(nz)); 1282 navg = navg/size; 1283 PetscCall(PetscViewerASCIIPrintf(viewer,"Load Balance - Nonzeros: Min %" PetscInt_FMT " avg %" PetscInt_FMT " max %" PetscInt_FMT 
"\n",nmin,navg,nmax)); 1284 PetscFunctionReturn(0); 1285 } 1286 PetscCall(PetscViewerGetFormat(viewer,&format)); 1287 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1288 MatInfo info; 1289 PetscInt *inodes=NULL; 1290 1291 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank)); 1292 PetscCall(MatGetInfo(mat,MAT_LOCAL,&info)); 1293 PetscCall(MatInodeGetInodeSizes(aij->A,NULL,&inodes,NULL)); 1294 PetscCall(PetscViewerASCIIPushSynchronized(viewer)); 1295 if (!inodes) { 1296 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %g, not using I-node routines\n", 1297 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory)); 1298 } else { 1299 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %g, using I-node routines\n", 1300 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory)); 1301 } 1302 PetscCall(MatGetInfo(aij->A,MAT_LOCAL,&info)); 1303 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %" PetscInt_FMT " \n",rank,(PetscInt)info.nz_used)); 1304 PetscCall(MatGetInfo(aij->B,MAT_LOCAL,&info)); 1305 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %" PetscInt_FMT " \n",rank,(PetscInt)info.nz_used)); 1306 PetscCall(PetscViewerFlush(viewer)); 1307 PetscCall(PetscViewerASCIIPopSynchronized(viewer)); 1308 PetscCall(PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n")); 1309 PetscCall(VecScatterView(aij->Mvctx,viewer)); 1310 PetscFunctionReturn(0); 1311 } else if (format == PETSC_VIEWER_ASCII_INFO) { 1312 PetscInt inodecount,inodelimit,*inodes; 1313 PetscCall(MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit)); 1314 if (inodes) { 1315 PetscCall(PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) 
routines: found %" PetscInt_FMT " nodes, limit used is %" PetscInt_FMT "\n",inodecount,inodelimit)); 1316 } else { 1317 PetscCall(PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n")); 1318 } 1319 PetscFunctionReturn(0); 1320 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1321 PetscFunctionReturn(0); 1322 } 1323 } else if (isbinary) { 1324 if (size == 1) { 1325 PetscCall(PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name)); 1326 PetscCall(MatView(aij->A,viewer)); 1327 } else { 1328 PetscCall(MatView_MPIAIJ_Binary(mat,viewer)); 1329 } 1330 PetscFunctionReturn(0); 1331 } else if (iascii && size == 1) { 1332 PetscCall(PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name)); 1333 PetscCall(MatView(aij->A,viewer)); 1334 PetscFunctionReturn(0); 1335 } else if (isdraw) { 1336 PetscDraw draw; 1337 PetscBool isnull; 1338 PetscCall(PetscViewerDrawGetDraw(viewer,0,&draw)); 1339 PetscCall(PetscDrawIsNull(draw,&isnull)); 1340 if (isnull) PetscFunctionReturn(0); 1341 } 1342 1343 { /* assemble the entire matrix onto first processor */ 1344 Mat A = NULL, Av; 1345 IS isrow,iscol; 1346 1347 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->rmap->N : 0,0,1,&isrow)); 1348 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->cmap->N : 0,0,1,&iscol)); 1349 PetscCall(MatCreateSubMatrix(mat,isrow,iscol,MAT_INITIAL_MATRIX,&A)); 1350 PetscCall(MatMPIAIJGetSeqAIJ(A,&Av,NULL,NULL)); 1351 /* The commented code uses MatCreateSubMatrices instead */ 1352 /* 1353 Mat *AA, A = NULL, Av; 1354 IS isrow,iscol; 1355 1356 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->rmap->N : 0,0,1,&isrow)); 1357 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? 
mat->cmap->N : 0,0,1,&iscol)); 1358 PetscCall(MatCreateSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&AA)); 1359 if (rank == 0) { 1360 PetscCall(PetscObjectReference((PetscObject)AA[0])); 1361 A = AA[0]; 1362 Av = AA[0]; 1363 } 1364 PetscCall(MatDestroySubMatrices(1,&AA)); 1365 */ 1366 PetscCall(ISDestroy(&iscol)); 1367 PetscCall(ISDestroy(&isrow)); 1368 /* 1369 Everyone has to call to draw the matrix since the graphics waits are 1370 synchronized across all processors that share the PetscDraw object 1371 */ 1372 PetscCall(PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer)); 1373 if (rank == 0) { 1374 if (((PetscObject)mat)->name) { 1375 PetscCall(PetscObjectSetName((PetscObject)Av,((PetscObject)mat)->name)); 1376 } 1377 PetscCall(MatView_SeqAIJ(Av,sviewer)); 1378 } 1379 PetscCall(PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer)); 1380 PetscCall(PetscViewerFlush(viewer)); 1381 PetscCall(MatDestroy(&A)); 1382 } 1383 PetscFunctionReturn(0); 1384 } 1385 1386 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer) 1387 { 1388 PetscBool iascii,isdraw,issocket,isbinary; 1389 1390 PetscFunctionBegin; 1391 PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii)); 1392 PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw)); 1393 PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary)); 1394 PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket)); 1395 if (iascii || isdraw || isbinary || issocket) { 1396 PetscCall(MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer)); 1397 } 1398 PetscFunctionReturn(0); 1399 } 1400 1401 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1402 { 1403 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1404 Vec bb1 = NULL; 1405 PetscBool hasop; 1406 1407 PetscFunctionBegin; 1408 if (flag == SOR_APPLY_UPPER) { 1409 
    PetscCall((*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx));
    PetscFunctionReturn(0);
  }

  /* bb1 (rhs with off-diagonal contribution removed) is needed unless a single
     zero-initial-guess sweep suffices */
  if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) {
    PetscCall(VecDuplicate(bb,&bb1));
  }

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      /* first sweep needs no ghost values since x starts at zero */
      PetscCall((*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx));
      its--;
    }

    while (its--) {
      PetscCall(VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD));
      PetscCall(VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD));

      /* update rhs: bb1 = bb - B*x */
      PetscCall(VecScale(mat->lvec,-1.0));
      PetscCall((*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1));

      /* local sweep */
      PetscCall((*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx));
    }
  } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      PetscCall((*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx));
      its--;
    }
    while (its--) {
      PetscCall(VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD));
      PetscCall(VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD));

      /* update rhs: bb1 = bb - B*x */
      PetscCall(VecScale(mat->lvec,-1.0));
      PetscCall((*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1));

      /* local sweep */
      PetscCall((*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx));
    }
  } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      PetscCall((*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx));
      its--;
    }
    while (its--) {
      PetscCall(VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD));
      PetscCall(VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD));

      /* update rhs: bb1 = bb - B*x */
      PetscCall(VecScale(mat->lvec,-1.0));
      PetscCall((*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1));

      /* local sweep */
      PetscCall((*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx));
    }
  } else if (flag & SOR_EISENSTAT) {
    Vec xx1;

    PetscCall(VecDuplicate(bb,&xx1));
    PetscCall((*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx));

    PetscCall(VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD));
    PetscCall(VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD));
    if (!mat->diag) {
      /* lazily cache the diagonal for the Eisenstat trick */
      PetscCall(MatCreateVecs(matin,&mat->diag,NULL));
      PetscCall(MatGetDiagonal(matin,mat->diag));
    }
    PetscCall(MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop));
    if (hasop) {
      PetscCall(MatMultDiagonalBlock(matin,xx,bb1));
    } else {
      PetscCall(VecPointwiseMult(bb1,mat->diag,xx));
    }
    PetscCall(VecAYPX(bb1,(omega-2.0)/omega,bb));

    PetscCall(MatMultAdd(mat->B,mat->lvec,bb1,bb1));

    /* local sweep */
    PetscCall((*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1));
    PetscCall(VecAXPY(xx,1.0,xx1));
    PetscCall(VecDestroy(&xx1));
  } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported");

  PetscCall(VecDestroy(&bb1));

  matin->factorerrortype = mat->A->factorerrortype;
  PetscFunctionReturn(0);
}

/*
   Builds B = P_r * A * P_c for row/column permutations rowp/colp by inverting
   the permutations with star forests, counting the permuted diagonal and
   off-diagonal nonzeros per row, and inserting the permuted entries.
*/
PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
{
  Mat            aA,aB,Aperm;
  const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj;
  PetscScalar    *aa,*ba;
  PetscInt       i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest;
  PetscSF        rowsf,sf;
  IS             parcolp = NULL; /* NOTE(review): never assigned in this chunk, so the final ISDestroy guard is dead here */
  PetscBool      done;

  PetscFunctionBegin;
  PetscCall(MatGetLocalSize(A,&m,&n));
  PetscCall(ISGetIndices(rowp,&rwant));
  PetscCall(ISGetIndices(colp,&cwant));
  PetscCall(PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest));

  /* Invert row permutation to find out where my rows should go */
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf));
  PetscCall(PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant));
  PetscCall(PetscSFSetFromOptions(rowsf));
  for (i=0; i<m; i++) work[i] = A->rmap->rstart + i;
  PetscCall(PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPI_REPLACE));
  PetscCall(PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPI_REPLACE));

  /* Invert column permutation to find out where my columns should go */
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A),&sf));
  PetscCall(PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant));
  PetscCall(PetscSFSetFromOptions(sf));
  for (i=0; i<n; i++) work[i] = A->cmap->rstart + i;
  PetscCall(PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPI_REPLACE));
  PetscCall(PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPI_REPLACE));
  PetscCall(PetscSFDestroy(&sf));

  PetscCall(ISRestoreIndices(rowp,&rwant));
  PetscCall(ISRestoreIndices(colp,&cwant));
  PetscCall(MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols));

  /* Find out where my gcols should go */
  PetscCall(MatGetSize(aB,NULL,&ng));
  PetscCall(PetscMalloc1(ng,&gcdest));
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A),&sf));
  PetscCall(PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols));
  PetscCall(PetscSFSetFromOptions(sf));
  PetscCall(PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest,MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest,MPI_REPLACE));
  PetscCall(PetscSFDestroy(&sf));

  /* Count permuted diagonal/off-diagonal nonzeros per local row */
  PetscCall(PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz));
  PetscCall(MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done));
  PetscCall(MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done));
  for (i=0; i<m; i++) {
    PetscInt    row = rdest[i];
    PetscMPIInt rowner;
    PetscCall(PetscLayoutFindOwner(A->rmap,row,&rowner));
    for (j=ai[i]; j<ai[i+1]; j++) {
      PetscInt    col = cdest[aj[j]];
      PetscMPIInt cowner;
      PetscCall(PetscLayoutFindOwner(A->cmap,col,&cowner)); /* Could build an index for the columns to eliminate this search */
      if (rowner == cowner) dnnz[i]++;
      else onnz[i]++;
    }
    for (j=bi[i]; j<bi[i+1]; j++) {
      PetscInt    col = gcdest[bj[j]];
      PetscMPIInt cowner;
      PetscCall(PetscLayoutFindOwner(A->cmap,col,&cowner));
      if (rowner == cowner) dnnz[i]++;
      else onnz[i]++;
    }
  }
  /* ship the counts to the ranks that own the permuted rows */
  PetscCall(PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz,MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz,MPI_REPLACE));
  PetscCall(PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz,MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz,MPI_REPLACE));
  PetscCall(PetscSFDestroy(&rowsf));

  PetscCall(MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm));
  PetscCall(MatSeqAIJGetArray(aA,&aa));
  PetscCall(MatSeqAIJGetArray(aB,&ba));
  for (i=0; i<m; i++) {
    PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */
    PetscInt j0,rowlen;
    rowlen = ai[i+1] - ai[i];
    for (j0=j=0; j<rowlen; j0=j) { /* rowlen could be larger than number of rows m, so sum in batches */
      for (; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]];
      PetscCall(MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES));
    }
    rowlen = bi[i+1] - bi[i];
    for (j0=j=0; j<rowlen; j0=j) {
      for (; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]];
      PetscCall(MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES));
    }
  }
  PetscCall(MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY));
  PetscCall(MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done));
  PetscCall(MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done));
  PetscCall(MatSeqAIJRestoreArray(aA,&aa));
  PetscCall(MatSeqAIJRestoreArray(aB,&ba));
  PetscCall(PetscFree4(dnnz,onnz,tdnnz,tonnz));
  PetscCall(PetscFree3(work,rdest,cdest));
  PetscCall(PetscFree(gcdest));
  if (parcolp) PetscCall(ISDestroy(&colp));
  *B = Aperm;
  PetscFunctionReturn(0);
}

/* Return the number of ghost (off-process) columns referenced by this process and,
   optionally, a pointer to their global column indices (the garray of the B block).
   The caller must NOT free the returned array; it is owned by the matrix. */
PetscErrorCode MatGetGhosts_MPIAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[])
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;

  PetscFunctionBegin;
  /* B stores compressed columns: its column count equals the number of ghosts */
  PetscCall(MatGetSize(aij->B,NULL,nghosts));
  if (ghosts) *ghosts = aij->garray;
  PetscFunctionReturn(0);
}

/* Collect matrix statistics (nonzeros used/allocated/unneeded, memory, mallocs) by
   summing the local diagonal (A) and off-diagonal (B) blocks, then combining across
   the communicator according to flag (MAT_LOCAL, MAT_GLOBAL_MAX, or MAT_GLOBAL_SUM). */
PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
  Mat            A = mat->A,B = mat->B;
  PetscLogDouble isend[5],irecv[5];

  PetscFunctionBegin;
  info->block_size = 1.0;
  PetscCall(MatGetInfo(A,MAT_LOCAL,info));

  /* stash A's local numbers before the second MatGetInfo() overwrites *info */
  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory; isend[4] = info->mallocs;

  PetscCall(MatGetInfo(B,MAT_LOCAL,info));

  isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
  isend[3] += info->memory; isend[4] += info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    PetscCall(MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_MAX,PetscObjectComm((PetscObject)matin)));

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    PetscCall(MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_SUM,PetscObjectComm((PetscObject)matin)));

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(0);
}

/* Apply a MatOption to the parallel matrix, forwarding to the A/B blocks where the
   option affects their storage, recording it locally where it only affects assembly
   (e.g. MAT_IGNORE_OFF_PROC_ENTRIES), and ignoring/deferring the rest. */
PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_UNUSED_NONZERO_LOCATION_ERR:
  case MAT_KEEP_NONZERO_PATTERN:
  case MAT_NEW_NONZERO_LOCATION_ERR:
  case MAT_USE_INODES:
  case MAT_IGNORE_ZERO_ENTRIES:
  case MAT_FORM_EXPLICIT_TRANSPOSE:
    MatCheckPreallocated(A,1);
    PetscCall(MatSetOption(a->A,op,flg));
    PetscCall(MatSetOption(a->B,op,flg));
    break;
  case MAT_ROW_ORIENTED:
    MatCheckPreallocated(A,1);
    a->roworiented = flg;

    PetscCall(MatSetOption(a->A,op,flg));
    PetscCall(MatSetOption(a->B,op,flg));
    break;
  case MAT_FORCE_DIAGONAL_ENTRIES:
  case MAT_SORTED_FULL:
    PetscCall(PetscInfo(A,"Option %s ignored\n",MatOptions[op]));
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = flg;
    break;
  /* Symmetry flags are handled directly by MatSetOption() and they don't affect preallocation */
  case MAT_SPD:
  case MAT_SYMMETRIC:
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_SYMMETRY_ETERNAL:
    break;
  case MAT_SUBMAT_SINGLEIS:
    A->submat_singleis = flg;
    break;
  case MAT_STRUCTURE_ONLY:
    /* The option is handled directly by MatSetOption() */
    break;
  default:
    SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
  }
  PetscFunctionReturn(0);
}

/* Return one locally-owned row of the parallel matrix with globally-sorted column
   indices, merging the diagonal (A) and off-diagonal (B) block rows into the
   matrix-owned scratch buffers rowvalues/rowindices.  Only rows in
   [rstart,rend) may be requested; must be paired with MatRestoreRow(). */
PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIAIJ  *mat = (Mat_MPIAIJ*)matin->data;
  PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p;
  PetscInt    i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
  PetscInt    nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
  PetscInt    *cmap,*idx_p;

  PetscFunctionBegin;
  PetscCheck(!mat->getrowactive,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
    PetscInt   max = 1,tmp;
    for (i=0; i<matin->rmap->n; i++) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
      if (max < tmp) max = tmp;
    }
    PetscCall(PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices));
  }

  PetscCheckFalse(row < rstart || row >= rend,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
  lrow = row - rstart;

  /* NULL out the value/index requests the caller did not ask for */
  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v) {pvA = NULL; pvB = NULL;}
  if (!idx) {pcA = NULL; if (!v) pcB = NULL;}
  PetscCall((*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA));
  PetscCall((*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB));
  nztot = nzA + nzB;

  cmap = mat->garray;
  if (v || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      /* imark = number of leading B entries whose global column lies left of the
         diagonal block; B entries split around the A entries in the merged row */
      PetscInt imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        for (i=0; i<nzB; i++) {
          if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
        for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          for (i=0; i<imark; i++) {
            idx_p[i] = cmap[cworkB[i]];
          }
        } else {
          /* v was not requested, so imark was never computed above; compute it here */
          for (i=0; i<nzB; i++) {
            if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
            else break;
          }
          imark = i;
        }
        for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
        for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
      }
    } else {
      if (idx) *idx = NULL;
      if (v)   *v   = NULL;
    }
  }
  *nz = nztot;
  PetscCall((*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA));
  PetscCall((*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB));
  PetscFunctionReturn(0);
}

/* Release the row obtained by MatGetRow_MPIAIJ(); only clears the re-entrancy flag
   since the row data lives in matrix-owned scratch buffers. */
PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;

  PetscFunctionBegin;
  PetscCheck(aij->getrowactive,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
  aij->getrowactive = PETSC_FALSE;
  PetscFunctionReturn(0);
}

/* Compute a matrix norm (Frobenius, 1, or infinity) of the parallel matrix by
   combining per-block partial results with an Allreduce.  The 2-norm is not
   supported. */
PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
{
  Mat_MPIAIJ      *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ      *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
  PetscInt        i,j,cstart = mat->cmap->rstart;
  PetscReal       sum = 0.0;
  const MatScalar *v,*amata,*bmata;

  PetscFunctionBegin;
  if (aij->size == 1) {
    /* single process: defer entirely to the sequential implementation */
    PetscCall(MatNorm(aij->A,type,norm));
  } else {
    PetscCall(MatSeqAIJGetArrayRead(aij->A,&amata));
    PetscCall(MatSeqAIJGetArrayRead(aij->B,&bmata));
    if (type == NORM_FROBENIUS) {
      v = amata;
      for (i=0; i<amat->nz; i++) {
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
      }
      v = bmata;
      for (i=0; i<bmat->nz; i++) {
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
      }
      PetscCall(MPIU_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat)));
      *norm = PetscSqrtReal(*norm);
      PetscCall(PetscLogFlops(2.0*amat->nz+2.0*bmat->nz));
    } else if (type == NORM_1) { /* max column norm */
      /* NOTE: allocates O(global columns) scratch on every process; not scalable
         for very wide matrices */
      PetscReal *tmp,*tmp2;
      PetscInt  *jj,*garray = aij->garray;
      PetscCall(PetscCalloc1(mat->cmap->N+1,&tmp));
      PetscCall(PetscMalloc1(mat->cmap->N+1,&tmp2));
      *norm = 0.0;
      v = amata; jj = amat->j;
      for (j=0; j<amat->nz; j++) {
        tmp[cstart + *jj++] += PetscAbsScalar(*v); v++;
      }
      v = bmata; jj = bmat->j;
      for (j=0; j<bmat->nz; j++) {
        tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
      }
      PetscCall(MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat)));
      for (j=0; j<mat->cmap->N; j++) {
        if (tmp2[j] > *norm) *norm =
tmp2[j];
      }
      PetscCall(PetscFree(tmp));
      PetscCall(PetscFree(tmp2));
      PetscCall(PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0)));
    } else if (type == NORM_INFINITY) { /* max row norm */
      PetscReal ntemp = 0.0;
      for (j=0; j<aij->A->rmap->n; j++) {
        v   = amata + amat->i[j];
        sum = 0.0;
        for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
          sum += PetscAbsScalar(*v); v++;
        }
        v = bmata + bmat->i[j];
        for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
          sum += PetscAbsScalar(*v); v++;
        }
        if (sum > ntemp) ntemp = sum;
      }
      PetscCall(MPIU_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat)));
      PetscCall(PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0)));
    } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm");
    PetscCall(MatSeqAIJRestoreArrayRead(aij->A,&amata));
    PetscCall(MatSeqAIJRestoreArrayRead(aij->B,&bmata));
  }
  PetscFunctionReturn(0);
}

/* Form the transpose of a parallel AIJ matrix.  The diagonal block is transposed
   locally with MatTranspose(); the off-diagonal block is scattered with
   MatSetValues() since its transpose entries belong to other processes.
   Preallocation of the result uses a PetscSF reduction to count, per future
   column owner, the entries contributed by each process's B block. */
PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
{
  Mat_MPIAIJ      *a =(Mat_MPIAIJ*)A->data,*b;
  Mat_SeqAIJ      *Aloc =(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data,*sub_B_diag;
  PetscInt        M = A->rmap->N,N=A->cmap->N,ma,na,mb,nb,row,*cols,*cols_tmp,*B_diag_ilen,i,ncol,A_diag_ncol;
  const PetscInt  *ai,*aj,*bi,*bj,*B_diag_i;
  Mat             B,A_diag,*B_diag;
  const MatScalar *pbv,*bv;

  PetscFunctionBegin;
  ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n;
  ai = Aloc->i; aj = Aloc->j;
  bi = Bloc->i; bj = Bloc->j;
  if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
    PetscInt             *d_nnz,*g_nnz,*o_nnz;
    PetscSFNode          *oloc;
    PETSC_UNUSED PetscSF sf;

    PetscCall(PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc));
    /* compute d_nnz for preallocation */
    PetscCall(PetscArrayzero(d_nnz,na));
    for (i=0; i<ai[ma]; i++) d_nnz[aj[i]]++;
    /* compute local off-diagonal contributions */
    PetscCall(PetscArrayzero(g_nnz,nb));
    for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++;
    /* map those to global */
    PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A),&sf));
    PetscCall(PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray));
    PetscCall(PetscSFSetFromOptions(sf));
    PetscCall(PetscArrayzero(o_nnz,na));
    PetscCall(PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM));
    PetscCall(PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM));
    PetscCall(PetscSFDestroy(&sf));

    PetscCall(MatCreate(PetscObjectComm((PetscObject)A),&B));
    /* transposed sizes: rows <-> columns, block sizes swapped too */
    PetscCall(MatSetSizes(B,A->cmap->n,A->rmap->n,N,M));
    PetscCall(MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs)));
    PetscCall(MatSetType(B,((PetscObject)A)->type_name));
    PetscCall(MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz));
    PetscCall(PetscFree4(d_nnz,o_nnz,g_nnz,oloc));
  } else {
    B = *matout;
    PetscCall(MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE));
  }

  b           = (Mat_MPIAIJ*)B->data;
  A_diag      = a->A;
  B_diag      = &b->A;
  sub_B_diag  = (Mat_SeqAIJ*)(*B_diag)->data;
  A_diag_ncol = A_diag->cmap->N;
  B_diag_ilen = sub_B_diag->ilen;
  B_diag_i    = sub_B_diag->i;

  /* Set ilen for diagonal of B */
  for (i=0; i<A_diag_ncol; i++) {
    B_diag_ilen[i] = B_diag_i[i+1] - B_diag_i[i];
  }

  /* Transpose the diagonal part of the matrix. In contrast to the offdiagonal part, this can be done
     very quickly (=without using MatSetValues), because all writes are local. */
  PetscCall(MatTranspose(A_diag,MAT_REUSE_MATRIX,B_diag));

  /* copy over the B part */
  PetscCall(PetscMalloc1(bi[mb],&cols));
  PetscCall(MatSeqAIJGetArrayRead(a->B,&bv));
  pbv = bv;
  row = A->rmap->rstart;
  /* translate compressed B column indices to global column numbers */
  for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]];
  cols_tmp = cols;
  for (i=0; i<mb; i++) {
    ncol = bi[i+1]-bi[i];
    /* row i of B becomes column `row` of the transpose: set as a 1-column slab */
    PetscCall(MatSetValues(B,ncol,cols_tmp,1,&row,pbv,INSERT_VALUES));
    row++;
    pbv += ncol; cols_tmp += ncol;
  }
  PetscCall(PetscFree(cols));
  PetscCall(MatSeqAIJRestoreArrayRead(a->B,&bv));

  PetscCall(MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY));
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
    *matout = B;
  } else {
    /* MAT_INPLACE_MATRIX path: replace A's guts with B's */
    PetscCall(MatHeaderMerge(A,&B));
  }
  PetscFunctionReturn(0);
}

/* Scale the matrix rows by ll and/or columns by rr (mat = diag(ll)*mat*diag(rr)).
   The scatter of rr into the ghost vector is started before scaling the diagonal
   block so communication overlaps local computation. */
PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
  Mat        a = aij->A,b = aij->B;
  PetscInt   s1,s2,s3;

  PetscFunctionBegin;
  PetscCall(MatGetLocalSize(mat,&s2,&s3));
  if (rr) {
    PetscCall(VecGetLocalSize(rr,&s1));
    PetscCheckFalse(s1!=s3,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
    /* Overlap communication with computation. */
    PetscCall(VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD));
  }
  if (ll) {
    PetscCall(VecGetLocalSize(ll,&s1));
    PetscCheckFalse(s1!=s2,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
    PetscCall((*b->ops->diagonalscale)(b,ll,NULL));
  }
  /* scale the diagonal block */
  PetscCall((*a->ops->diagonalscale)(a,ll,rr));

  if (rr) {
    /* Do a scatter end and then right scale the off-diagonal block */
    PetscCall(VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD));
    PetscCall((*b->ops->diagonalscale)(b,NULL,aij->lvec));
  }
  PetscFunctionReturn(0);
}

/* Mark the matrix as unfactored; only the diagonal block carries factor state. */
PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  PetscCall(MatSetUnfactored(a->A));
  PetscFunctionReturn(0);
}

/* Test whether two parallel matrices are equal: both local blocks must match on
   every process, combined with a logical-AND Allreduce. */
PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool *flag)
{
  Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
  Mat        a,b,c,d;
  PetscBool  flg;

  PetscFunctionBegin;
  a = matA->A; b = matA->B;
  c = matB->A; d = matB->B;

  PetscCall(MatEqual(a,c,&flg));
  if (flg) {
    PetscCall(MatEqual(b,d,&flg));
  }
  /* every process must agree before the matrices are declared equal */
  PetscCall(MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A)));
  PetscFunctionReturn(0);
}

/* Copy A into B, using the fast block-wise path only when both matrices share the
   same nonzero pattern and copy implementation. */
PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
  Mat_MPIAIJ *b = (Mat_MPIAIJ*)B->data;

  PetscFunctionBegin;
  /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
  if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
    /* because of the column compression in the off-processor part of the matrix a->B,
       the number of columns in a->B and b->B may be different, hence we cannot call
       the MatCopy() directly on the two parts. If need be, we can provide a more
       efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
       then copying the submatrices */
    PetscCall(MatCopy_Basic(A,B,str));
  } else {
    PetscCall(MatCopy(a->A,b->A,str));
    PetscCall(MatCopy(a->B,b->B,str));
  }
  PetscCall(PetscObjectStateIncrease((PetscObject)B));
  PetscFunctionReturn(0);
}

/* Default MatSetUp(): preallocate with default (heuristic) row lengths. */
PetscErrorCode MatSetUp_MPIAIJ(Mat A)
{
  PetscFunctionBegin;
  PetscCall(MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,NULL,PETSC_DEFAULT,NULL));
  PetscFunctionReturn(0);
}

/*
   Computes the number of nonzeros per row needed for preallocation when X and Y
   have different nonzero structure.
2047 */ 2048 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz) 2049 { 2050 PetscInt i,j,k,nzx,nzy; 2051 2052 PetscFunctionBegin; 2053 /* Set the number of nonzeros in the new matrix */ 2054 for (i=0; i<m; i++) { 2055 const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i]; 2056 nzx = xi[i+1] - xi[i]; 2057 nzy = yi[i+1] - yi[i]; 2058 nnz[i] = 0; 2059 for (j=0,k=0; j<nzx; j++) { /* Point in X */ 2060 for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */ 2061 if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++; /* Skip duplicate */ 2062 nnz[i]++; 2063 } 2064 for (; k<nzy; k++) nnz[i]++; 2065 } 2066 PetscFunctionReturn(0); 2067 } 2068 2069 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */ 2070 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz) 2071 { 2072 PetscInt m = Y->rmap->N; 2073 Mat_SeqAIJ *x = (Mat_SeqAIJ*)X->data; 2074 Mat_SeqAIJ *y = (Mat_SeqAIJ*)Y->data; 2075 2076 PetscFunctionBegin; 2077 PetscCall(MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz)); 2078 PetscFunctionReturn(0); 2079 } 2080 2081 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2082 { 2083 Mat_MPIAIJ *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data; 2084 2085 PetscFunctionBegin; 2086 if (str == SAME_NONZERO_PATTERN) { 2087 PetscCall(MatAXPY(yy->A,a,xx->A,str)); 2088 PetscCall(MatAXPY(yy->B,a,xx->B,str)); 2089 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 2090 PetscCall(MatAXPY_Basic(Y,a,X,str)); 2091 } else { 2092 Mat B; 2093 PetscInt *nnz_d,*nnz_o; 2094 2095 PetscCall(PetscMalloc1(yy->A->rmap->N,&nnz_d)); 2096 PetscCall(PetscMalloc1(yy->B->rmap->N,&nnz_o)); 2097 PetscCall(MatCreate(PetscObjectComm((PetscObject)Y),&B)); 
2098 PetscCall(PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name)); 2099 PetscCall(MatSetLayouts(B,Y->rmap,Y->cmap)); 2100 PetscCall(MatSetType(B,((PetscObject)Y)->type_name)); 2101 PetscCall(MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d)); 2102 PetscCall(MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o)); 2103 PetscCall(MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o)); 2104 PetscCall(MatAXPY_BasicWithPreallocation(B,Y,a,X,str)); 2105 PetscCall(MatHeaderMerge(Y,&B)); 2106 PetscCall(PetscFree(nnz_d)); 2107 PetscCall(PetscFree(nnz_o)); 2108 } 2109 PetscFunctionReturn(0); 2110 } 2111 2112 PETSC_INTERN PetscErrorCode MatConjugate_SeqAIJ(Mat); 2113 2114 PetscErrorCode MatConjugate_MPIAIJ(Mat mat) 2115 { 2116 PetscFunctionBegin; 2117 if (PetscDefined(USE_COMPLEX)) { 2118 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2119 2120 PetscCall(MatConjugate_SeqAIJ(aij->A)); 2121 PetscCall(MatConjugate_SeqAIJ(aij->B)); 2122 } 2123 PetscFunctionReturn(0); 2124 } 2125 2126 PetscErrorCode MatRealPart_MPIAIJ(Mat A) 2127 { 2128 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2129 2130 PetscFunctionBegin; 2131 PetscCall(MatRealPart(a->A)); 2132 PetscCall(MatRealPart(a->B)); 2133 PetscFunctionReturn(0); 2134 } 2135 2136 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 2137 { 2138 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2139 2140 PetscFunctionBegin; 2141 PetscCall(MatImaginaryPart(a->A)); 2142 PetscCall(MatImaginaryPart(a->B)); 2143 PetscFunctionReturn(0); 2144 } 2145 2146 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A,Vec v,PetscInt idx[]) 2147 { 2148 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2149 PetscInt i,*idxb = NULL,m = A->rmap->n; 2150 PetscScalar *va,*vv; 2151 Vec vB,vA; 2152 const PetscScalar *vb; 2153 2154 PetscFunctionBegin; 2155 PetscCall(VecCreateSeq(PETSC_COMM_SELF,m,&vA)); 2156 PetscCall(MatGetRowMaxAbs(a->A,vA,idx)); 2157 2158 PetscCall(VecGetArrayWrite(vA,&va)); 2159 if (idx) { 2160 for (i=0; i<m; i++) { 2161 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2162 } 
2163 } 2164 2165 PetscCall(VecCreateSeq(PETSC_COMM_SELF,m,&vB)); 2166 PetscCall(PetscMalloc1(m,&idxb)); 2167 PetscCall(MatGetRowMaxAbs(a->B,vB,idxb)); 2168 2169 PetscCall(VecGetArrayWrite(v,&vv)); 2170 PetscCall(VecGetArrayRead(vB,&vb)); 2171 for (i=0; i<m; i++) { 2172 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 2173 vv[i] = vb[i]; 2174 if (idx) idx[i] = a->garray[idxb[i]]; 2175 } else { 2176 vv[i] = va[i]; 2177 if (idx && PetscAbsScalar(va[i]) == PetscAbsScalar(vb[i]) && idxb[i] != -1 && idx[i] > a->garray[idxb[i]]) 2178 idx[i] = a->garray[idxb[i]]; 2179 } 2180 } 2181 PetscCall(VecRestoreArrayWrite(vA,&vv)); 2182 PetscCall(VecRestoreArrayWrite(vA,&va)); 2183 PetscCall(VecRestoreArrayRead(vB,&vb)); 2184 PetscCall(PetscFree(idxb)); 2185 PetscCall(VecDestroy(&vA)); 2186 PetscCall(VecDestroy(&vB)); 2187 PetscFunctionReturn(0); 2188 } 2189 2190 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2191 { 2192 Mat_MPIAIJ *mat = (Mat_MPIAIJ*) A->data; 2193 PetscInt m = A->rmap->n,n = A->cmap->n; 2194 PetscInt cstart = A->cmap->rstart,cend = A->cmap->rend; 2195 PetscInt *cmap = mat->garray; 2196 PetscInt *diagIdx, *offdiagIdx; 2197 Vec diagV, offdiagV; 2198 PetscScalar *a, *diagA, *offdiagA; 2199 const PetscScalar *ba,*bav; 2200 PetscInt r,j,col,ncols,*bi,*bj; 2201 Mat B = mat->B; 2202 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 2203 2204 PetscFunctionBegin; 2205 /* When a process holds entire A and other processes have no entry */ 2206 if (A->cmap->N == n) { 2207 PetscCall(VecGetArrayWrite(v,&diagA)); 2208 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF,1,m,diagA,&diagV)); 2209 PetscCall(MatGetRowMinAbs(mat->A,diagV,idx)); 2210 PetscCall(VecDestroy(&diagV)); 2211 PetscCall(VecRestoreArrayWrite(v,&diagA)); 2212 PetscFunctionReturn(0); 2213 } else if (n == 0) { 2214 if (m) { 2215 PetscCall(VecGetArrayWrite(v,&a)); 2216 for (r = 0; r < m; r++) {a[r] = 0.0; if (idx) idx[r] = -1;} 2217 PetscCall(VecRestoreArrayWrite(v,&a)); 2218 } 2219 PetscFunctionReturn(0); 
  }

  PetscCall(PetscMalloc2(m,&diagIdx,m,&offdiagIdx));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV));
  PetscCall(MatGetRowMinAbs(mat->A, diagV, diagIdx));

  /* Get offdiagIdx[] for implicit 0.0 */
  PetscCall(MatSeqAIJGetArrayRead(B,&bav));
  ba = bav;
  bi = b->i;
  bj = b->j;
  PetscCall(VecGetArrayWrite(offdiagV, &offdiagA));
  for (r = 0; r < m; r++) {
    ncols = bi[r+1] - bi[r];
    if (ncols == A->cmap->N - n) { /* Brow is dense */
      offdiagA[r] = *ba; offdiagIdx[r] = cmap[0];
    } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */
      /* (the row has at least one implicit zero, so the minimum magnitude is 0.0) */
      offdiagA[r] = 0.0;

      /* Find first hole in the cmap */
      for (j=0; j<ncols; j++) {
        col = cmap[bj[j]]; /* global column number = cmap[B column number] */
        if (col > j && j < cstart) {
          offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
          break;
        } else if (col > j + n && j >= cstart) {
          offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
          break;
        }
      }
      if (j == ncols && ncols < A->cmap->N - n) {
        /* a hole is outside compressed Bcols */
        if (ncols == 0) {
          if (cstart) {
            offdiagIdx[r] = 0;
          } else offdiagIdx[r] = cend;
        } else { /* ncols > 0 */
          offdiagIdx[r] = cmap[ncols-1] + 1;
          if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
        }
      }
    }

    /* scan explicit B entries; keep the one of smallest magnitude */
    for (j=0; j<ncols; j++) {
      if (PetscAbsScalar(offdiagA[r]) > PetscAbsScalar(*ba)) {offdiagA[r] = *ba; offdiagIdx[r] = cmap[*bj];}
      ba++; bj++;
    }
  }

  /* merge the diagonal-block and off-diagonal-block minima; ties prefer the
     smaller global column index */
  PetscCall(VecGetArrayWrite(v, &a));
  PetscCall(VecGetArrayRead(diagV, (const PetscScalar**)&diagA));
  for (r = 0; r < m; ++r) {
    if (PetscAbsScalar(diagA[r]) < PetscAbsScalar(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) idx[r] = cstart + diagIdx[r];
    } else if (PetscAbsScalar(diagA[r]) == PetscAbsScalar(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) {
        if (cstart + diagIdx[r] <= offdiagIdx[r]) {
          idx[r] = cstart + diagIdx[r];
        } else idx[r] = offdiagIdx[r];
      }
    } else {
      a[r] = offdiagA[r];
      if (idx) idx[r] = offdiagIdx[r];
    }
  }
  PetscCall(MatSeqAIJRestoreArrayRead(B,&bav));
  PetscCall(VecRestoreArrayWrite(v, &a));
  PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar**)&diagA));
  PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA));
  PetscCall(VecDestroy(&diagV));
  PetscCall(VecDestroy(&offdiagV));
  PetscCall(PetscFree2(diagIdx, offdiagIdx));
  PetscFunctionReturn(0);
}

/* For each local row, compute the minimum entry (by real part; implicit zeros in
   the off-diagonal block count as 0.0) and optionally its global column index. */
PetscErrorCode MatGetRowMin_MPIAIJ(Mat A,Vec v,PetscInt idx[])
{
  Mat_MPIAIJ        *mat = (Mat_MPIAIJ*) A->data;
  PetscInt          m = A->rmap->n,n = A->cmap->n;
  PetscInt          cstart = A->cmap->rstart,cend = A->cmap->rend;
  PetscInt          *cmap = mat->garray;
  PetscInt          *diagIdx, *offdiagIdx;
  Vec               diagV, offdiagV;
  PetscScalar       *a, *diagA, *offdiagA;
  const PetscScalar *ba,*bav;
  PetscInt          r,j,col,ncols,*bi,*bj;
  Mat               B = mat->B;
  Mat_SeqAIJ        *b = (Mat_SeqAIJ*)B->data;

  PetscFunctionBegin;
  /* When a process holds entire A and other processes have no entry */
  if (A->cmap->N == n) {
    PetscCall(VecGetArrayWrite(v,&diagA));
    PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF,1,m,diagA,&diagV));
    PetscCall(MatGetRowMin(mat->A,diagV,idx));
    PetscCall(VecDestroy(&diagV));
    PetscCall(VecRestoreArrayWrite(v,&diagA));
    PetscFunctionReturn(0);
  } else if (n == 0) {
    /* no local columns: report the identity element of min */
    if (m) {
      PetscCall(VecGetArrayWrite(v,&a));
      for (r = 0; r < m; r++) {a[r] = PETSC_MAX_REAL; if (idx) idx[r] = -1;}
      PetscCall(VecRestoreArrayWrite(v,&a));
    }
    PetscFunctionReturn(0);
  }

  PetscCall(PetscCalloc2(m,&diagIdx,m,&offdiagIdx));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV));
  PetscCall(MatGetRowMin(mat->A, diagV, diagIdx));

  /* Get offdiagIdx[] for implicit 0.0 */
  PetscCall(MatSeqAIJGetArrayRead(B,&bav));
  ba = bav;
  bi = b->i;
  bj = b->j;
  PetscCall(VecGetArrayWrite(offdiagV, &offdiagA));
  for (r = 0; r < m; r++) {
    ncols = bi[r+1] - bi[r];
    if (ncols == A->cmap->N - n) { /* Brow is dense */
      offdiagA[r] = *ba; offdiagIdx[r] = cmap[0];
    } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */
      /* (the row has at least one implicit zero, so the minimum is at most 0.0) */
      offdiagA[r] = 0.0;

      /* Find first hole in the cmap */
      for (j=0; j<ncols; j++) {
        col = cmap[bj[j]]; /* global column number = cmap[B column number] */
        if (col > j && j < cstart) {
          offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
          break;
        } else if (col > j + n && j >= cstart) {
          offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
          break;
        }
      }
      if (j == ncols && ncols < A->cmap->N - n) {
        /* a hole is outside compressed Bcols */
        if (ncols == 0) {
          if (cstart) {
            offdiagIdx[r] = 0;
          } else offdiagIdx[r] = cend;
        } else { /* ncols > 0 */
          offdiagIdx[r] = cmap[ncols-1] + 1;
          if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
        }
      }
    }

    for (j=0; j<ncols; j++) {
      if (PetscRealPart(offdiagA[r]) > PetscRealPart(*ba)) {offdiagA[r] = *ba; offdiagIdx[r] = cmap[*bj];}
      ba++; bj++;
    }
  }

  PetscCall(VecGetArrayWrite(v, &a));
  PetscCall(VecGetArrayRead(diagV, (const PetscScalar**)&diagA));
  for (r = 0; r < m; ++r) {
    if (PetscRealPart(diagA[r]) < PetscRealPart(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) idx[r] = cstart + diagIdx[r];
    } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) {
        if (cstart + diagIdx[r] <= offdiagIdx[r]) {
          idx[r] = cstart + diagIdx[r];
        } else idx[r] = offdiagIdx[r];
      }
    } else {
      a[r] = offdiagA[r];
      if (idx) idx[r] = offdiagIdx[r];
    }
  }
  PetscCall(MatSeqAIJRestoreArrayRead(B,&bav));
  PetscCall(VecRestoreArrayWrite(v, &a));
  PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar**)&diagA));
  PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA));
  PetscCall(VecDestroy(&diagV));
  PetscCall(VecDestroy(&offdiagV));
  PetscCall(PetscFree2(diagIdx, offdiagIdx));
  PetscFunctionReturn(0);
}

/* For each local row, compute the maximum entry (by real part; implicit zeros in
   the off-diagonal block count as 0.0) and optionally its global column index. */
PetscErrorCode MatGetRowMax_MPIAIJ(Mat A,Vec v,PetscInt idx[])
{
  Mat_MPIAIJ        *mat = (Mat_MPIAIJ*)A->data;
  PetscInt          m = A->rmap->n,n = A->cmap->n;
  PetscInt          cstart = A->cmap->rstart,cend = A->cmap->rend;
  PetscInt          *cmap = mat->garray;
  PetscInt          *diagIdx, *offdiagIdx;
  Vec               diagV, offdiagV;
  PetscScalar       *a, *diagA, *offdiagA;
  const PetscScalar *ba,*bav;
  PetscInt          r,j,col,ncols,*bi,*bj;
  Mat               B = mat->B;
  Mat_SeqAIJ        *b = (Mat_SeqAIJ*)B->data;

  PetscFunctionBegin;
  /* When a process holds entire A and other processes have no entry */
  if (A->cmap->N == n) {
    PetscCall(VecGetArrayWrite(v,&diagA));
    PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF,1,m,diagA,&diagV));
    PetscCall(MatGetRowMax(mat->A,diagV,idx));
    PetscCall(VecDestroy(&diagV));
    PetscCall(VecRestoreArrayWrite(v,&diagA));
    PetscFunctionReturn(0);
  } else if (n == 0) {
    /* no local columns: report the identity element of max */
    if (m) {
      PetscCall(VecGetArrayWrite(v,&a));
      for (r = 0; r < m; r++) {a[r] = PETSC_MIN_REAL; if (idx) idx[r] = -1;}
      PetscCall(VecRestoreArrayWrite(v,&a));
    }
    PetscFunctionReturn(0);
  }

  PetscCall(PetscMalloc2(m,&diagIdx,m,&offdiagIdx));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV));
  PetscCall(MatGetRowMax(mat->A, diagV, diagIdx));

  /* Get offdiagIdx[] for implicit 0.0 */
  PetscCall(MatSeqAIJGetArrayRead(B,&bav));
  ba = bav;
  bi = b->i;
  bj = b->j;
  PetscCall(VecGetArrayWrite(offdiagV, &offdiagA));
  for (r = 0; r < m; r++) {
    ncols = bi[r+1] - bi[r];
    if (ncols == A->cmap->N - n) { /* Brow is dense */
      offdiagA[r] = *ba; offdiagIdx[r] = cmap[0];
    } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */
      offdiagA[r] = 0.0;

      /* Find first hole in the cmap */
      for (j=0; j<ncols; j++) {
        col = cmap[bj[j]]; /* global column number = cmap[B column number] */
        if (col > j && j < cstart) {
          offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
          break;
        } else if (col > j + n && j >= cstart) {
          offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
          break;
        }
      }
      if (j == ncols && ncols < A->cmap->N - n) {
        /* a hole is outside compressed Bcols */
        if (ncols == 0) {
          if (cstart) {
            offdiagIdx[r] = 0;
          } else offdiagIdx[r] = cend;
        } else { /* ncols > 0 */
          offdiagIdx[r] = cmap[ncols-1] + 1;
          if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
        }
      }
    }

    for (j=0; j<ncols; j++) {
      if (PetscRealPart(offdiagA[r]) < PetscRealPart(*ba)) {offdiagA[r] = *ba; offdiagIdx[r] = cmap[*bj];}
      ba++; bj++;
    }
  }

  PetscCall(VecGetArrayWrite(v, &a));
  PetscCall(VecGetArrayRead(diagV,(const PetscScalar**)&diagA));
  for (r = 0; r < m; ++r) {
    if (PetscRealPart(diagA[r]) > PetscRealPart(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) idx[r] = cstart + diagIdx[r];
    } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) {
        if (cstart + diagIdx[r] <= offdiagIdx[r]) {
          idx[r] = cstart + diagIdx[r];
        } else idx[r] = offdiagIdx[r];
      }
    } else {
      a[r] = offdiagA[r];
      if (idx) idx[r] = offdiagIdx[r];
    }
  }
  PetscCall(MatSeqAIJRestoreArrayRead(B,&bav));
  PetscCall(VecRestoreArrayWrite(v, &a));
  PetscCall(VecRestoreArrayRead(diagV, (const
PetscScalar**)&diagA)); 2507 PetscCall(VecRestoreArrayWrite(offdiagV,&offdiagA)); 2508 PetscCall(VecDestroy(&diagV)); 2509 PetscCall(VecDestroy(&offdiagV)); 2510 PetscCall(PetscFree2(diagIdx, offdiagIdx)); 2511 PetscFunctionReturn(0); 2512 } 2513 2514 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat) 2515 { 2516 Mat *dummy; 2517 2518 PetscFunctionBegin; 2519 PetscCall(MatCreateSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy)); 2520 *newmat = *dummy; 2521 PetscCall(PetscFree(dummy)); 2522 PetscFunctionReturn(0); 2523 } 2524 2525 PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values) 2526 { 2527 Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 2528 2529 PetscFunctionBegin; 2530 PetscCall(MatInvertBlockDiagonal(a->A,values)); 2531 A->factorerrortype = a->A->factorerrortype; 2532 PetscFunctionReturn(0); 2533 } 2534 2535 static PetscErrorCode MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx) 2536 { 2537 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)x->data; 2538 2539 PetscFunctionBegin; 2540 PetscCheckFalse(!x->assembled && !x->preallocated,PetscObjectComm((PetscObject)x), PETSC_ERR_ARG_WRONGSTATE, "MatSetRandom on an unassembled and unpreallocated MATMPIAIJ is not allowed"); 2541 PetscCall(MatSetRandom(aij->A,rctx)); 2542 if (x->assembled) { 2543 PetscCall(MatSetRandom(aij->B,rctx)); 2544 } else { 2545 PetscCall(MatSetRandomSkipColumnRange_SeqAIJ_Private(aij->B,x->cmap->rstart,x->cmap->rend,rctx)); 2546 } 2547 PetscCall(MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY)); 2548 PetscCall(MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY)); 2549 PetscFunctionReturn(0); 2550 } 2551 2552 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A,PetscBool sc) 2553 { 2554 PetscFunctionBegin; 2555 if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable; 2556 else A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ; 2557 PetscFunctionReturn(0); 2558 } 2559 2560 /*@ 2561 MatMPIAIJSetUseScalableIncreaseOverlap - Determine if the matrix uses a 
   scalable algorithm to compute the overlap

   Collective on Mat

   Input Parameters:
+  A - the matrix
-  sc - PETSC_TRUE indicates use the scalable algorithm (default is not to use the scalable algorithm)

   Level: advanced

@*/
PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A,PetscBool sc)
{
  PetscFunctionBegin;
  /* dispatched through the composed-function mechanism; a no-op for types that do not provide it */
  PetscTryMethod(A,"MatMPIAIJSetUseScalableIncreaseOverlap_C",(Mat,PetscBool),(A,sc));
  PetscFunctionReturn(0);
}

/* Processes the MPIAIJ-specific options database keys */
PetscErrorCode MatSetFromOptions_MPIAIJ(PetscOptionItems *PetscOptionsObject,Mat A)
{
  PetscBool sc = PETSC_FALSE,flg;

  PetscFunctionBegin;
  PetscCall(PetscOptionsHead(PetscOptionsObject,"MPIAIJ options"));
  /* default reflects the currently installed increaseoverlap implementation */
  if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE;
  PetscCall(PetscOptionsBool("-mat_increase_overlap_scalable","Use a scalable algorithm to compute the overlap","MatIncreaseOverlap",sc,&sc,&flg));
  if (flg) {
    PetscCall(MatMPIAIJSetUseScalableIncreaseOverlap(A,sc));
  }
  PetscCall(PetscOptionsTail());
  PetscFunctionReturn(0);
}

/* Y = Y + a*I.  If Y was never preallocated (or its diagonal block holds no nonzeros)
   it is first given one nonzero per row so the diagonal entries can be inserted. */
PetscErrorCode MatShift_MPIAIJ(Mat Y,PetscScalar a)
{
  Mat_MPIAIJ *maij = (Mat_MPIAIJ*)Y->data;
  Mat_SeqAIJ *aij = (Mat_SeqAIJ*)maij->A->data;

  PetscFunctionBegin;
  if (!Y->preallocated) {
    PetscCall(MatMPIAIJSetPreallocation(Y,1,NULL,0,NULL));
  } else if (!aij->nz) {
    PetscInt nonew = aij->nonew;
    /* preallocating resets nonew; preserve the caller's new-nonzero policy */
    PetscCall(MatSeqAIJSetPreallocation(maij->A,1,NULL));
    aij->nonew = nonew;
  }
  PetscCall(MatShift_Basic(Y,a));
  PetscFunctionReturn(0);
}

/* Reports whether a diagonal entry is missing; d (if requested) is returned as a
   GLOBAL row index, hence the shift by the ownership range start. */
PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A,PetscBool *missing,PetscInt *d)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  PetscCheckFalse(A->rmap->n != A->cmap->n,PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices");
  PetscCall(MatMissingDiagonal(a->A,missing,d));
  if (d) {
    PetscInt rstart;
    PetscCall(MatGetOwnershipRange(A,&rstart,NULL));
    *d += rstart; /* convert local index from the diagonal block to a global index */

  }
  PetscFunctionReturn(0);
}

/* Variable-size block-diagonal inverse; delegates to the local diagonal block */
PetscErrorCode MatInvertVariableBlockDiagonal_MPIAIJ(Mat A,PetscInt nblocks,const PetscInt *bsizes,PetscScalar *diag)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  PetscCall(MatInvertVariableBlockDiagonal(a->A,nblocks,bsizes,diag));
  PetscFunctionReturn(0);
}

/* -------------------------------------------------------------------*/
/* Function table for MATMPIAIJ; the numeric comments give the 0-based slot of the
   entry that follows them in struct _MatOps. */
static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
                                       MatGetRow_MPIAIJ,
                                       MatRestoreRow_MPIAIJ,
                                       MatMult_MPIAIJ,
                                /* 4*/ MatMultAdd_MPIAIJ,
                                       MatMultTranspose_MPIAIJ,
                                       MatMultTransposeAdd_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                /*10*/ NULL,
                                       NULL,
                                       NULL,
                                       MatSOR_MPIAIJ,
                                       MatTranspose_MPIAIJ,
                                /*15*/ MatGetInfo_MPIAIJ,
                                       MatEqual_MPIAIJ,
                                       MatGetDiagonal_MPIAIJ,
                                       MatDiagonalScale_MPIAIJ,
                                       MatNorm_MPIAIJ,
                                /*20*/ MatAssemblyBegin_MPIAIJ,
                                       MatAssemblyEnd_MPIAIJ,
                                       MatSetOption_MPIAIJ,
                                       MatZeroEntries_MPIAIJ,
                                /*24*/ MatZeroRows_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                /*29*/ MatSetUp_MPIAIJ,
                                       NULL,
                                       NULL,
                                       MatGetDiagonalBlock_MPIAIJ,
                                       NULL,
                                /*34*/ MatDuplicate_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                /*39*/ MatAXPY_MPIAIJ,
                                       MatCreateSubMatrices_MPIAIJ,
                                       MatIncreaseOverlap_MPIAIJ,
                                       MatGetValues_MPIAIJ,
                                       MatCopy_MPIAIJ,
                                /*44*/ MatGetRowMax_MPIAIJ,
                                       MatScale_MPIAIJ,
                                       MatShift_MPIAIJ,
                                       MatDiagonalSet_MPIAIJ,
                                       MatZeroRowsColumns_MPIAIJ,
                                /*49*/ MatSetRandom_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                /*54*/ MatFDColoringCreate_MPIXAIJ,
                                       NULL,
                                       MatSetUnfactored_MPIAIJ,
                                       MatPermute_MPIAIJ,
                                       NULL,
                                /*59*/ MatCreateSubMatrix_MPIAIJ,
                                       MatDestroy_MPIAIJ,
                                       MatView_MPIAIJ,
                                       NULL,
                                       NULL,
                                /*64*/ NULL,
MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ, 2703 NULL, 2704 NULL, 2705 NULL, 2706 /*69*/ MatGetRowMaxAbs_MPIAIJ, 2707 MatGetRowMinAbs_MPIAIJ, 2708 NULL, 2709 NULL, 2710 NULL, 2711 NULL, 2712 /*75*/ MatFDColoringApply_AIJ, 2713 MatSetFromOptions_MPIAIJ, 2714 NULL, 2715 NULL, 2716 MatFindZeroDiagonals_MPIAIJ, 2717 /*80*/ NULL, 2718 NULL, 2719 NULL, 2720 /*83*/ MatLoad_MPIAIJ, 2721 MatIsSymmetric_MPIAIJ, 2722 NULL, 2723 NULL, 2724 NULL, 2725 NULL, 2726 /*89*/ NULL, 2727 NULL, 2728 MatMatMultNumeric_MPIAIJ_MPIAIJ, 2729 NULL, 2730 NULL, 2731 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ, 2732 NULL, 2733 NULL, 2734 NULL, 2735 MatBindToCPU_MPIAIJ, 2736 /*99*/ MatProductSetFromOptions_MPIAIJ, 2737 NULL, 2738 NULL, 2739 MatConjugate_MPIAIJ, 2740 NULL, 2741 /*104*/MatSetValuesRow_MPIAIJ, 2742 MatRealPart_MPIAIJ, 2743 MatImaginaryPart_MPIAIJ, 2744 NULL, 2745 NULL, 2746 /*109*/NULL, 2747 NULL, 2748 MatGetRowMin_MPIAIJ, 2749 NULL, 2750 MatMissingDiagonal_MPIAIJ, 2751 /*114*/MatGetSeqNonzeroStructure_MPIAIJ, 2752 NULL, 2753 MatGetGhosts_MPIAIJ, 2754 NULL, 2755 NULL, 2756 /*119*/MatMultDiagonalBlock_MPIAIJ, 2757 NULL, 2758 NULL, 2759 NULL, 2760 MatGetMultiProcBlock_MPIAIJ, 2761 /*124*/MatFindNonzeroRows_MPIAIJ, 2762 MatGetColumnReductions_MPIAIJ, 2763 MatInvertBlockDiagonal_MPIAIJ, 2764 MatInvertVariableBlockDiagonal_MPIAIJ, 2765 MatCreateSubMatricesMPI_MPIAIJ, 2766 /*129*/NULL, 2767 NULL, 2768 NULL, 2769 MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ, 2770 NULL, 2771 /*134*/NULL, 2772 NULL, 2773 NULL, 2774 NULL, 2775 NULL, 2776 /*139*/MatSetBlockSizes_MPIAIJ, 2777 NULL, 2778 NULL, 2779 MatFDColoringSetUp_MPIXAIJ, 2780 MatFindOffBlockDiagonalEntries_MPIAIJ, 2781 MatCreateMPIMatConcatenateSeqMat_MPIAIJ, 2782 /*145*/NULL, 2783 NULL, 2784 NULL 2785 }; 2786 2787 /* ----------------------------------------------------------------------------------------*/ 2788 2789 PetscErrorCode MatStoreValues_MPIAIJ(Mat mat) 2790 { 2791 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2792 2793 PetscFunctionBegin; 2794 
PetscCall(MatStoreValues(aij->A));
  PetscCall(MatStoreValues(aij->B));
  PetscFunctionReturn(0);
}

/* Restores the values previously saved with MatStoreValues_MPIAIJ() */
PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;

  PetscFunctionBegin;
  PetscCall(MatRetrieveValues(aij->A));
  PetscCall(MatRetrieveValues(aij->B));
  PetscFunctionReturn(0);
}

/* Preallocates the diagonal (A) and off-diagonal (B) sequential blocks.
   d_nz/d_nnz size the rows of A, o_nz/o_nnz size the rows of B. */
PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
{
  Mat_MPIAIJ *b;
  PetscMPIInt size;

  PetscFunctionBegin;
  PetscCall(PetscLayoutSetUp(B->rmap));
  PetscCall(PetscLayoutSetUp(B->cmap));
  b = (Mat_MPIAIJ*)B->data;

  /* discard any previous column map / communication setup; it is rebuilt at assembly */
#if defined(PETSC_USE_CTABLE)
  PetscCall(PetscTableDestroy(&b->colmap));
#else
  PetscCall(PetscFree(b->colmap));
#endif
  PetscCall(PetscFree(b->garray));
  PetscCall(VecDestroy(&b->lvec));
  PetscCall(VecScatterDestroy(&b->Mvctx));

  /* Because the B will have been resized we simply destroy it and create a new one each time */
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B),&size));
  PetscCall(MatDestroy(&b->B));
  PetscCall(MatCreate(PETSC_COMM_SELF,&b->B));
  /* on one process there is no off-diagonal part, so B gets zero columns */
  PetscCall(MatSetSizes(b->B,B->rmap->n,size > 1 ? B->cmap->N : 0,B->rmap->n,size > 1 ? B->cmap->N : 0));
  PetscCall(MatSetBlockSizesFromMats(b->B,B,B));
  PetscCall(MatSetType(b->B,MATSEQAIJ));
  PetscCall(PetscLogObjectParent((PetscObject)B,(PetscObject)b->B));

  if (!B->preallocated) {
    PetscCall(MatCreate(PETSC_COMM_SELF,&b->A));
    PetscCall(MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n));
    PetscCall(MatSetBlockSizesFromMats(b->A,B,B));
    PetscCall(MatSetType(b->A,MATSEQAIJ));
    PetscCall(PetscLogObjectParent((PetscObject)B,(PetscObject)b->A));
  }

  PetscCall(MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz));
  PetscCall(MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz));
  B->preallocated = PETSC_TRUE;
  B->was_assembled = PETSC_FALSE;
  B->assembled = PETSC_FALSE;
  PetscFunctionReturn(0);
}

/* Resets preallocation of both local blocks, keeping their existing nonzero pattern
   storage; the column map and scatter are discarded and rebuilt at assembly. */
PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B)
{
  Mat_MPIAIJ *b;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(B,MAT_CLASSID,1);
  PetscCall(PetscLayoutSetUp(B->rmap));
  PetscCall(PetscLayoutSetUp(B->cmap));
  b = (Mat_MPIAIJ*)B->data;

#if defined(PETSC_USE_CTABLE)
  PetscCall(PetscTableDestroy(&b->colmap));
#else
  PetscCall(PetscFree(b->colmap));
#endif
  PetscCall(PetscFree(b->garray));
  PetscCall(VecDestroy(&b->lvec));
  PetscCall(VecScatterDestroy(&b->Mvctx));

  PetscCall(MatResetPreallocation(b->A));
  PetscCall(MatResetPreallocation(b->B));
  B->preallocated = PETSC_TRUE;
  B->was_assembled = PETSC_FALSE;
  B->assembled = PETSC_FALSE;
  PetscFunctionReturn(0);
}

/* Creates a new matrix with the same layout/type as matin, optionally copying values
   (per cpvalues); internal maps, scatters and both local blocks are duplicated. */
PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat mat;
  Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data;

  PetscFunctionBegin;
  *newmat = NULL;
  PetscCall(MatCreate(PetscObjectComm((PetscObject)matin),&mat));
  PetscCall(MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N));
PetscCall(MatSetBlockSizesFromMats(mat,matin,matin));
  PetscCall(MatSetType(mat,((PetscObject)matin)->type_name));
  a = (Mat_MPIAIJ*)mat->data;

  mat->factortype = matin->factortype;
  mat->assembled = matin->assembled;
  mat->insertmode = NOT_SET_VALUES;
  mat->preallocated = matin->preallocated;

  a->size = oldmat->size;
  a->rank = oldmat->rank;
  a->donotstash = oldmat->donotstash;
  a->roworiented = oldmat->roworiented;
  a->rowindices = NULL; /* MatGetRow work arrays are created lazily in the copy */
  a->rowvalues = NULL;
  a->getrowactive = PETSC_FALSE;

  PetscCall(PetscLayoutReference(matin->rmap,&mat->rmap));
  PetscCall(PetscLayoutReference(matin->cmap,&mat->cmap));

  /* copy the global-to-local column map of the off-diagonal part, if present */
  if (oldmat->colmap) {
#if defined(PETSC_USE_CTABLE)
    PetscCall(PetscTableCreateCopy(oldmat->colmap,&a->colmap));
#else
    PetscCall(PetscMalloc1(mat->cmap->N,&a->colmap));
    PetscCall(PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt)));
    PetscCall(PetscArraycpy(a->colmap,oldmat->colmap,mat->cmap->N));
#endif
  } else a->colmap = NULL;
  if (oldmat->garray) {
    PetscInt len;
    len = oldmat->B->cmap->n;
    PetscCall(PetscMalloc1(len+1,&a->garray));
    PetscCall(PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt)));
    if (len) PetscCall(PetscArraycpy(a->garray,oldmat->garray,len));
  } else a->garray = NULL;

  /* It may happen MatDuplicate is called with a non-assembled matrix
     In fact, MatDuplicate only requires the matrix to be preallocated
     This may happen inside a DMCreateMatrix_Shell */
  if (oldmat->lvec) {
    PetscCall(VecDuplicate(oldmat->lvec,&a->lvec));
    PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec));
  }
  if (oldmat->Mvctx) {
    PetscCall(VecScatterCopy(oldmat->Mvctx,&a->Mvctx));
    PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx));
  }
  PetscCall(MatDuplicate(oldmat->A,cpvalues,&a->A));
  PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A));
  PetscCall(MatDuplicate(oldmat->B,cpvalues,&a->B));
  PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B));
  PetscCall(PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist));
  *newmat = mat;
  PetscFunctionReturn(0);
}

/* Loads an MPIAIJ matrix from a viewer; dispatches on the viewer type (binary or HDF5) */
PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
{
  PetscBool isbinary, ishdf5;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(newMat,MAT_CLASSID,1);
  PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2);
  /* force binary viewer to load .info file if it has not yet done so */
  PetscCall(PetscViewerSetUp(viewer));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERHDF5, &ishdf5));
  if (isbinary) {
    PetscCall(MatLoad_MPIAIJ_Binary(newMat,viewer));
  } else if (ishdf5) {
#if defined(PETSC_HAVE_HDF5)
    PetscCall(MatLoad_AIJ_HDF5(newMat,viewer));
#else
    SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"HDF5 not supported in this build.\nPlease reconfigure using --download-hdf5");
#endif
  } else {
    SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newMat)->type_name);
  }
  PetscFunctionReturn(0);
}

/* Reads a matrix in PETSc binary format: header, per-row lengths, column indices and
   values; each process reads its own row slice and sets it via the CSR preallocation. */
PetscErrorCode MatLoad_MPIAIJ_Binary(Mat mat, PetscViewer viewer)
{
  PetscInt header[4],M,N,m,nz,rows,cols,sum,i;
  PetscInt *rowidxs,*colidxs;
  PetscScalar *matvals;

  PetscFunctionBegin;
  PetscCall(PetscViewerSetUp(viewer));

  /* read in matrix header */
  PetscCall(PetscViewerBinaryRead(viewer,header,4,NULL,PETSC_INT));
  PetscCheckFalse(header[0] != MAT_FILE_CLASSID,PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Not a matrix object in file");
  M = header[1]; N = header[2]; nz = header[3];
  PetscCheckFalse(M < 0,PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix row size (%" PetscInt_FMT ") in file is negative",M);
  PetscCheckFalse(N < 0,PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix column size (%" PetscInt_FMT ") in file is negative",N);
  PetscCheckFalse(nz < 0,PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk, cannot load as MPIAIJ");

  /* set block sizes from the viewer's .info file */
  PetscCall(MatLoad_Binary_BlockSizes(mat,viewer));
  /* set global sizes if not set already */
  if (mat->rmap->N < 0) mat->rmap->N = M;
  if (mat->cmap->N < 0) mat->cmap->N = N;
  PetscCall(PetscLayoutSetUp(mat->rmap));
  PetscCall(PetscLayoutSetUp(mat->cmap));

  /* check if the matrix sizes are correct */
  PetscCall(MatGetSize(mat,&rows,&cols));
  PetscCheckFalse(M != rows || N != cols,PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%" PetscInt_FMT ", %" PetscInt_FMT ") than the input matrix (%" PetscInt_FMT ", %" PetscInt_FMT ")",M,N,rows,cols);

  /* read in row lengths and build row indices */
  PetscCall(MatGetLocalSize(mat,&m,NULL));
  PetscCall(PetscMalloc1(m+1,&rowidxs));
  PetscCall(PetscViewerBinaryReadAll(viewer,rowidxs+1,m,PETSC_DECIDE,M,PETSC_INT));
  /* prefix-sum the row lengths into CSR row offsets */
  rowidxs[0] = 0; for (i=0; i<m; i++) rowidxs[i+1] += rowidxs[i];
  PetscCall(MPIU_Allreduce(&rowidxs[m],&sum,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)viewer)));
  PetscCheckFalse(sum != nz,PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Inconsistent matrix data in file: nonzeros = %" PetscInt_FMT ", sum-row-lengths = %" PetscInt_FMT,nz,sum);
  /* read in column indices and matrix values */
  PetscCall(PetscMalloc2(rowidxs[m],&colidxs,rowidxs[m],&matvals));
  PetscCall(PetscViewerBinaryReadAll(viewer,colidxs,rowidxs[m],PETSC_DETERMINE,PETSC_DETERMINE,PETSC_INT));
  PetscCall(PetscViewerBinaryReadAll(viewer,matvals,rowidxs[m],PETSC_DETERMINE,PETSC_DETERMINE,PETSC_SCALAR));
  /* store matrix indices and values */
  PetscCall(MatMPIAIJSetPreallocationCSR(mat,rowidxs,colidxs,matvals));
  PetscCall(PetscFree(rowidxs));
  PetscCall(PetscFree2(colidxs,matvals));
  PetscFunctionReturn(0);
}

/* Not scalable because of ISAllGather() unless getting all columns. */
PetscErrorCode ISGetSeqIS_Private(Mat mat,IS iscol,IS *isseq)
{
  IS iscol_local;
  PetscBool isstride;
  PetscMPIInt lisstride=0,gisstride;

  PetscFunctionBegin;
  /* check if we are grabbing all columns*/
  PetscCall(PetscObjectTypeCompare((PetscObject)iscol,ISSTRIDE,&isstride));

  if (isstride) {
    PetscInt start,len,mstart,mlen;
    PetscCall(ISStrideGetInfo(iscol,&start,NULL));
    PetscCall(ISGetLocalSize(iscol,&len));
    PetscCall(MatGetOwnershipRangeColumn(mat,&mstart,&mlen));
    if (mstart == start && mlen-mstart == len) lisstride = 1;
  }

  /* all processes must agree that the whole column space is requested */
  PetscCall(MPIU_Allreduce(&lisstride,&gisstride,1,MPI_INT,MPI_MIN,PetscObjectComm((PetscObject)mat)));
  if (gisstride) {
    PetscInt N;
    PetscCall(MatGetSize(mat,NULL,&N));
    PetscCall(ISCreateStride(PETSC_COMM_SELF,N,0,1,&iscol_local));
    PetscCall(ISSetIdentity(iscol_local));
    PetscCall(PetscInfo(mat,"Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n"));
  } else {
    PetscInt cbs;
    PetscCall(ISGetBlockSize(iscol,&cbs));
    PetscCall(ISAllGather(iscol,&iscol_local));
    PetscCall(ISSetBlockSize(iscol_local,cbs));
  }

  *isseq = iscol_local;
  PetscFunctionReturn(0);
}

/*
 Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and global size of iscol_local
 (see MatCreateSubMatrix_MPIAIJ_nonscalable)

 Input
Parameters:
   mat - matrix
   isrow - parallel row index set; its local indices are a subset of local columns of mat,
           i.e., mat->rstart <= isrow[i] < mat->rend
   iscol - parallel column index set; its local indices are a subset of local columns of mat,
           i.e., mat->cstart <= iscol[i] < mat->cend
 Output Parameter:
   isrow_d,iscol_d - sequential row and column index sets for retrieving mat->A
   iscol_o - sequential column index set for retrieving mat->B
   garray - column map; garray[i] indicates global location of iscol_o[i] in iscol
*/
PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat,IS isrow,IS iscol,IS *isrow_d,IS *iscol_d,IS *iscol_o,const PetscInt *garray[])
{
  Vec x,cmap;
  const PetscInt *is_idx;
  PetscScalar *xarray,*cmaparray;
  PetscInt ncols,isstart,*idx,m,rstart,*cmap1,count;
  Mat_MPIAIJ *a=(Mat_MPIAIJ*)mat->data;
  Mat B=a->B;
  Vec lvec=a->lvec,lcmap;
  PetscInt i,cstart,cend,Bn=B->cmap->N;
  MPI_Comm comm;
  VecScatter Mvctx=a->Mvctx;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)mat,&comm));
  PetscCall(ISGetLocalSize(iscol,&ncols));

  /* (1) iscol is a sub-column vector of mat, pad it with '-1.' to form a full vector x */
  PetscCall(MatCreateVecs(mat,&x,NULL));
  PetscCall(VecSet(x,-1.0));
  PetscCall(VecDuplicate(x,&cmap));
  PetscCall(VecSet(cmap,-1.0));

  /* Get start indices */
  /* prefix sum of local sizes gives this process's starting offset into iscol */
  PetscCallMPI(MPI_Scan(&ncols,&isstart,1,MPIU_INT,MPI_SUM,comm));
  isstart -= ncols;
  PetscCall(MatGetOwnershipRangeColumn(mat,&cstart,&cend));

  PetscCall(ISGetIndices(iscol,&is_idx));
  PetscCall(VecGetArray(x,&xarray));
  PetscCall(VecGetArray(cmap,&cmaparray));
  PetscCall(PetscMalloc1(ncols,&idx));
  for (i=0; i<ncols; i++) {
    xarray[is_idx[i]-cstart] = (PetscScalar)is_idx[i];
    cmaparray[is_idx[i]-cstart] = i + isstart; /* global index of iscol[i] */
    idx[i] = is_idx[i]-cstart; /* local index of iscol[i] */
  }
  PetscCall(VecRestoreArray(x,&xarray));
  PetscCall(VecRestoreArray(cmap,&cmaparray));
  PetscCall(ISRestoreIndices(iscol,&is_idx));

  /* Get iscol_d */
  PetscCall(ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,iscol_d));
  PetscCall(ISGetBlockSize(iscol,&i));
  PetscCall(ISSetBlockSize(*iscol_d,i));

  /* Get isrow_d */
  PetscCall(ISGetLocalSize(isrow,&m));
  rstart = mat->rmap->rstart;
  PetscCall(PetscMalloc1(m,&idx));
  PetscCall(ISGetIndices(isrow,&is_idx));
  for (i=0; i<m; i++) idx[i] = is_idx[i]-rstart;
  PetscCall(ISRestoreIndices(isrow,&is_idx));

  PetscCall(ISCreateGeneral(PETSC_COMM_SELF,m,idx,PETSC_OWN_POINTER,isrow_d));
  PetscCall(ISGetBlockSize(isrow,&i));
  PetscCall(ISSetBlockSize(*isrow_d,i));

  /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */
  PetscCall(VecScatterBegin(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD));
  PetscCall(VecScatterEnd(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD));

  PetscCall(VecDuplicate(lvec,&lcmap));

  PetscCall(VecScatterBegin(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD));
  PetscCall(VecScatterEnd(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD));

  /* (3) create sequential iscol_o (a subset of iscol) and isgarray */
  /* off-process column indices */
  count = 0;
  PetscCall(PetscMalloc1(Bn,&idx));
  PetscCall(PetscMalloc1(Bn,&cmap1));

  PetscCall(VecGetArray(lvec,&xarray));
  PetscCall(VecGetArray(lcmap,&cmaparray));
  for (i=0; i<Bn; i++) {
    /* entries still at -1 were not selected by any process's iscol */
    if (PetscRealPart(xarray[i]) > -1.0) {
      idx[count] = i; /* local column index in off-diagonal part B */
      cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]); /* column index in submat */
      count++;
    }
  }
  PetscCall(VecRestoreArray(lvec,&xarray));
  PetscCall(VecRestoreArray(lcmap,&cmaparray));

  PetscCall(ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_COPY_VALUES,iscol_o));
  /* cannot ensure iscol_o has same blocksize as iscol! */

  PetscCall(PetscFree(idx));
  *garray = cmap1; /* caller takes ownership and must PetscFree() it */

  PetscCall(VecDestroy(&x));
  PetscCall(VecDestroy(&cmap));
  PetscCall(VecDestroy(&lcmap));
  PetscFunctionReturn(0);
}

/* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */
PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *submat)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)mat->data,*asub;
  Mat M = NULL;
  MPI_Comm comm;
  IS iscol_d,isrow_d,iscol_o;
  Mat Asub = NULL,Bsub = NULL;
  PetscInt n;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)mat,&comm));

  if (call == MAT_REUSE_MATRIX) {
    /* Retrieve isrow_d, iscol_d and iscol_o from submat */
    PetscCall(PetscObjectQuery((PetscObject)*submat,"isrow_d",(PetscObject*)&isrow_d));
    PetscCheck(isrow_d,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"isrow_d passed in was not used before, cannot reuse");
PetscCall(PetscObjectQuery((PetscObject)*submat,"iscol_d",(PetscObject*)&iscol_d));
    PetscCheck(iscol_d,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_d passed in was not used before, cannot reuse");

    PetscCall(PetscObjectQuery((PetscObject)*submat,"iscol_o",(PetscObject*)&iscol_o));
    PetscCheck(iscol_o,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_o passed in was not used before, cannot reuse");

    /* Update diagonal and off-diagonal portions of submat */
    asub = (Mat_MPIAIJ*)(*submat)->data;
    PetscCall(MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->A));
    PetscCall(ISGetLocalSize(iscol_o,&n));
    if (n) {
      PetscCall(MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->B));
    }
    PetscCall(MatAssemblyBegin(*submat,MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(*submat,MAT_FINAL_ASSEMBLY));

  } else { /* call == MAT_INITIAL_MATRIX) */
    const PetscInt *garray;
    PetscInt BsubN;

    /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) */
    PetscCall(ISGetSeqIS_SameColDist_Private(mat,isrow,iscol,&isrow_d,&iscol_d,&iscol_o,&garray));

    /* Create local submatrices Asub and Bsub */
    PetscCall(MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Asub));
    PetscCall(MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Bsub));

    /* Create submatrix M */
    PetscCall(MatCreateMPIAIJWithSeqAIJ(comm,Asub,Bsub,garray,&M));

    /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */
    asub = (Mat_MPIAIJ*)M->data;

    PetscCall(ISGetLocalSize(iscol_o,&BsubN));
    n = asub->B->cmap->N;
    if (BsubN > n) {
      /* This case can be tested using ~petsc/src/tao/bound/tutorials/runplate2_3 */
      const PetscInt *idx;
      PetscInt i,j,*idx_new,*subgarray = asub->garray;
      PetscCall(PetscInfo(M,"submatrix Bn %" PetscInt_FMT " != BsubN %" PetscInt_FMT ", update iscol_o\n",n,BsubN));

      PetscCall(PetscMalloc1(n,&idx_new));
      j = 0;
      PetscCall(ISGetIndices(iscol_o,&idx));
      /* both garray and subgarray are sorted; merge-match them to keep only columns
         that survived the condensation */
      for (i=0; i<n; i++) {
        if (j >= BsubN) break;
        while (subgarray[i] > garray[j]) j++;

        if (subgarray[i] == garray[j]) {
          idx_new[i] = idx[j++];
        } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"subgarray[%" PetscInt_FMT "]=%" PetscInt_FMT " cannot < garray[%" PetscInt_FMT "]=%" PetscInt_FMT,i,subgarray[i],j,garray[j]);
      }
      PetscCall(ISRestoreIndices(iscol_o,&idx));

      PetscCall(ISDestroy(&iscol_o));
      PetscCall(ISCreateGeneral(PETSC_COMM_SELF,n,idx_new,PETSC_OWN_POINTER,&iscol_o));

    } else if (BsubN < n) {
      SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Columns of Bsub (%" PetscInt_FMT ") cannot be smaller than B's (%" PetscInt_FMT ")",BsubN,asub->B->cmap->N);
    }

    PetscCall(PetscFree(garray));
    *submat = M;

    /* Save isrow_d, iscol_d and iscol_o used in processor for next request */
    PetscCall(PetscObjectCompose((PetscObject)M,"isrow_d",(PetscObject)isrow_d));
    PetscCall(ISDestroy(&isrow_d));

    PetscCall(PetscObjectCompose((PetscObject)M,"iscol_d",(PetscObject)iscol_d));
    PetscCall(ISDestroy(&iscol_d));

    PetscCall(PetscObjectCompose((PetscObject)M,"iscol_o",(PetscObject)iscol_o));
    PetscCall(ISDestroy(&iscol_o));
  }
  PetscFunctionReturn(0);
}

/* Top-level submatrix extraction; chooses the most scalable algorithm based on
   whether isrow/iscol share the matrix's processor distribution. */
PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
{
  IS iscol_local=NULL,isrow_d;
  PetscInt csize;
  PetscInt n,i,j,start,end;
  PetscBool sameRowDist=PETSC_FALSE,sameDist[2],tsameDist[2];
  MPI_Comm comm;

  PetscFunctionBegin;
  /* If isrow has same processor distribution as mat,
     call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */
  if (call == MAT_REUSE_MATRIX) {
    /* the objects composed on the previous call tell us which algorithm created *newmat */
    PetscCall(PetscObjectQuery((PetscObject)*newmat,"isrow_d",(PetscObject*)&isrow_d));
    if (isrow_d) {
      sameRowDist = PETSC_TRUE;
      tsameDist[1] = PETSC_TRUE; /* sameColDist */
    } else {
      PetscCall(PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_local));
      if (iscol_local) {
        sameRowDist = PETSC_TRUE;
        tsameDist[1] = PETSC_FALSE; /* !sameColDist */
      }
    }
  } else {
    /* Check if isrow has same processor distribution as mat */
    sameDist[0] = PETSC_FALSE;
    PetscCall(ISGetLocalSize(isrow,&n));
    if (!n) {
      sameDist[0] = PETSC_TRUE;
    } else {
      PetscCall(ISGetMinMax(isrow,&i,&j));
      PetscCall(MatGetOwnershipRange(mat,&start,&end));
      if (i >= start && j < end) {
        sameDist[0] = PETSC_TRUE;
      }
    }

    /* Check if iscol has same processor distribution as mat */
    sameDist[1] = PETSC_FALSE;
    PetscCall(ISGetLocalSize(iscol,&n));
    if (!n) {
      sameDist[1] = PETSC_TRUE;
    } else {
      PetscCall(ISGetMinMax(iscol,&i,&j));
PetscCall(MatGetOwnershipRangeColumn(mat,&start,&end));
      if (i >= start && j < end) sameDist[1] = PETSC_TRUE;
    }

    /* all processes must agree on the distribution properties */
    PetscCall(PetscObjectGetComm((PetscObject)mat,&comm));
    PetscCall(MPIU_Allreduce(&sameDist,&tsameDist,2,MPIU_BOOL,MPI_LAND,comm));
    sameRowDist = tsameDist[0];
  }

  if (sameRowDist) {
    if (tsameDist[1]) { /* sameRowDist & sameColDist */
      /* isrow and iscol have same processor distribution as mat */
      PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat,isrow,iscol,call,newmat));
      PetscFunctionReturn(0);
    } else { /* sameRowDist */
      /* isrow has same processor distribution as mat */
      if (call == MAT_INITIAL_MATRIX) {
        PetscBool sorted;
        PetscCall(ISGetSeqIS_Private(mat,iscol,&iscol_local));
        PetscCall(ISGetLocalSize(iscol_local,&n)); /* local size of iscol_local = global columns of newmat */
        PetscCall(ISGetSize(iscol,&i));
        PetscCheckFalse(n != i,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"n %" PetscInt_FMT " != size of iscol %" PetscInt_FMT,n,i);

        PetscCall(ISSorted(iscol_local,&sorted));
        if (sorted) {
          /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local be sorted; it can have duplicate indices */
          PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,iscol_local,MAT_INITIAL_MATRIX,newmat));
          PetscFunctionReturn(0);
        }
      } else { /* call == MAT_REUSE_MATRIX */
        IS iscol_sub;
        PetscCall(PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub));
        if (iscol_sub) {
          PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,NULL,call,newmat));
          PetscFunctionReturn(0);
        }
      }
    }
  }

  /* General case: iscol -> iscol_local which has global size of iscol */
  if (call == MAT_REUSE_MATRIX) {
    PetscCall(PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local));
    PetscCheck(iscol_local,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
  } else {
    if (!iscol_local) {
      PetscCall(ISGetSeqIS_Private(mat,iscol,&iscol_local));
    }
  }

  PetscCall(ISGetLocalSize(iscol,&csize));
  PetscCall(MatCreateSubMatrix_MPIAIJ_nonscalable(mat,isrow,iscol_local,csize,call,newmat));

  if (call == MAT_INITIAL_MATRIX) {
    /* stash iscol_local on the result so MAT_REUSE_MATRIX calls can find it */
    PetscCall(PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local));
    PetscCall(ISDestroy(&iscol_local));
  }
  PetscFunctionReturn(0);
}

/*@C
     MatCreateMPIAIJWithSeqAIJ - creates a MPIAIJ matrix using SeqAIJ matrices that contain the "diagonal"
   and "off-diagonal" part of the matrix in CSR format.

   Collective

   Input Parameters:
+  comm - MPI communicator
.  A - "diagonal" portion of matrix
.  B - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine
-  garray - global index of B columns

   Output Parameter:
.   mat - the matrix, with input A as its local diagonal matrix
   Level: advanced

   Notes:
       See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix.
       A becomes part of output mat, B is destroyed by this routine. The user cannot use A and B anymore.
3385 3386 .seealso: MatCreateMPIAIJWithSplitArrays() 3387 @*/ 3388 PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm,Mat A,Mat B,const PetscInt garray[],Mat *mat) 3389 { 3390 Mat_MPIAIJ *maij; 3391 Mat_SeqAIJ *b=(Mat_SeqAIJ*)B->data,*bnew; 3392 PetscInt *oi=b->i,*oj=b->j,i,nz,col; 3393 const PetscScalar *oa; 3394 Mat Bnew; 3395 PetscInt m,n,N; 3396 MatType mpi_mat_type; 3397 3398 PetscFunctionBegin; 3399 PetscCall(MatCreate(comm,mat)); 3400 PetscCall(MatGetSize(A,&m,&n)); 3401 PetscCheckFalse(m != B->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Am %" PetscInt_FMT " != Bm %" PetscInt_FMT,m,B->rmap->N); 3402 PetscCheckFalse(A->rmap->bs != B->rmap->bs,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A row bs %" PetscInt_FMT " != B row bs %" PetscInt_FMT,A->rmap->bs,B->rmap->bs); 3403 /* remove check below; When B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be same as A */ 3404 /* PetscCheckFalse(A->cmap->bs != B->cmap->bs,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %" PetscInt_FMT " != B column bs %" PetscInt_FMT,A->cmap->bs,B->cmap->bs); */ 3405 3406 /* Get global columns of mat */ 3407 PetscCall(MPIU_Allreduce(&n,&N,1,MPIU_INT,MPI_SUM,comm)); 3408 3409 PetscCall(MatSetSizes(*mat,m,n,PETSC_DECIDE,N)); 3410 /* Determine the type of MPI matrix that should be created from the type of matrix A, which holds the "diagonal" portion. 
*/ 3411 PetscCall(MatGetMPIMatType_Private(A,&mpi_mat_type)); 3412 PetscCall(MatSetType(*mat,mpi_mat_type)); 3413 3414 PetscCall(MatSetBlockSizes(*mat,A->rmap->bs,A->cmap->bs)); 3415 maij = (Mat_MPIAIJ*)(*mat)->data; 3416 3417 (*mat)->preallocated = PETSC_TRUE; 3418 3419 PetscCall(PetscLayoutSetUp((*mat)->rmap)); 3420 PetscCall(PetscLayoutSetUp((*mat)->cmap)); 3421 3422 /* Set A as diagonal portion of *mat */ 3423 maij->A = A; 3424 3425 nz = oi[m]; 3426 for (i=0; i<nz; i++) { 3427 col = oj[i]; 3428 oj[i] = garray[col]; 3429 } 3430 3431 /* Set Bnew as off-diagonal portion of *mat */ 3432 PetscCall(MatSeqAIJGetArrayRead(B,&oa)); 3433 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,N,oi,oj,(PetscScalar*)oa,&Bnew)); 3434 PetscCall(MatSeqAIJRestoreArrayRead(B,&oa)); 3435 bnew = (Mat_SeqAIJ*)Bnew->data; 3436 bnew->maxnz = b->maxnz; /* allocated nonzeros of B */ 3437 maij->B = Bnew; 3438 3439 PetscCheckFalse(B->rmap->N != Bnew->rmap->N,PETSC_COMM_SELF,PETSC_ERR_PLIB,"BN %" PetscInt_FMT " != BnewN %" PetscInt_FMT,B->rmap->N,Bnew->rmap->N); 3440 3441 b->singlemalloc = PETSC_FALSE; /* B arrays are shared by Bnew */ 3442 b->free_a = PETSC_FALSE; 3443 b->free_ij = PETSC_FALSE; 3444 PetscCall(MatDestroy(&B)); 3445 3446 bnew->singlemalloc = PETSC_TRUE; /* arrays will be freed by MatDestroy(&Bnew) */ 3447 bnew->free_a = PETSC_TRUE; 3448 bnew->free_ij = PETSC_TRUE; 3449 3450 /* condense columns of maij->B */ 3451 PetscCall(MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE)); 3452 PetscCall(MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY)); 3453 PetscCall(MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY)); 3454 PetscCall(MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE)); 3455 PetscCall(MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE)); 3456 PetscFunctionReturn(0); 3457 } 3458 3459 extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool,Mat*); 3460 3461 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat 
mat,IS isrow,IS iscol,IS iscol_local,MatReuse call,Mat *newmat)
{
  PetscInt       i,m,n,rstart,row,rend,nz,j,bs,cbs;
  PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
  Mat_MPIAIJ     *a=(Mat_MPIAIJ*)mat->data;
  Mat            M,Msub,B=a->B;
  MatScalar      *aa;
  Mat_SeqAIJ     *aij;
  PetscInt       *garray = a->garray,*colsub,Ncols;
  PetscInt       count,Bn=B->cmap->N,cstart=mat->cmap->rstart,cend=mat->cmap->rend;
  IS             iscol_sub,iscmap;
  const PetscInt *is_idx,*cmap;
  PetscBool      allcolumns=PETSC_FALSE;
  MPI_Comm       comm;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)mat,&comm));
  if (call == MAT_REUSE_MATRIX) {
    /* retrieve the column IS, column map and sequential submatrix stashed on *newmat by the previous MAT_INITIAL_MATRIX call */
    PetscCall(PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub));
    PetscCheck(iscol_sub,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"SubIScol passed in was not used before, cannot reuse");
    PetscCall(ISGetLocalSize(iscol_sub,&count));

    PetscCall(PetscObjectQuery((PetscObject)*newmat,"Subcmap",(PetscObject*)&iscmap));
    PetscCheck(iscmap,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Subcmap passed in was not used before, cannot reuse");

    PetscCall(PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Msub));
    PetscCheck(Msub,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");

    PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_REUSE_MATRIX,PETSC_FALSE,&Msub));

  } else { /* call == MAT_INITIAL_MATRIX) */
    PetscBool flg;

    PetscCall(ISGetLocalSize(iscol,&n));
    PetscCall(ISGetSize(iscol,&Ncols));

    /* (1) iscol -> nonscalable iscol_local */
    /* Check for special case: each processor gets entire matrix columns */
    PetscCall(ISIdentity(iscol_local,&flg));
    if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE;
    PetscCall(MPIU_Allreduce(MPI_IN_PLACE,&allcolumns,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)mat)));
    if (allcolumns) {
      iscol_sub = iscol_local;
      PetscCall(PetscObjectReference((PetscObject)iscol_local));
      PetscCall(ISCreateStride(PETSC_COMM_SELF,n,0,1,&iscmap));

    } else {
      /* (2) iscol_local -> iscol_sub and iscmap. Implementation below requires iscol_local be sorted, it can have duplicate indices */
      PetscInt *idx,*cmap1,k;
      PetscCall(PetscMalloc1(Ncols,&idx));
      PetscCall(PetscMalloc1(Ncols,&cmap1));
      PetscCall(ISGetIndices(iscol_local,&is_idx));
      count = 0;
      k     = 0;
      for (i=0; i<Ncols; i++) {
        j = is_idx[i];
        if (j >= cstart && j < cend) {
          /* diagonal part of mat */
          idx[count]     = j;
          cmap1[count++] = i; /* column index in submat */
        } else if (Bn) {
          /* off-diagonal part of mat; advance k through the (sorted) garray until it reaches j */
          if (j == garray[k]) {
            idx[count]     = j;
            cmap1[count++] = i; /* column index in submat */
          } else if (j > garray[k]) {
            while (j > garray[k] && k < Bn-1) k++;
            if (j == garray[k]) {
              idx[count]     = j;
              cmap1[count++] = i; /* column index in submat */
            }
          }
        }
      }
      PetscCall(ISRestoreIndices(iscol_local,&is_idx));

      PetscCall(ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_OWN_POINTER,&iscol_sub));
      PetscCall(ISGetBlockSize(iscol,&cbs));
      PetscCall(ISSetBlockSize(iscol_sub,cbs));

      PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local),count,cmap1,PETSC_OWN_POINTER,&iscmap));
    }

    /* (3) Create sequential Msub */
    PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_INITIAL_MATRIX,allcolumns,&Msub));
  }

  PetscCall(ISGetLocalSize(iscol_sub,&count));
  aij = (Mat_SeqAIJ*)(Msub)->data;
  ii  = aij->i;
  PetscCall(ISGetIndices(iscmap,&cmap));

  /*
      m - number of local rows
      Ncols - number of columns (same on all processors)
      rstart - first row in new global matrix generated
  */
  PetscCall(MatGetSize(Msub,&m,NULL));

  if (call == MAT_INITIAL_MATRIX) {
    /* (4) Create parallel newmat */
    PetscMPIInt rank,size;
    PetscInt    csize;

    PetscCallMPI(MPI_Comm_size(comm,&size));
    PetscCallMPI(MPI_Comm_rank(comm,&rank));

    /*
        Determine the number of non-zeros in the diagonal and off-diagonal
        portions of the matrix in order to do correct preallocation
    */

    /* first get start and end of "diagonal" columns */
    PetscCall(ISGetLocalSize(iscol,&csize));
    if (csize == PETSC_DECIDE) {
      PetscCall(ISGetSize(isrow,&mglobal));
      if (mglobal == Ncols) { /* square matrix */
        nlocal = m;
      } else {
        nlocal = Ncols/size + ((Ncols % size) > rank);
      }
    } else {
      nlocal = csize;
    }
    /* prefix sum of local column counts gives this rank's [rstart,rend) column ownership range */
    PetscCallMPI(MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm));
    rstart = rend - nlocal;
    PetscCheckFalse(rank == size - 1 && rend != Ncols,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT,rend,Ncols);

    /* next, compute all the lengths */
    jj = aij->j;
    PetscCall(PetscMalloc1(2*m+1,&dlens));
    olens = dlens + m; /* dlens and olens share a single allocation */
    for (i=0; i<m; i++) {
      jend = ii[i+1] - ii[i];
      olen = 0;
      dlen = 0;
      for (j=0; j<jend; j++) {
        if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++;
        else dlen++;
        jj++;
      }
      olens[i] = olen;
      dlens[i] = dlen;
    }

    PetscCall(ISGetBlockSize(isrow,&bs));
    PetscCall(ISGetBlockSize(iscol,&cbs));

    PetscCall(MatCreate(comm,&M));
    PetscCall(MatSetSizes(M,m,nlocal,PETSC_DECIDE,Ncols));
    PetscCall(MatSetBlockSizes(M,bs,cbs));
    PetscCall(MatSetType(M,((PetscObject)mat)->type_name));
    PetscCall(MatMPIAIJSetPreallocation(M,0,dlens,0,olens));
    PetscCall(PetscFree(dlens));

  } else { /* call == MAT_REUSE_MATRIX */
    M = *newmat;
    PetscCall(MatGetLocalSize(M,&i,NULL));
    PetscCheckFalse(i != m,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
    PetscCall(MatZeroEntries(M));
    /*
         The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
       rather than the slower MatSetValues().
    */
    M->was_assembled = PETSC_TRUE;
    M->assembled     = PETSC_FALSE;
  }

  /* (5) Set values of Msub to *newmat */
  PetscCall(PetscMalloc1(count,&colsub));
  PetscCall(MatGetOwnershipRange(M,&rstart,NULL));

  jj = aij->j;
  PetscCall(MatSeqAIJGetArrayRead(Msub,(const PetscScalar**)&aa));
  for (i=0; i<m; i++) {
    row = rstart + i;
    nz  = ii[i+1] - ii[i];
    /* translate Msub's local column numbering to the submatrix's global numbering via cmap */
    for (j=0; j<nz; j++) colsub[j] = cmap[jj[j]];
    PetscCall(MatSetValues_MPIAIJ(M,1,&row,nz,colsub,aa,INSERT_VALUES));
    jj += nz; aa += nz;
  }
  PetscCall(MatSeqAIJRestoreArrayRead(Msub,(const PetscScalar**)&aa));
  PetscCall(ISRestoreIndices(iscmap,&cmap));

  PetscCall(MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY));

  PetscCall(PetscFree(colsub));

  /* save Msub, iscol_sub and iscmap used in processor for next request */
  if (call == MAT_INITIAL_MATRIX) {
    *newmat = M;
    PetscCall(PetscObjectCompose((PetscObject)(*newmat),"SubMatrix",(PetscObject)Msub));
    PetscCall(MatDestroy(&Msub));

    PetscCall(PetscObjectCompose((PetscObject)(*newmat),"SubIScol",(PetscObject)iscol_sub));
    PetscCall(ISDestroy(&iscol_sub));

    PetscCall(PetscObjectCompose((PetscObject)(*newmat),"Subcmap",(PetscObject)iscmap));
    PetscCall(ISDestroy(&iscmap));

    if (iscol_local) {
      PetscCall(PetscObjectCompose((PetscObject)(*newmat),"ISAllGather",(PetscObject)iscol_local));
      PetscCall(ISDestroy(&iscol_local));
    }
  }
  PetscFunctionReturn(0);
}

/*
    Not great since it makes two copies of the submatrix, first an SeqAIJ
  in local and then by concatenating the local
matrices the end result.
  Writing it directly would be much like MatCreateSubMatrices_MPIAIJ()

    Note: This requires a sequential iscol with all indices.
*/
PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
{
  PetscMPIInt rank,size;
  PetscInt    i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs;
  PetscInt    *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
  Mat         M,Mreuse;
  MatScalar   *aa,*vwork;
  MPI_Comm    comm;
  Mat_SeqAIJ  *aij;
  PetscBool   colflag,allcolumns=PETSC_FALSE;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)mat,&comm));
  PetscCallMPI(MPI_Comm_rank(comm,&rank));
  PetscCallMPI(MPI_Comm_size(comm,&size));

  /* Check for special case: each processor gets entire matrix columns */
  PetscCall(ISIdentity(iscol,&colflag));
  PetscCall(ISGetLocalSize(iscol,&n));
  if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE;
  PetscCall(MPIU_Allreduce(MPI_IN_PLACE,&allcolumns,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)mat)));

  if (call == MAT_REUSE_MATRIX) {
    /* reuse the sequential submatrix stashed on *newmat by a previous MAT_INITIAL_MATRIX call */
    PetscCall(PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse));
    PetscCheck(Mreuse,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
    PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,allcolumns,&Mreuse));
  } else {
    PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,allcolumns,&Mreuse));
  }

  /*
      m - number of local rows
      n - number of columns (same on all processors)
      rstart - first row in new global matrix generated
  */
  PetscCall(MatGetSize(Mreuse,&m,&n));
  PetscCall(MatGetBlockSizes(Mreuse,&bs,&cbs));
  if (call == MAT_INITIAL_MATRIX) {
    aij = (Mat_SeqAIJ*)(Mreuse)->data;
    ii  = aij->i;
    jj  = aij->j;

    /*
        Determine the number of non-zeros in the diagonal and off-diagonal
        portions of the matrix in order to do correct preallocation
    */

    /* first get start and end of "diagonal" columns */
    if (csize == PETSC_DECIDE) {
      PetscCall(ISGetSize(isrow,&mglobal));
      if (mglobal == n) { /* square matrix */
        nlocal = m;
      } else {
        nlocal = n/size + ((n % size) > rank);
      }
    } else {
      nlocal = csize;
    }
    /* prefix sum of local column counts gives this rank's [rstart,rend) column ownership range */
    PetscCallMPI(MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm));
    rstart = rend - nlocal;
    PetscCheckFalse(rank == size - 1 && rend != n,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT,rend,n);

    /* next, compute all the lengths */
    PetscCall(PetscMalloc1(2*m+1,&dlens));
    olens = dlens + m; /* dlens and olens share a single allocation */
    for (i=0; i<m; i++) {
      jend = ii[i+1] - ii[i];
      olen = 0;
      dlen = 0;
      for (j=0; j<jend; j++) {
        if (*jj < rstart || *jj >= rend) olen++;
        else dlen++;
        jj++;
      }
      olens[i] = olen;
      dlens[i] = dlen;
    }
    PetscCall(MatCreate(comm,&M));
    PetscCall(MatSetSizes(M,m,nlocal,PETSC_DECIDE,n));
    PetscCall(MatSetBlockSizes(M,bs,cbs));
    PetscCall(MatSetType(M,((PetscObject)mat)->type_name));
    PetscCall(MatMPIAIJSetPreallocation(M,0,dlens,0,olens));
    PetscCall(PetscFree(dlens));
  } else {
    PetscInt ml,nl;

    M = *newmat;
    PetscCall(MatGetLocalSize(M,&ml,&nl));
    PetscCheckFalse(ml != m,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
    PetscCall(MatZeroEntries(M));
    /*
         The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
       rather than the slower MatSetValues().
    */
    M->was_assembled = PETSC_TRUE;
    M->assembled     = PETSC_FALSE;
  }
  PetscCall(MatGetOwnershipRange(M,&rstart,&rend));
  aij = (Mat_SeqAIJ*)(Mreuse)->data;
  ii  = aij->i;
  jj  = aij->j;

  /* trigger copy to CPU if needed */
  PetscCall(MatSeqAIJGetArrayRead(Mreuse,(const PetscScalar**)&aa));
  for (i=0; i<m; i++) {
    row = rstart + i;
    nz  = ii[i+1] - ii[i];
    cwork = jj; jj += nz;
    vwork = aa; aa += nz;
    PetscCall(MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES));
  }
  PetscCall(MatSeqAIJRestoreArrayRead(Mreuse,(const PetscScalar**)&aa));

  PetscCall(MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY));
  *newmat = M;

  /* save submatrix used in processor for next request */
  if (call == MAT_INITIAL_MATRIX) {
    PetscCall(PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse));
    PetscCall(MatDestroy(&Mreuse));
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
{
  PetscInt       m,cstart, cend,j,nnz,i,d;
  PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
  const PetscInt *JJ;
  PetscBool      nooffprocentries;

  PetscFunctionBegin;
  PetscCheckFalse(Ii[0],PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %" PetscInt_FMT,Ii[0]);

  PetscCall(PetscLayoutSetUp(B->rmap));
  PetscCall(PetscLayoutSetUp(B->cmap));
  m      = B->rmap->n;
  cstart = B->cmap->rstart;
  cend   = B->cmap->rend;
  rstart = B->rmap->rstart;

  PetscCall(PetscCalloc2(m,&d_nnz,m,&o_nnz));

  if (PetscDefined(USE_DEBUG)) {
    /* validate the CSR structure (debug builds only) before using it below */
    for (i=0; i<m; i++) {
      nnz = Ii[i+1]- Ii[i];
      JJ  = J + Ii[i];
      PetscCheckFalse(nnz < 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %" PetscInt_FMT " has a negative %" PetscInt_FMT " number of columns",i,nnz);
      PetscCheckFalse(nnz && (JJ[0] < 0),PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %" PetscInt_FMT " starts with negative column index %" PetscInt_FMT,i,JJ[0]);
      PetscCheckFalse(nnz && (JJ[nnz-1] >= B->cmap->N),PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %" PetscInt_FMT " ends with too large a column index %" PetscInt_FMT " (max allowed %" PetscInt_FMT ")",i,JJ[nnz-1],B->cmap->N);
    }
  }

  /* count, per row, the entries falling inside the diagonal block columns [cstart,cend) */
  for (i=0; i<m; i++) {
    nnz     = Ii[i+1]- Ii[i];
    JJ      = J + Ii[i];
    nnz_max = PetscMax(nnz_max,nnz);
    d       = 0;
    for (j=0; j<nnz; j++) {
      if (cstart <= JJ[j] && JJ[j] < cend) d++;
    }
    d_nnz[i] = d;
    o_nnz[i] = nnz - d;
  }
  PetscCall(MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz));
  PetscCall(PetscFree2(d_nnz,o_nnz));

  for (i=0; i<m; i++) {
    ii = i + rstart;
    PetscCall(MatSetValues_MPIAIJ(B,1,&ii,Ii[i+1] - Ii[i],J+Ii[i], v ? v + Ii[i] : NULL,INSERT_VALUES));
  }
  /* every value inserted above is locally owned, so skip off-process communication during assembly */
  nooffprocentries    = B->nooffprocentries;
  B->nooffprocentries = PETSC_TRUE;
  PetscCall(MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY));
  B->nooffprocentries = nooffprocentries;

  PetscCall(MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE));
  PetscFunctionReturn(0);
}

/*@
   MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
   (the default parallel PETSc format).

   Collective

   Input Parameters:
+  B - the matrix
.  i - the indices into j for the start of each local row (starts with zero)
.  j - the column indices for each local row (starts with zero)
-  v - optional values in the matrix

   Level: developer

   Notes:
       The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
     thus you CANNOT change the matrix entries by changing the values of v[] after you have
     called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.

       The i and j indices are 0 based, and i indices are indices corresponding to the local j array.

       The format which is used for the sparse matrix input, is equivalent to a
    row-major ordering, i.e. for the following matrix, the input data expected is
    as shown

$        1 0 0
$        2 0 3     P0
$       -------
$        4 5 6     P1
$
$     Process0 [P0]: rows_owned=[0,1]
$        i =  {0,1,3}  [size = nrow+1  = 2+1]
$        j =  {0,0,2}  [size = 3]
$        v =  {1,2,3}  [size = 3]
$
$     Process1 [P1]: rows_owned=[2]
$        i =  {0,3}    [size = nrow+1  = 1+1]
$        j =  {0,1,2}  [size = 3]
$        v =  {4,5,6}  [size = 3]

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MATMPIAIJ,
          MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
@*/
PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
{
  PetscFunctionBegin;
  /* dispatch to the matrix-type-specific implementation (silently a no-op if the type provides none) */
  PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));
  PetscFunctionReturn(0);
}

/*@C
   MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
   (the default parallel PETSc format).  For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective

   Input Parameters:
+  B - the matrix
.  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
           (same value is used for all local rows)
.
d_nnz - array containing the number of nonzeros in the various rows of the
           DIAGONAL portion of the local submatrix (possibly different for each row)
           or NULL (PETSC_NULL_INTEGER in Fortran), if d_nz is used to specify the nonzero structure.
           The size of this array is equal to the number of local rows, i.e 'm'.
           For matrices that will be factored, you must leave room for (and set)
           the diagonal entry even if it is zero.
.  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
           submatrix (same value is used for all local rows).
-  o_nnz - array containing the number of nonzeros in the various rows of the
           OFF-DIAGONAL portion of the local submatrix (possibly different for
           each row) or NULL (PETSC_NULL_INTEGER in Fortran), if o_nz is used to specify the nonzero
           structure. The size of this array is equal to the number
           of local rows, i.e 'm'.

   If the *_nnz parameter is given then the *_nz parameter is ignored

   The AIJ format (also called the Yale sparse matrix format or
   compressed row storage (CSR)), is fully compatible with standard Fortran 77
   storage.  The stored row and column indices begin with zero.
   See Users-Manual: ch_mat for details.

   The parallel matrix is partitioned such that the first m0 rows belong to
   process 0, the next m1 rows belong to process 1, the next m2 rows belong
   to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.

   The DIAGONAL portion of the local submatrix of a processor can be defined
   as the submatrix which is obtained by extracting the part corresponding to
   the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
   first row that belongs to the processor, r2 is the last row belonging to
   this processor, and c1-c2 is the range of indices of the local part of a
   vector suitable for applying the matrix to.  This is an mxn matrix.  In the
   common case of a square matrix, the row and column ranges are the same and
   the DIAGONAL part is also square. The remaining portion of the local
   submatrix (mxN) constitute the OFF-DIAGONAL portion.

   If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.

   You can call MatGetInfo() to get information on how effective the preallocation was;
   for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
   You can also run with the option -info and look for messages with the string
   malloc in them to see if additional memory allocation was needed.

   Example usage:

   Consider the following 8x8 matrix with 34 non-zero values, that is
   assembled across 3 processors. Let's assume that proc0 owns 3 rows,
   proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
   as follows:

.vb
            1  2  0  |  0  3  0  |  0  4
    Proc0   0  5  6  |  7  0  0  |  8  0
            9  0 10  | 11  0  0  | 12  0
    -------------------------------------
           13  0 14  | 15 16 17  |  0  0
    Proc1   0 18  0  | 19 20 21  |  0  0
            0  0  0  | 22 23  0  | 24  0
    -------------------------------------
    Proc2  25 26 27  |  0  0 28  | 29  0
           30  0  0  | 31 32 33  |  0 34
.ve

   This can be represented as a collection of submatrices as:

.vb
      A B C
      D E F
      G H I
.ve

   Where the submatrices A,B,C are owned by proc0, D,E,F are
   owned by proc1, G,H,I are owned by proc2.

   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'M','N' parameters are 8,8, and have the same values on all procs.

   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
   Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
   part as SeqAIJ matrices. E.g. proc1 will store [E] as a SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.

   When d_nz, o_nz parameters are specified, d_nz storage elements are
   allocated for every row of the local diagonal submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submat.
   One way to choose d_nz and o_nz is to use the max nonzeros per local
   rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
   In this case, the values of d_nz,o_nz are:
.vb
     proc0 : d_nz = 2, o_nz = 2
     proc1 : d_nz = 3, o_nz = 2
     proc2 : d_nz = 1, o_nz = 4
.ve
   We are allocating m*(d_nz+o_nz) storage locations for every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc2. i.e we are using 12+15+10=37 storage locations to store
   34 values.

   When d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
   In the above case the values for d_nnz,o_nnz are:
.vb
     proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2: d_nnz = [1,1]   and o_nnz = [4,4]
.ve
   Here the space allocated is the sum of all the above values i.e 34, and
   hence pre-allocation is perfect.
   Level: intermediate

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
          MATMPIAIJ, MatGetInfo(), PetscSplitOwnership()
@*/
PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(B,MAT_CLASSID,1);
  PetscValidType(B,1);
  /* dispatch to the matrix-type-specific implementation (silently a no-op if the type provides none) */
  PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));
  PetscFunctionReturn(0);
}

/*@
   MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain the local rows
   in standard CSR format.

   Collective

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (Cannot be PETSC_DECIDE)
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
       calculated if N is given) For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.  i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
.  j - column indices
-  a - matrix values

   Output Parameter:
.  mat - the matrix

   Level: intermediate

   Notes:
       The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
     thus you CANNOT change the matrix entries by changing the values of a[] after you have
     called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.

       The i and j indices are 0 based, and i indices are indices corresponding to the local j array.

       The format which is used for the sparse matrix input, is equivalent to a
    row-major ordering, i.e. for the following matrix, the input data expected is
    as shown

       Once you have created the matrix you can update it with new numerical values using MatUpdateMPIAIJWithArrays

$        1 0 0
$        2 0 3     P0
$       -------
$        4 5 6     P1
$
$     Process0 [P0]: rows_owned=[0,1]
$        i =  {0,1,3}  [size = nrow+1  = 2+1]
$        j =  {0,0,2}  [size = 3]
$        v =  {1,2,3}  [size = 3]
$
$     Process1 [P1]: rows_owned=[2]
$        i =  {0,3}    [size = nrow+1  = 1+1]
$        j =  {0,1,2}  [size = 3]
$        v =  {4,5,6}  [size = 3]

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
          MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays(), MatUpdateMPIAIJWithArrays()
@*/
PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
{
  PetscFunctionBegin;
  PetscCheckFalse(i && i[0],PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
  PetscCheckFalse(m < 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
  PetscCall(MatCreate(comm,mat));
  PetscCall(MatSetSizes(*mat,m,n,M,N));
  /* PetscCall(MatSetBlockSizes(M,bs,cbs)); */
  PetscCall(MatSetType(*mat,MATMPIAIJ));
  /* the CSR preallocation routine also copies in the values, so the matrix is fully assembled on return */
  PetscCall(MatMPIAIJSetPreallocationCSR(*mat,i,j,a));
  PetscFunctionReturn(0);
}

/*@
   MatUpdateMPIAIJWithArrays - updates a MPI AIJ matrix using arrays that contain the local rows
   in standard CSR format. Only the numerical values are updated; the other arrays must be identical

   Collective

   Input Parameters:
+  mat - the matrix
.  m - number of local rows (Cannot be PETSC_DECIDE)
.
n - This value should be the same as the local size used in creating the 4124 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4125 calculated if N is given) For square matrices n is almost always m. 4126 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4127 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4128 . Ii - row indices; that is Ii[0] = 0, Ii[row] = Ii[row-1] + number of elements in that row of the matrix 4129 . J - column indices 4130 - v - matrix values 4131 4132 Level: intermediate 4133 4134 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4135 MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays(), MatUpdateMPIAIJWithArrays() 4136 @*/ 4137 PetscErrorCode MatUpdateMPIAIJWithArrays(Mat mat,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[]) 4138 { 4139 PetscInt cstart,nnz,i,j; 4140 PetscInt *ld; 4141 PetscBool nooffprocentries; 4142 Mat_MPIAIJ *Aij = (Mat_MPIAIJ*)mat->data; 4143 Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)Aij->A->data; 4144 PetscScalar *ad,*ao; 4145 const PetscInt *Adi = Ad->i; 4146 PetscInt ldi,Iii,md; 4147 4148 PetscFunctionBegin; 4149 PetscCheckFalse(Ii[0],PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 4150 PetscCheckFalse(m < 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4151 PetscCheckFalse(m != mat->rmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Local number of rows cannot change from call to MatUpdateMPIAIJWithArrays()"); 4152 PetscCheckFalse(n != mat->cmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Local number of columns cannot change from call to MatUpdateMPIAIJWithArrays()"); 4153 4154 PetscCall(MatSeqAIJGetArrayWrite(Aij->A,&ad)); 4155 PetscCall(MatSeqAIJGetArrayWrite(Aij->B,&ao)); 4156 cstart = mat->cmap->rstart; 4157 if (!Aij->ld) { 
4158 /* count number of entries below block diagonal */ 4159 PetscCall(PetscCalloc1(m,&ld)); 4160 Aij->ld = ld; 4161 for (i=0; i<m; i++) { 4162 nnz = Ii[i+1]- Ii[i]; 4163 j = 0; 4164 while (J[j] < cstart && j < nnz) {j++;} 4165 J += nnz; 4166 ld[i] = j; 4167 } 4168 } else { 4169 ld = Aij->ld; 4170 } 4171 4172 for (i=0; i<m; i++) { 4173 nnz = Ii[i+1]- Ii[i]; 4174 Iii = Ii[i]; 4175 ldi = ld[i]; 4176 md = Adi[i+1]-Adi[i]; 4177 PetscCall(PetscArraycpy(ao,v + Iii,ldi)); 4178 PetscCall(PetscArraycpy(ad,v + Iii + ldi,md)); 4179 PetscCall(PetscArraycpy(ao + ldi,v + Iii + ldi + md,nnz - ldi - md)); 4180 ad += md; 4181 ao += nnz - md; 4182 } 4183 nooffprocentries = mat->nooffprocentries; 4184 mat->nooffprocentries = PETSC_TRUE; 4185 PetscCall(MatSeqAIJRestoreArrayWrite(Aij->A,&ad)); 4186 PetscCall(MatSeqAIJRestoreArrayWrite(Aij->B,&ao)); 4187 PetscCall(PetscObjectStateIncrease((PetscObject)Aij->A)); 4188 PetscCall(PetscObjectStateIncrease((PetscObject)Aij->B)); 4189 PetscCall(PetscObjectStateIncrease((PetscObject)mat)); 4190 PetscCall(MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY)); 4191 PetscCall(MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY)); 4192 mat->nooffprocentries = nooffprocentries; 4193 PetscFunctionReturn(0); 4194 } 4195 4196 /*@C 4197 MatCreateAIJ - Creates a sparse parallel matrix in AIJ format 4198 (the default parallel PETSc format). For good matrix assembly performance 4199 the user should preallocate the matrix storage by setting the parameters 4200 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 4201 performance can be increased by more than a factor of 50. 4202 4203 Collective 4204 4205 Input Parameters: 4206 + comm - MPI communicator 4207 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 4208 This value should be the same as the local size used in creating the 4209 y vector for the matrix-vector product y = Ax. 4210 . 
n - This value should be the same as the local size used in creating the 4211 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4212 calculated if N is given) For square matrices n is almost always m. 4213 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4214 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4215 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 4216 (same value is used for all local rows) 4217 . d_nnz - array containing the number of nonzeros in the various rows of the 4218 DIAGONAL portion of the local submatrix (possibly different for each row) 4219 or NULL, if d_nz is used to specify the nonzero structure. 4220 The size of this array is equal to the number of local rows, i.e 'm'. 4221 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 4222 submatrix (same value is used for all local rows). 4223 - o_nnz - array containing the number of nonzeros in the various rows of the 4224 OFF-DIAGONAL portion of the local submatrix (possibly different for 4225 each row) or NULL, if o_nz is used to specify the nonzero 4226 structure. The size of this array is equal to the number 4227 of local rows, i.e 'm'. 4228 4229 Output Parameter: 4230 . A - the matrix 4231 4232 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 4233 MatXXXXSetPreallocation() paradigm instead of this routine directly. 4234 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 4235 4236 Notes: 4237 If the *_nnz parameter is given then the *_nz parameter is ignored 4238 4239 m,n,M,N parameters specify the size of the matrix, and its partitioning across 4240 processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 4241 storage requirements for this matrix. 

   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
   processor then it must be used on all processors that share the object for
   that argument.

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   The parallel matrix is partitioned across processors such that the
   first m0 rows belong to process 0, the next m1 rows belong to
   process 1, the next m2 rows belong to process 2 etc.. where
   m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores
   values corresponding to [m x N] submatrix.

   The columns are logically partitioned with the n0 columns belonging
   to 0th partition, the next n1 columns belonging to the next
   partition etc.. where n0,n1,n2... are the input parameter 'n'.

   The DIAGONAL portion of the local submatrix on any given processor
   is the submatrix corresponding to the rows and columns m,n
   corresponding to the given processor. i.e diagonal matrix on
   process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1]
   etc. The remaining portion of the local submatrix [m x (N-n)]
   constitute the OFF-DIAGONAL portion. The example below better
   illustrates this concept.

   For a square global matrix we define each processor's diagonal portion
   to be its local rows and the corresponding columns (a square submatrix);
   each processor's off-diagonal portion encompasses the remainder of the
   local matrix (a rectangular submatrix).

   If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.

   When calling this routine with a single process communicator, a matrix of
   type SEQAIJ is returned.
If a matrix of type MPIAIJ is desired for this 4277 type of communicator, use the construction mechanism 4278 .vb 4279 MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...); 4280 .ve 4281 4282 $ MatCreate(...,&A); 4283 $ MatSetType(A,MATMPIAIJ); 4284 $ MatSetSizes(A, m,n,M,N); 4285 $ MatMPIAIJSetPreallocation(A,...); 4286 4287 By default, this format uses inodes (identical nodes) when possible. 4288 We search for consecutive rows with the same nonzero structure, thereby 4289 reusing matrix information to achieve increased efficiency. 4290 4291 Options Database Keys: 4292 + -mat_no_inode - Do not use inodes 4293 . -mat_inode_limit <limit> - Sets inode limit (max limit=5) 4294 - -matmult_vecscatter_view <viewer> - View the vecscatter (i.e., communication pattern) used in MatMult() of sparse parallel matrices. 4295 See viewer types in manual of MatView(). Of them, ascii_matlab, draw or binary cause the vecscatter be viewed as a matrix. 4296 Entry (i,j) is the size of message (in bytes) rank i sends to rank j in one MatMult() call. 4297 4298 Example usage: 4299 4300 Consider the following 8x8 matrix with 34 non-zero values, that is 4301 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 4302 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 4303 as follows 4304 4305 .vb 4306 1 2 0 | 0 3 0 | 0 4 4307 Proc0 0 5 6 | 7 0 0 | 8 0 4308 9 0 10 | 11 0 0 | 12 0 4309 ------------------------------------- 4310 13 0 14 | 15 16 17 | 0 0 4311 Proc1 0 18 0 | 19 20 21 | 0 0 4312 0 0 0 | 22 23 0 | 24 0 4313 ------------------------------------- 4314 Proc2 25 26 27 | 0 0 28 | 29 0 4315 30 0 0 | 31 32 33 | 0 34 4316 .ve 4317 4318 This can be represented as a collection of submatrices as 4319 4320 .vb 4321 A B C 4322 D E F 4323 G H I 4324 .ve 4325 4326 Where the submatrices A,B,C are owned by proc0, D,E,F are 4327 owned by proc1, G,H,I are owned by proc2. 

   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'M','N' parameters are 8,8, and have the same values on all procs.

   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
   Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
   part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.

   When d_nz, o_nz parameters are specified, d_nz storage elements are
   allocated for every row of the local diagonal submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submat.
   One way to choose d_nz and o_nz is to use the max nonzeros per local
   rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
   In this case, the values of d_nz,o_nz are
.vb
     proc0 : dnz = 2, o_nz = 2
     proc1 : dnz = 3, o_nz = 2
     proc2 : dnz = 1, o_nz = 4
.ve
   We are allocating m*(d_nz+o_nz) storage locations for every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc2. i.e we are using 12+15+10=37 storage locations to store
   34 values.

   When d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
   In the above case the values for d_nnz,o_nnz are
.vb
     proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2: d_nnz = [1,1] and o_nnz = [4,4]
.ve
   Here the space allocated is sum of all the above values i.e 34, and
   hence pre-allocation is perfect.
4366 4367 Level: intermediate 4368 4369 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4370 MATMPIAIJ, MatCreateMPIAIJWithArrays() 4371 @*/ 4372 PetscErrorCode MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 4373 { 4374 PetscMPIInt size; 4375 4376 PetscFunctionBegin; 4377 PetscCall(MatCreate(comm,A)); 4378 PetscCall(MatSetSizes(*A,m,n,M,N)); 4379 PetscCallMPI(MPI_Comm_size(comm,&size)); 4380 if (size > 1) { 4381 PetscCall(MatSetType(*A,MATMPIAIJ)); 4382 PetscCall(MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz)); 4383 } else { 4384 PetscCall(MatSetType(*A,MATSEQAIJ)); 4385 PetscCall(MatSeqAIJSetPreallocation(*A,d_nz,d_nnz)); 4386 } 4387 PetscFunctionReturn(0); 4388 } 4389 4390 /*@C 4391 MatMPIAIJGetSeqAIJ - Returns the local piece of this distributed matrix 4392 4393 Not collective 4394 4395 Input Parameter: 4396 . A - The MPIAIJ matrix 4397 4398 Output Parameters: 4399 + Ad - The local diagonal block as a SeqAIJ matrix 4400 . Ao - The local off-diagonal block as a SeqAIJ matrix 4401 - colmap - An array mapping local column numbers of Ao to global column numbers of the parallel matrix 4402 4403 Note: The rows in Ad and Ao are in [0, Nr), where Nr is the number of local rows on this process. The columns 4404 in Ad are in [0, Nc) where Nc is the number of local columns. The columns are Ao are in [0, Nco), where Nco is 4405 the number of nonzero columns in the local off-diagonal piece of the matrix A. The array colmap maps these 4406 local column numbers to global column numbers in the original matrix. 
4407 4408 Level: intermediate 4409 4410 .seealso: MatMPIAIJGetLocalMat(), MatMPIAIJGetLocalMatCondensed(), MatCreateAIJ(), MATMPIAIJ, MATSEQAIJ 4411 @*/ 4412 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[]) 4413 { 4414 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4415 PetscBool flg; 4416 4417 PetscFunctionBegin; 4418 PetscCall(PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&flg)); 4419 PetscCheck(flg,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MATMPIAIJ matrix as input"); 4420 if (Ad) *Ad = a->A; 4421 if (Ao) *Ao = a->B; 4422 if (colmap) *colmap = a->garray; 4423 PetscFunctionReturn(0); 4424 } 4425 4426 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 4427 { 4428 PetscErrorCode ierr; 4429 PetscInt m,N,i,rstart,nnz,Ii; 4430 PetscInt *indx; 4431 PetscScalar *values; 4432 MatType rootType; 4433 4434 PetscFunctionBegin; 4435 PetscCall(MatGetSize(inmat,&m,&N)); 4436 if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */ 4437 PetscInt *dnz,*onz,sum,bs,cbs; 4438 4439 if (n == PETSC_DECIDE) { 4440 PetscCall(PetscSplitOwnership(comm,&n,&N)); 4441 } 4442 /* Check sum(n) = N */ 4443 PetscCall(MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm)); 4444 PetscCheckFalse(sum != N,PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %" PetscInt_FMT " != global columns %" PetscInt_FMT,sum,N); 4445 4446 PetscCallMPI(MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm)); 4447 rstart -= m; 4448 4449 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);PetscCall(ierr); 4450 for (i=0; i<m; i++) { 4451 PetscCall(MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL)); 4452 PetscCall(MatPreallocateSet(i+rstart,nnz,indx,dnz,onz)); 4453 PetscCall(MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL)); 4454 } 4455 4456 PetscCall(MatCreate(comm,outmat)); 4457 PetscCall(MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE)); 4458 PetscCall(MatGetBlockSizes(inmat,&bs,&cbs)); 4459 
PetscCall(MatSetBlockSizes(*outmat,bs,cbs)); 4460 PetscCall(MatGetRootType_Private(inmat,&rootType)); 4461 PetscCall(MatSetType(*outmat,rootType)); 4462 PetscCall(MatSeqAIJSetPreallocation(*outmat,0,dnz)); 4463 PetscCall(MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz)); 4464 ierr = MatPreallocateFinalize(dnz,onz);PetscCall(ierr); 4465 PetscCall(MatSetOption(*outmat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE)); 4466 } 4467 4468 /* numeric phase */ 4469 PetscCall(MatGetOwnershipRange(*outmat,&rstart,NULL)); 4470 for (i=0; i<m; i++) { 4471 PetscCall(MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values)); 4472 Ii = i + rstart; 4473 PetscCall(MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES)); 4474 PetscCall(MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values)); 4475 } 4476 PetscCall(MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY)); 4477 PetscCall(MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY)); 4478 PetscFunctionReturn(0); 4479 } 4480 4481 PetscErrorCode MatFileSplit(Mat A,char *outfile) 4482 { 4483 PetscMPIInt rank; 4484 PetscInt m,N,i,rstart,nnz; 4485 size_t len; 4486 const PetscInt *indx; 4487 PetscViewer out; 4488 char *name; 4489 Mat B; 4490 const PetscScalar *values; 4491 4492 PetscFunctionBegin; 4493 PetscCall(MatGetLocalSize(A,&m,NULL)); 4494 PetscCall(MatGetSize(A,NULL,&N)); 4495 /* Should this be the type of the diagonal block of A? 
*/ 4496 PetscCall(MatCreate(PETSC_COMM_SELF,&B)); 4497 PetscCall(MatSetSizes(B,m,N,m,N)); 4498 PetscCall(MatSetBlockSizesFromMats(B,A,A)); 4499 PetscCall(MatSetType(B,MATSEQAIJ)); 4500 PetscCall(MatSeqAIJSetPreallocation(B,0,NULL)); 4501 PetscCall(MatGetOwnershipRange(A,&rstart,NULL)); 4502 for (i=0; i<m; i++) { 4503 PetscCall(MatGetRow(A,i+rstart,&nnz,&indx,&values)); 4504 PetscCall(MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES)); 4505 PetscCall(MatRestoreRow(A,i+rstart,&nnz,&indx,&values)); 4506 } 4507 PetscCall(MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY)); 4508 PetscCall(MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY)); 4509 4510 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank)); 4511 PetscCall(PetscStrlen(outfile,&len)); 4512 PetscCall(PetscMalloc1(len+6,&name)); 4513 PetscCall(PetscSNPrintf(name,len+6,"%s.%d",outfile,rank)); 4514 PetscCall(PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out)); 4515 PetscCall(PetscFree(name)); 4516 PetscCall(MatView(B,out)); 4517 PetscCall(PetscViewerDestroy(&out)); 4518 PetscCall(MatDestroy(&B)); 4519 PetscFunctionReturn(0); 4520 } 4521 4522 static PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(void *data) 4523 { 4524 Mat_Merge_SeqsToMPI *merge = (Mat_Merge_SeqsToMPI *)data; 4525 4526 PetscFunctionBegin; 4527 if (!merge) PetscFunctionReturn(0); 4528 PetscCall(PetscFree(merge->id_r)); 4529 PetscCall(PetscFree(merge->len_s)); 4530 PetscCall(PetscFree(merge->len_r)); 4531 PetscCall(PetscFree(merge->bi)); 4532 PetscCall(PetscFree(merge->bj)); 4533 PetscCall(PetscFree(merge->buf_ri[0])); 4534 PetscCall(PetscFree(merge->buf_ri)); 4535 PetscCall(PetscFree(merge->buf_rj[0])); 4536 PetscCall(PetscFree(merge->buf_rj)); 4537 PetscCall(PetscFree(merge->coi)); 4538 PetscCall(PetscFree(merge->coj)); 4539 PetscCall(PetscFree(merge->owners_co)); 4540 PetscCall(PetscLayoutDestroy(&merge->rowmap)); 4541 PetscCall(PetscFree(merge)); 4542 PetscFunctionReturn(0); 4543 } 4544 4545 #include <../src/mat/utils/freespace.h> 4546 #include 
<petscbt.h> 4547 4548 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat) 4549 { 4550 MPI_Comm comm; 4551 Mat_SeqAIJ *a =(Mat_SeqAIJ*)seqmat->data; 4552 PetscMPIInt size,rank,taga,*len_s; 4553 PetscInt N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj; 4554 PetscInt proc,m; 4555 PetscInt **buf_ri,**buf_rj; 4556 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 4557 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 4558 MPI_Request *s_waits,*r_waits; 4559 MPI_Status *status; 4560 const MatScalar *aa,*a_a; 4561 MatScalar **abuf_r,*ba_i; 4562 Mat_Merge_SeqsToMPI *merge; 4563 PetscContainer container; 4564 4565 PetscFunctionBegin; 4566 PetscCall(PetscObjectGetComm((PetscObject)mpimat,&comm)); 4567 PetscCall(PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0)); 4568 4569 PetscCallMPI(MPI_Comm_size(comm,&size)); 4570 PetscCallMPI(MPI_Comm_rank(comm,&rank)); 4571 4572 PetscCall(PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container)); 4573 PetscCheck(container,PetscObjectComm((PetscObject)mpimat),PETSC_ERR_PLIB,"Mat not created from MatCreateMPIAIJSumSeqAIJSymbolic"); 4574 PetscCall(PetscContainerGetPointer(container,(void**)&merge)); 4575 PetscCall(MatSeqAIJGetArrayRead(seqmat,&a_a)); 4576 aa = a_a; 4577 4578 bi = merge->bi; 4579 bj = merge->bj; 4580 buf_ri = merge->buf_ri; 4581 buf_rj = merge->buf_rj; 4582 4583 PetscCall(PetscMalloc1(size,&status)); 4584 owners = merge->rowmap->range; 4585 len_s = merge->len_s; 4586 4587 /* send and recv matrix values */ 4588 /*-----------------------------*/ 4589 PetscCall(PetscObjectGetNewTag((PetscObject)mpimat,&taga)); 4590 PetscCall(PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits)); 4591 4592 PetscCall(PetscMalloc1(merge->nsend+1,&s_waits)); 4593 for (proc=0,k=0; proc<size; proc++) { 4594 if (!len_s[proc]) continue; 4595 i = owners[proc]; 4596 PetscCallMPI(MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k)); 4597 k++; 4598 } 4599 4600 if 
(merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv,r_waits,status)); 4601 if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend,s_waits,status)); 4602 PetscCall(PetscFree(status)); 4603 4604 PetscCall(PetscFree(s_waits)); 4605 PetscCall(PetscFree(r_waits)); 4606 4607 /* insert mat values of mpimat */ 4608 /*----------------------------*/ 4609 PetscCall(PetscMalloc1(N,&ba_i)); 4610 PetscCall(PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai)); 4611 4612 for (k=0; k<merge->nrecv; k++) { 4613 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4614 nrows = *(buf_ri_k[k]); 4615 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */ 4616 nextai[k] = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure */ 4617 } 4618 4619 /* set values of ba */ 4620 m = merge->rowmap->n; 4621 for (i=0; i<m; i++) { 4622 arow = owners[rank] + i; 4623 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 4624 bnzi = bi[i+1] - bi[i]; 4625 PetscCall(PetscArrayzero(ba_i,bnzi)); 4626 4627 /* add local non-zero vals of this proc's seqmat into ba */ 4628 anzi = ai[arow+1] - ai[arow]; 4629 aj = a->j + ai[arow]; 4630 aa = a_a + ai[arow]; 4631 nextaj = 0; 4632 for (j=0; nextaj<anzi; j++) { 4633 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4634 ba_i[j] += aa[nextaj++]; 4635 } 4636 } 4637 4638 /* add received vals into ba */ 4639 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 4640 /* i-th row */ 4641 if (i == *nextrow[k]) { 4642 anzi = *(nextai[k]+1) - *nextai[k]; 4643 aj = buf_rj[k] + *(nextai[k]); 4644 aa = abuf_r[k] + *(nextai[k]); 4645 nextaj = 0; 4646 for (j=0; nextaj<anzi; j++) { 4647 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4648 ba_i[j] += aa[nextaj++]; 4649 } 4650 } 4651 nextrow[k]++; nextai[k]++; 4652 } 4653 } 4654 PetscCall(MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES)); 4655 } 4656 PetscCall(MatSeqAIJRestoreArrayRead(seqmat,&a_a)); 4657 
PetscCall(MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY)); 4658 PetscCall(MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY)); 4659 4660 PetscCall(PetscFree(abuf_r[0])); 4661 PetscCall(PetscFree(abuf_r)); 4662 PetscCall(PetscFree(ba_i)); 4663 PetscCall(PetscFree3(buf_ri_k,nextrow,nextai)); 4664 PetscCall(PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0)); 4665 PetscFunctionReturn(0); 4666 } 4667 4668 PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 4669 { 4670 PetscErrorCode ierr; 4671 Mat B_mpi; 4672 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4673 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 4674 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 4675 PetscInt M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j; 4676 PetscInt len,proc,*dnz,*onz,bs,cbs; 4677 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 4678 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 4679 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 4680 MPI_Status *status; 4681 PetscFreeSpaceList free_space=NULL,current_space=NULL; 4682 PetscBT lnkbt; 4683 Mat_Merge_SeqsToMPI *merge; 4684 PetscContainer container; 4685 4686 PetscFunctionBegin; 4687 PetscCall(PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0)); 4688 4689 /* make sure it is a PETSc comm */ 4690 PetscCall(PetscCommDuplicate(comm,&comm,NULL)); 4691 PetscCallMPI(MPI_Comm_size(comm,&size)); 4692 PetscCallMPI(MPI_Comm_rank(comm,&rank)); 4693 4694 PetscCall(PetscNew(&merge)); 4695 PetscCall(PetscMalloc1(size,&status)); 4696 4697 /* determine row ownership */ 4698 /*---------------------------------------------------------*/ 4699 PetscCall(PetscLayoutCreate(comm,&merge->rowmap)); 4700 PetscCall(PetscLayoutSetLocalSize(merge->rowmap,m)); 4701 PetscCall(PetscLayoutSetSize(merge->rowmap,M)); 4702 PetscCall(PetscLayoutSetBlockSize(merge->rowmap,1)); 4703 PetscCall(PetscLayoutSetUp(merge->rowmap)); 4704 PetscCall(PetscMalloc1(size,&len_si)); 4705 
PetscCall(PetscMalloc1(size,&merge->len_s)); 4706 4707 m = merge->rowmap->n; 4708 owners = merge->rowmap->range; 4709 4710 /* determine the number of messages to send, their lengths */ 4711 /*---------------------------------------------------------*/ 4712 len_s = merge->len_s; 4713 4714 len = 0; /* length of buf_si[] */ 4715 merge->nsend = 0; 4716 for (proc=0; proc<size; proc++) { 4717 len_si[proc] = 0; 4718 if (proc == rank) { 4719 len_s[proc] = 0; 4720 } else { 4721 len_si[proc] = owners[proc+1] - owners[proc] + 1; 4722 len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 4723 } 4724 if (len_s[proc]) { 4725 merge->nsend++; 4726 nrows = 0; 4727 for (i=owners[proc]; i<owners[proc+1]; i++) { 4728 if (ai[i+1] > ai[i]) nrows++; 4729 } 4730 len_si[proc] = 2*(nrows+1); 4731 len += len_si[proc]; 4732 } 4733 } 4734 4735 /* determine the number and length of messages to receive for ij-structure */ 4736 /*-------------------------------------------------------------------------*/ 4737 PetscCall(PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv)); 4738 PetscCall(PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri)); 4739 4740 /* post the Irecv of j-structure */ 4741 /*-------------------------------*/ 4742 PetscCall(PetscCommGetNewTag(comm,&tagj)); 4743 PetscCall(PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits)); 4744 4745 /* post the Isend of j-structure */ 4746 /*--------------------------------*/ 4747 PetscCall(PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits)); 4748 4749 for (proc=0, k=0; proc<size; proc++) { 4750 if (!len_s[proc]) continue; 4751 i = owners[proc]; 4752 PetscCallMPI(MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k)); 4753 k++; 4754 } 4755 4756 /* receives and sends of j-structure are complete */ 4757 /*------------------------------------------------*/ 4758 if (merge->nrecv) 
PetscCallMPI(MPI_Waitall(merge->nrecv,rj_waits,status)); 4759 if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend,sj_waits,status)); 4760 4761 /* send and recv i-structure */ 4762 /*---------------------------*/ 4763 PetscCall(PetscCommGetNewTag(comm,&tagi)); 4764 PetscCall(PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits)); 4765 4766 PetscCall(PetscMalloc1(len+1,&buf_s)); 4767 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4768 for (proc=0,k=0; proc<size; proc++) { 4769 if (!len_s[proc]) continue; 4770 /* form outgoing message for i-structure: 4771 buf_si[0]: nrows to be sent 4772 [1:nrows]: row index (global) 4773 [nrows+1:2*nrows+1]: i-structure index 4774 */ 4775 /*-------------------------------------------*/ 4776 nrows = len_si[proc]/2 - 1; 4777 buf_si_i = buf_si + nrows+1; 4778 buf_si[0] = nrows; 4779 buf_si_i[0] = 0; 4780 nrows = 0; 4781 for (i=owners[proc]; i<owners[proc+1]; i++) { 4782 anzi = ai[i+1] - ai[i]; 4783 if (anzi) { 4784 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 4785 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 4786 nrows++; 4787 } 4788 } 4789 PetscCallMPI(MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k)); 4790 k++; 4791 buf_si += len_si[proc]; 4792 } 4793 4794 if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv,ri_waits,status)); 4795 if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend,si_waits,status)); 4796 4797 PetscCall(PetscInfo(seqmat,"nsend: %d, nrecv: %d\n",merge->nsend,merge->nrecv)); 4798 for (i=0; i<merge->nrecv; i++) { 4799 PetscCall(PetscInfo(seqmat,"recv len_ri=%d, len_rj=%d from [%d]\n",len_ri[i],merge->len_r[i],merge->id_r[i])); 4800 } 4801 4802 PetscCall(PetscFree(len_si)); 4803 PetscCall(PetscFree(len_ri)); 4804 PetscCall(PetscFree(rj_waits)); 4805 PetscCall(PetscFree2(si_waits,sj_waits)); 4806 PetscCall(PetscFree(ri_waits)); 4807 PetscCall(PetscFree(buf_s)); 4808 PetscCall(PetscFree(status)); 4809 4810 /* compute a 
local seq matrix in each processor */ 4811 /*----------------------------------------------*/ 4812 /* allocate bi array and free space for accumulating nonzero column info */ 4813 PetscCall(PetscMalloc1(m+1,&bi)); 4814 bi[0] = 0; 4815 4816 /* create and initialize a linked list */ 4817 nlnk = N+1; 4818 PetscCall(PetscLLCreate(N,N,nlnk,lnk,lnkbt)); 4819 4820 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 4821 len = ai[owners[rank+1]] - ai[owners[rank]]; 4822 PetscCall(PetscFreeSpaceGet(PetscIntMultTruncate(2,len)+1,&free_space)); 4823 4824 current_space = free_space; 4825 4826 /* determine symbolic info for each local row */ 4827 PetscCall(PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai)); 4828 4829 for (k=0; k<merge->nrecv; k++) { 4830 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4831 nrows = *buf_ri_k[k]; 4832 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 4833 nextai[k] = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure */ 4834 } 4835 4836 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);PetscCall(ierr); 4837 len = 0; 4838 for (i=0; i<m; i++) { 4839 bnzi = 0; 4840 /* add local non-zero cols of this proc's seqmat into lnk */ 4841 arow = owners[rank] + i; 4842 anzi = ai[arow+1] - ai[arow]; 4843 aj = a->j + ai[arow]; 4844 PetscCall(PetscLLAddSorted(anzi,aj,N,&nlnk,lnk,lnkbt)); 4845 bnzi += nlnk; 4846 /* add received col data into lnk */ 4847 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 4848 if (i == *nextrow[k]) { /* i-th row */ 4849 anzi = *(nextai[k]+1) - *nextai[k]; 4850 aj = buf_rj[k] + *nextai[k]; 4851 PetscCall(PetscLLAddSorted(anzi,aj,N,&nlnk,lnk,lnkbt)); 4852 bnzi += nlnk; 4853 nextrow[k]++; nextai[k]++; 4854 } 4855 } 4856 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 4857 4858 /* if free space is not available, make more free space */ 4859 if (current_space->local_remaining<bnzi) { 4860 
PetscCall(PetscFreeSpaceGet(PetscIntSumTruncate(bnzi,current_space->total_array_size),¤t_space)); 4861 nspacedouble++; 4862 } 4863 /* copy data into free space, then initialize lnk */ 4864 PetscCall(PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt)); 4865 PetscCall(MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz)); 4866 4867 current_space->array += bnzi; 4868 current_space->local_used += bnzi; 4869 current_space->local_remaining -= bnzi; 4870 4871 bi[i+1] = bi[i] + bnzi; 4872 } 4873 4874 PetscCall(PetscFree3(buf_ri_k,nextrow,nextai)); 4875 4876 PetscCall(PetscMalloc1(bi[m]+1,&bj)); 4877 PetscCall(PetscFreeSpaceContiguous(&free_space,bj)); 4878 PetscCall(PetscLLDestroy(lnk,lnkbt)); 4879 4880 /* create symbolic parallel matrix B_mpi */ 4881 /*---------------------------------------*/ 4882 PetscCall(MatGetBlockSizes(seqmat,&bs,&cbs)); 4883 PetscCall(MatCreate(comm,&B_mpi)); 4884 if (n==PETSC_DECIDE) { 4885 PetscCall(MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N)); 4886 } else { 4887 PetscCall(MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE)); 4888 } 4889 PetscCall(MatSetBlockSizes(B_mpi,bs,cbs)); 4890 PetscCall(MatSetType(B_mpi,MATMPIAIJ)); 4891 PetscCall(MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz)); 4892 ierr = MatPreallocateFinalize(dnz,onz);PetscCall(ierr); 4893 PetscCall(MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE)); 4894 4895 /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */ 4896 B_mpi->assembled = PETSC_FALSE; 4897 merge->bi = bi; 4898 merge->bj = bj; 4899 merge->buf_ri = buf_ri; 4900 merge->buf_rj = buf_rj; 4901 merge->coi = NULL; 4902 merge->coj = NULL; 4903 merge->owners_co = NULL; 4904 4905 PetscCall(PetscCommDestroy(&comm)); 4906 4907 /* attach the supporting struct to B_mpi for reuse */ 4908 PetscCall(PetscContainerCreate(PETSC_COMM_SELF,&container)); 4909 PetscCall(PetscContainerSetPointer(container,merge)); 4910 
PetscCall(PetscContainerSetUserDestroy(container,MatDestroy_MPIAIJ_SeqsToMPI)); 4911 PetscCall(PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container)); 4912 PetscCall(PetscContainerDestroy(&container)); 4913 *mpimat = B_mpi; 4914 4915 PetscCall(PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0)); 4916 PetscFunctionReturn(0); 4917 } 4918 4919 /*@C 4920 MatCreateMPIAIJSumSeqAIJ - Creates a MATMPIAIJ matrix by adding sequential 4921 matrices from each processor 4922 4923 Collective 4924 4925 Input Parameters: 4926 + comm - the communicators the parallel matrix will live on 4927 . seqmat - the input sequential matrices 4928 . m - number of local rows (or PETSC_DECIDE) 4929 . n - number of local columns (or PETSC_DECIDE) 4930 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4931 4932 Output Parameter: 4933 . mpimat - the parallel matrix generated 4934 4935 Level: advanced 4936 4937 Notes: 4938 The dimensions of the sequential matrix in each processor MUST be the same. 4939 The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be 4940 destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat. 
4941 @*/ 4942 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat) 4943 { 4944 PetscMPIInt size; 4945 4946 PetscFunctionBegin; 4947 PetscCallMPI(MPI_Comm_size(comm,&size)); 4948 if (size == 1) { 4949 PetscCall(PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0)); 4950 if (scall == MAT_INITIAL_MATRIX) { 4951 PetscCall(MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat)); 4952 } else { 4953 PetscCall(MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN)); 4954 } 4955 PetscCall(PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0)); 4956 PetscFunctionReturn(0); 4957 } 4958 PetscCall(PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0)); 4959 if (scall == MAT_INITIAL_MATRIX) { 4960 PetscCall(MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat)); 4961 } 4962 PetscCall(MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat)); 4963 PetscCall(PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0)); 4964 PetscFunctionReturn(0); 4965 } 4966 4967 /*@ 4968 MatMPIAIJGetLocalMat - Creates a SeqAIJ from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with 4969 mlocal rows and n columns. Where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained 4970 with MatGetSize() 4971 4972 Not Collective 4973 4974 Input Parameters: 4975 + A - the matrix 4976 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4977 4978 Output Parameter: 4979 . A_loc - the local sequential matrix generated 4980 4981 Level: developer 4982 4983 Notes: 4984 When the communicator associated with A has size 1 and MAT_INITIAL_MATRIX is requested, the matrix returned is the diagonal part of A. 4985 If MAT_REUSE_MATRIX is requested with comm size 1, MatCopy(Adiag,*A_loc,SAME_NONZERO_PATTERN) is called. 4986 This means that one can preallocate the proper sequential matrix first and then call this routine with MAT_REUSE_MATRIX to safely 4987 modify the values of the returned A_loc. 

.seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed(), MatMPIAIJGetLocalMatMerge()
@*/
PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
{
  Mat_MPIAIJ        *mpimat=(Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ        *mat,*a,*b;
  PetscInt          *ai,*aj,*bi,*bj,*cmap=mpimat->garray;
  const PetscScalar *aa,*ba,*aav,*bav;
  PetscScalar       *ca,*cam;
  PetscMPIInt       size;
  PetscInt          am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
  PetscInt          *ci,*cj,col,ncols_d,ncols_o,jo;
  PetscBool         match;

  PetscFunctionBegin;
  /* only plain MPIAIJ (and subclasses whose type name begins with it) are supported */
  PetscCall(PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&match));
  PetscCheck(match,PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input");
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A),&size));
  if (size == 1) {
    /* uniprocessor: the diagonal block IS the whole local matrix; hand out a
       reference (initial) or copy values into the caller's matrix (reuse) */
    if (scall == MAT_INITIAL_MATRIX) {
      PetscCall(PetscObjectReference((PetscObject)mpimat->A));
      *A_loc = mpimat->A;
    } else if (scall == MAT_REUSE_MATRIX) {
      PetscCall(MatCopy(mpimat->A,*A_loc,SAME_NONZERO_PATTERN));
    }
    PetscFunctionReturn(0);
  }

  PetscCall(PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0));
  a  = (Mat_SeqAIJ*)(mpimat->A)->data;
  b  = (Mat_SeqAIJ*)(mpimat->B)->data;
  /* CSR structure of the diagonal (a) and off-diagonal (b) local blocks */
  ai = a->i; aj = a->j; bi = b->i; bj = b->j;
  PetscCall(MatSeqAIJGetArrayRead(mpimat->A,&aav));
  PetscCall(MatSeqAIJGetArrayRead(mpimat->B,&bav));
  aa = aav;
  ba = bav;
  if (scall == MAT_INITIAL_MATRIX) {
    /* row i of the merged matrix holds all of A's and B's entries for that row */
    PetscCall(PetscMalloc1(1+am,&ci));
    ci[0] = 0;
    for (i=0; i<am; i++) {
      ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
    }
    PetscCall(PetscMalloc1(1+ci[am],&cj));
    PetscCall(PetscMalloc1(1+ci[am],&ca));
    k = 0;
    /* aj/aa and bj/ba are walked as cursors; each row is emitted in global
       column order: B-columns left of the diagonal block, then the diagonal
       block, then the remaining B-columns */
    for (i=0; i<am; i++) {
      ncols_o = bi[i+1] - bi[i];
      ncols_d = ai[i+1] - ai[i];
      /* off-diagonal portion of A */
      for (jo=0; jo<ncols_o; jo++) {
        col = cmap[*bj];
        if (col >= cstart) break;
        cj[k] = col; bj++;
        ca[k++] = *ba++;
      }
      /* diagonal portion of A */
      for (j=0; j<ncols_d; j++) {
        cj[k] = cstart + *aj++;
        ca[k++] = *aa++;
      }
      /* off-diagonal portion of A */
      for (j=jo; j<ncols_o; j++) {
        cj[k] = cmap[*bj++];
        ca[k++] = *ba++;
      }
    }
    /* put together the new matrix */
    PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc));
    /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
    /* Since these are PETSc arrays, change flags to free them as necessary. */
    mat = (Mat_SeqAIJ*)(*A_loc)->data;
    mat->free_a  = PETSC_TRUE;
    mat->free_ij = PETSC_TRUE;
    mat->nonew   = 0;
  } else if (scall == MAT_REUSE_MATRIX) {
    /* structure already exists: only refresh the values, in the same
       left-B / diagonal / right-B order as the initial pass */
    mat =(Mat_SeqAIJ*)(*A_loc)->data;
    ci = mat->i;
    cj = mat->j;
    PetscCall(MatSeqAIJGetArrayWrite(*A_loc,&cam));
    for (i=0; i<am; i++) {
      /* off-diagonal portion of A */
      ncols_o = bi[i+1] - bi[i];
      for (jo=0; jo<ncols_o; jo++) {
        col = cmap[*bj];
        if (col >= cstart) break;
        *cam++ = *ba++; bj++;
      }
      /* diagonal portion of A */
      ncols_d = ai[i+1] - ai[i];
      for (j=0; j<ncols_d; j++) *cam++ = *aa++;
      /* off-diagonal portion of A */
      for (j=jo; j<ncols_o; j++) {
        *cam++ = *ba++; bj++;
      }
    }
    PetscCall(MatSeqAIJRestoreArrayWrite(*A_loc,&cam));
  } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
  PetscCall(MatSeqAIJRestoreArrayRead(mpimat->A,&aav));
  PetscCall(MatSeqAIJRestoreArrayRead(mpimat->B,&bav));
  PetscCall(PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0));
  PetscFunctionReturn(0);
}

/*@
     MatMPIAIJGetLocalMatMerge - Creates a SeqAIJ from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
       mlocal rows and n columns.
Where n is the sum of the number of columns of the diagonal and offdiagonal part 5095 5096 Not Collective 5097 5098 Input Parameters: 5099 + A - the matrix 5100 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5101 5102 Output Parameters: 5103 + glob - sequential IS with global indices associated with the columns of the local sequential matrix generated (can be NULL) 5104 - A_loc - the local sequential matrix generated 5105 5106 Level: developer 5107 5108 Notes: 5109 This is different from MatMPIAIJGetLocalMat() since the first columns in the returning matrix are those associated with the diagonal part, then those associated with the offdiagonal part (in its local ordering) 5110 5111 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat(), MatMPIAIJGetLocalMatCondensed() 5112 5113 @*/ 5114 PetscErrorCode MatMPIAIJGetLocalMatMerge(Mat A,MatReuse scall,IS *glob,Mat *A_loc) 5115 { 5116 Mat Ao,Ad; 5117 const PetscInt *cmap; 5118 PetscMPIInt size; 5119 PetscErrorCode (*f)(Mat,MatReuse,IS*,Mat*); 5120 5121 PetscFunctionBegin; 5122 PetscCall(MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&cmap)); 5123 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A),&size)); 5124 if (size == 1) { 5125 if (scall == MAT_INITIAL_MATRIX) { 5126 PetscCall(PetscObjectReference((PetscObject)Ad)); 5127 *A_loc = Ad; 5128 } else if (scall == MAT_REUSE_MATRIX) { 5129 PetscCall(MatCopy(Ad,*A_loc,SAME_NONZERO_PATTERN)); 5130 } 5131 if (glob) PetscCall(ISCreateStride(PetscObjectComm((PetscObject)Ad),Ad->cmap->n,Ad->cmap->rstart,1,glob)); 5132 PetscFunctionReturn(0); 5133 } 5134 PetscCall(PetscObjectQueryFunction((PetscObject)A,"MatMPIAIJGetLocalMatMerge_C",&f)); 5135 PetscCall(PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0)); 5136 if (f) { 5137 PetscCall((*f)(A,scall,glob,A_loc)); 5138 } else { 5139 Mat_SeqAIJ *a = (Mat_SeqAIJ*)Ad->data; 5140 Mat_SeqAIJ *b = (Mat_SeqAIJ*)Ao->data; 5141 Mat_SeqAIJ *c; 5142 PetscInt *ai = a->i, *aj = a->j; 5143 PetscInt *bi = b->i, *bj = b->j; 5144 PetscInt *ci,*cj; 5145 
const PetscScalar *aa,*ba; 5146 PetscScalar *ca; 5147 PetscInt i,j,am,dn,on; 5148 5149 PetscCall(MatGetLocalSize(Ad,&am,&dn)); 5150 PetscCall(MatGetLocalSize(Ao,NULL,&on)); 5151 PetscCall(MatSeqAIJGetArrayRead(Ad,&aa)); 5152 PetscCall(MatSeqAIJGetArrayRead(Ao,&ba)); 5153 if (scall == MAT_INITIAL_MATRIX) { 5154 PetscInt k; 5155 PetscCall(PetscMalloc1(1+am,&ci)); 5156 PetscCall(PetscMalloc1(ai[am]+bi[am],&cj)); 5157 PetscCall(PetscMalloc1(ai[am]+bi[am],&ca)); 5158 ci[0] = 0; 5159 for (i=0,k=0; i<am; i++) { 5160 const PetscInt ncols_o = bi[i+1] - bi[i]; 5161 const PetscInt ncols_d = ai[i+1] - ai[i]; 5162 ci[i+1] = ci[i] + ncols_o + ncols_d; 5163 /* diagonal portion of A */ 5164 for (j=0; j<ncols_d; j++,k++) { 5165 cj[k] = *aj++; 5166 ca[k] = *aa++; 5167 } 5168 /* off-diagonal portion of A */ 5169 for (j=0; j<ncols_o; j++,k++) { 5170 cj[k] = dn + *bj++; 5171 ca[k] = *ba++; 5172 } 5173 } 5174 /* put together the new matrix */ 5175 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,dn+on,ci,cj,ca,A_loc)); 5176 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5177 /* Since these are PETSc arrays, change flags to free them as necessary. 
*/ 5178 c = (Mat_SeqAIJ*)(*A_loc)->data; 5179 c->free_a = PETSC_TRUE; 5180 c->free_ij = PETSC_TRUE; 5181 c->nonew = 0; 5182 PetscCall(MatSetType(*A_loc,((PetscObject)Ad)->type_name)); 5183 } else if (scall == MAT_REUSE_MATRIX) { 5184 PetscCall(MatSeqAIJGetArrayWrite(*A_loc,&ca)); 5185 for (i=0; i<am; i++) { 5186 const PetscInt ncols_d = ai[i+1] - ai[i]; 5187 const PetscInt ncols_o = bi[i+1] - bi[i]; 5188 /* diagonal portion of A */ 5189 for (j=0; j<ncols_d; j++) *ca++ = *aa++; 5190 /* off-diagonal portion of A */ 5191 for (j=0; j<ncols_o; j++) *ca++ = *ba++; 5192 } 5193 PetscCall(MatSeqAIJRestoreArrayWrite(*A_loc,&ca)); 5194 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 5195 PetscCall(MatSeqAIJRestoreArrayRead(Ad,&aa)); 5196 PetscCall(MatSeqAIJRestoreArrayRead(Ao,&aa)); 5197 if (glob) { 5198 PetscInt cst, *gidx; 5199 5200 PetscCall(MatGetOwnershipRangeColumn(A,&cst,NULL)); 5201 PetscCall(PetscMalloc1(dn+on,&gidx)); 5202 for (i=0; i<dn; i++) gidx[i] = cst + i; 5203 for (i=0; i<on; i++) gidx[i+dn] = cmap[i]; 5204 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)Ad),dn+on,gidx,PETSC_OWN_POINTER,glob)); 5205 } 5206 } 5207 PetscCall(PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0)); 5208 PetscFunctionReturn(0); 5209 } 5210 5211 /*@C 5212 MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MATMPIAIJ matrix by taking all its local rows and NON-ZERO columns 5213 5214 Not Collective 5215 5216 Input Parameters: 5217 + A - the matrix 5218 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5219 - row, col - index sets of rows and columns to extract (or NULL) 5220 5221 Output Parameter: 5222 . 
A_loc - the local sequential matrix generated

   Level: developer

.seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()

@*/
PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
{
  /* Extracts a sequential submatrix of the local rows (or the given rows) restricted to the columns
     that actually carry nonzeros (owned columns plus ghost columns from garray), via MatCreateSubMatrices(). */
  Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data;
  PetscInt   i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
  IS         isrowa,iscola;
  Mat        *aloc;
  PetscBool  match;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match));
  PetscCheck(match,PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input");
  PetscCall(PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0));
  if (!row) {
    /* default row IS: all locally owned rows */
    start = A->rmap->rstart; end = A->rmap->rend;
    PetscCall(ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa));
  } else {
    isrowa = *row;
  }
  if (!col) {
    /* default column IS: ghost columns below the owned range, owned columns, ghost columns above;
       relies on garray being sorted so the result is in ascending global order */
    start = A->cmap->rstart;
    cmap  = a->garray;
    nzA   = a->A->cmap->n;
    nzB   = a->B->cmap->n;
    PetscCall(PetscMalloc1(nzA+nzB, &idx));
    ncols = 0;
    for (i=0; i<nzB; i++) {
      if (cmap[i] < start) idx[ncols++] = cmap[i];
      else break;
    }
    imark = i;
    for (i=0; i<nzA; i++) idx[ncols++] = start + i;
    for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
    PetscCall(ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola));
  } else {
    iscola = *col;
  }
  if (scall != MAT_INITIAL_MATRIX) {
    /* MatCreateSubMatrices() expects the caller-provided array of matrices on reuse */
    PetscCall(PetscMalloc1(1,&aloc));
    aloc[0] = *A_loc;
  }
  PetscCall(MatCreateSubMatrices(A,1,&isrowa,&iscola,scall,&aloc));
  if (!col) { /* attach global id of condensed columns */
    PetscCall(PetscObjectCompose((PetscObject)aloc[0],"_petsc_GetLocalMatCondensed_iscol",(PetscObject)iscola));
  }
  *A_loc = aloc[0];
  PetscCall(PetscFree(aloc));
  if (!row) {
    PetscCall(ISDestroy(&isrowa));
  }
  if (!col) {
    PetscCall(ISDestroy(&iscola));
  }
  PetscCall(PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0));
  PetscFunctionReturn(0);
}

/*
 * Create a sequential AIJ matrix based on row indices: a whole row is extracted once it is matched.
 * Rows can be local or remote. The routine is designed to be scalable in memory so that nothing is based
 * on a global size.
 * */
PetscErrorCode MatCreateSeqSubMatrixWithRows_Private(Mat P,IS rows,Mat *P_oth)
{
  Mat_MPIAIJ             *p=(Mat_MPIAIJ*)P->data;
  Mat_SeqAIJ             *pd=(Mat_SeqAIJ*)(p->A)->data,*po=(Mat_SeqAIJ*)(p->B)->data,*p_oth;
  PetscInt               plocalsize,nrows,*ilocal,*oilocal,i,lidx,*nrcols,*nlcols,ncol;
  PetscMPIInt            owner;
  PetscSFNode            *iremote,*oiremote;
  const PetscInt         *lrowindices;
  PetscSF                sf,osf;
  PetscInt               pcstart,*roffsets,*loffsets,*pnnz,j;
  PetscInt               ontotalcols,dntotalcols,ntotalcols,nout;
  MPI_Comm               comm;
  ISLocalToGlobalMapping mapping;
  const PetscScalar      *pd_a,*po_a;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)P,&comm));
  /* plocalsize is the number of roots
   * nrows is the number of leaves
   * */
  PetscCall(MatGetLocalSize(P,&plocalsize,NULL));
  PetscCall(ISGetLocalSize(rows,&nrows));
  PetscCall(PetscCalloc1(nrows,&iremote));
  PetscCall(ISGetIndices(rows,&lrowindices));
  for (i=0;i<nrows;i++) {
    /* Find a remote index and an owner for a row
     * The row could be local or remote
     * */
    owner = 0;
    lidx  = 0;
    PetscCall(PetscLayoutFindOwnerIndex(P->rmap,lrowindices[i],&owner,&lidx));
    iremote[i].index = lidx;
    iremote[i].rank  = owner;
  }
  /* Create SF to communicate how many nonzero columns for each row */
  PetscCall(PetscSFCreate(comm,&sf));
  /* SF will figure out the number of nonzero columns for each row, and their
   * offsets
   * */
  PetscCall(PetscSFSetGraph(sf,plocalsize,nrows,NULL,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER));
  PetscCall(PetscSFSetFromOptions(sf));
  PetscCall(PetscSFSetUp(sf));

  /* Per-root (local row of P) counts of diagonal/off-diagonal nonzeros and their running offsets,
     stored as interleaved pairs so they can be broadcast with a single MPIU_2INT SF */
  PetscCall(PetscCalloc1(2*(plocalsize+1),&roffsets));
  PetscCall(PetscCalloc1(2*plocalsize,&nrcols));
  PetscCall(PetscCalloc1(nrows,&pnnz));
  roffsets[0] = 0;
  roffsets[1] = 0;
  for (i=0;i<plocalsize;i++) {
    /* diag */
    nrcols[i*2+0] = pd->i[i+1] - pd->i[i];
    /* off diag */
    nrcols[i*2+1] = po->i[i+1] - po->i[i];
    /* compute offsets so that we can find the relative location for each row */
    roffsets[(i+1)*2+0] = roffsets[i*2+0] + nrcols[i*2+0];
    roffsets[(i+1)*2+1] = roffsets[i*2+1] + nrcols[i*2+1];
  }
  PetscCall(PetscCalloc1(2*nrows,&nlcols));
  PetscCall(PetscCalloc1(2*nrows,&loffsets));
  /* 'r' means root, and 'l' means leaf */
  PetscCall(PetscSFBcastBegin(sf,MPIU_2INT,nrcols,nlcols,MPI_REPLACE));
  PetscCall(PetscSFBcastBegin(sf,MPIU_2INT,roffsets,loffsets,MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(sf,MPIU_2INT,nrcols,nlcols,MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(sf,MPIU_2INT,roffsets,loffsets,MPI_REPLACE));
  PetscCall(PetscSFDestroy(&sf));
  PetscCall(PetscFree(roffsets));
  PetscCall(PetscFree(nrcols));
  /* Totals of diag/off-diag nonzeros landing on this rank, and the widest row (used as column count) */
  dntotalcols = 0;
  ontotalcols = 0;
  ncol        = 0;
  for (i=0;i<nrows;i++) {
    pnnz[i] = nlcols[i*2+0] + nlcols[i*2+1];
    ncol    = PetscMax(pnnz[i],ncol);
    /* diag */
    dntotalcols += nlcols[i*2+0];
    /* off diag */
    ontotalcols += nlcols[i*2+1];
  }
  /* We do not need to figure the right number of columns
   * since all the calculations will be done by going through the raw data
   * */
  PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF,nrows,ncol,0,pnnz,P_oth));
  PetscCall(MatSetUp(*P_oth));
  PetscCall(PetscFree(pnnz));
  p_oth = (Mat_SeqAIJ*) (*P_oth)->data;
  /* Two SFs move the column/value data itself: one sourced from P's diagonal block, one from the off-diagonal block */
  /* diag */
  PetscCall(PetscCalloc1(dntotalcols,&iremote));
  /* off diag */
  PetscCall(PetscCalloc1(ontotalcols,&oiremote));
  /* diag */
  PetscCall(PetscCalloc1(dntotalcols,&ilocal));
  /* off diag */
  PetscCall(PetscCalloc1(ontotalcols,&oilocal));
  dntotalcols = 0;
  ontotalcols = 0;
  ntotalcols  = 0;
  for (i=0;i<nrows;i++) {
    owner = 0;
    PetscCall(PetscLayoutFindOwnerIndex(P->rmap,lrowindices[i],&owner,NULL));
    /* Set iremote for diag matrix */
    for (j=0;j<nlcols[i*2+0];j++) {
      iremote[dntotalcols].index = loffsets[i*2+0] + j;
      iremote[dntotalcols].rank  = owner;
      /* P_oth is seqAIJ so that ilocal need to point to the first part of memory */
      ilocal[dntotalcols++] = ntotalcols++;
    }
    /* off diag */
    for (j=0;j<nlcols[i*2+1];j++) {
      oiremote[ontotalcols].index = loffsets[i*2+1] + j;
      oiremote[ontotalcols].rank  = owner;
      oilocal[ontotalcols++] = ntotalcols++;
    }
  }
  PetscCall(ISRestoreIndices(rows,&lrowindices));
  PetscCall(PetscFree(loffsets));
  PetscCall(PetscFree(nlcols));
  PetscCall(PetscSFCreate(comm,&sf));
  /* P serves as roots and P_oth is leaves
   * Diag matrix
   * */
  PetscCall(PetscSFSetGraph(sf,pd->i[plocalsize],dntotalcols,ilocal,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER));
  PetscCall(PetscSFSetFromOptions(sf));
  PetscCall(PetscSFSetUp(sf));

  PetscCall(PetscSFCreate(comm,&osf));
  /* Off diag */
  PetscCall(PetscSFSetGraph(osf,po->i[plocalsize],ontotalcols,oilocal,PETSC_OWN_POINTER,oiremote,PETSC_OWN_POINTER));
  PetscCall(PetscSFSetFromOptions(osf));
  PetscCall(PetscSFSetUp(osf));
  PetscCall(MatSeqAIJGetArrayRead(p->A,&pd_a));
  PetscCall(MatSeqAIJGetArrayRead(p->B,&po_a));
  /* We operate on the matrix internal data for saving memory */
  PetscCall(PetscSFBcastBegin(sf,MPIU_SCALAR,pd_a,p_oth->a,MPI_REPLACE));
  PetscCall(PetscSFBcastBegin(osf,MPIU_SCALAR,po_a,p_oth->a,MPI_REPLACE));
  PetscCall(MatGetOwnershipRangeColumn(P,&pcstart,NULL));
  /* Convert to global indices for diag matrix; P's own j-array is shifted IN PLACE and shifted back below */
  for (i=0;i<pd->i[plocalsize];i++) pd->j[i] += pcstart;
  PetscCall(PetscSFBcastBegin(sf,MPIU_INT,pd->j,p_oth->j,MPI_REPLACE));
  /* We want P_oth store global indices */
  PetscCall(ISLocalToGlobalMappingCreate(comm,1,p->B->cmap->n,p->garray,PETSC_COPY_VALUES,&mapping));
  /* Use memory scalable approach */
  PetscCall(ISLocalToGlobalMappingSetType(mapping,ISLOCALTOGLOBALMAPPINGHASH));
  /* In-place local->global conversion of the off-diagonal j-array; reverted via GlobalToLocal below */
  PetscCall(ISLocalToGlobalMappingApply(mapping,po->i[plocalsize],po->j,po->j));
  PetscCall(PetscSFBcastBegin(osf,MPIU_INT,po->j,p_oth->j,MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(sf,MPIU_INT,pd->j,p_oth->j,MPI_REPLACE));
  /* Convert back to local indices */
  for (i=0;i<pd->i[plocalsize];i++) pd->j[i] -= pcstart;
  PetscCall(PetscSFBcastEnd(osf,MPIU_INT,po->j,p_oth->j,MPI_REPLACE));
  nout = 0;
  PetscCall(ISGlobalToLocalMappingApply(mapping,IS_GTOLM_DROP,po->i[plocalsize],po->j,&nout,po->j));
  PetscCheckFalse(nout != po->i[plocalsize],comm,PETSC_ERR_ARG_INCOMP,"n %" PetscInt_FMT " does not equal to nout %" PetscInt_FMT " ",po->i[plocalsize],nout);
  PetscCall(ISLocalToGlobalMappingDestroy(&mapping));
  /* Exchange values */
  PetscCall(PetscSFBcastEnd(sf,MPIU_SCALAR,pd_a,p_oth->a,MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(osf,MPIU_SCALAR,po_a,p_oth->a,MPI_REPLACE));
  PetscCall(MatSeqAIJRestoreArrayRead(p->A,&pd_a));
  PetscCall(MatSeqAIJRestoreArrayRead(p->B,&po_a));
  /* Stop PETSc from shrinking memory */
  for (i=0;i<nrows;i++) p_oth->ilen[i] = p_oth->imax[i];
  PetscCall(MatAssemblyBegin(*P_oth,MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(*P_oth,MAT_FINAL_ASSEMBLY));
  /* Attach PetscSF objects to P_oth so that we can reuse it later */
  PetscCall(PetscObjectCompose((PetscObject)*P_oth,"diagsf",(PetscObject)sf));
  PetscCall(PetscObjectCompose((PetscObject)*P_oth,"offdiagsf",(PetscObject)osf));
  PetscCall(PetscSFDestroy(&sf));
  PetscCall(PetscSFDestroy(&osf));
  PetscFunctionReturn(0);
}

/*
 * Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A
 * This supports MPIAIJ and MAIJ
 * */
PetscErrorCode MatGetBrowsOfAcols_MPIXAIJ(Mat A,Mat P,PetscInt dof,MatReuse reuse,Mat *P_oth)
{
  Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data,*p=(Mat_MPIAIJ*)P->data;
  Mat_SeqAIJ *p_oth;
  IS         rows,map;
  PetscHMapI hamp;
  PetscInt   i,htsize,*rowindices,off,*mapping,key,count;
  MPI_Comm   comm;
  PetscSF    sf,osf;
  PetscBool  has;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)A,&comm));
  PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAocols,A,P,0,0));
  /* If it is the first time, create an index set of off-diag nonzero columns of A,
   * and then create a submatrix (that often is an overlapping matrix)
   * */
  if (reuse == MAT_INITIAL_MATRIX) {
    /* Use a hash table to figure out unique keys; dof>1 collapses MAIJ component columns to one block row */
    PetscCall(PetscHMapICreate(&hamp));
    PetscCall(PetscHMapIResize(hamp,a->B->cmap->n));
    PetscCall(PetscCalloc1(a->B->cmap->n,&mapping));
    count = 0;
    /* Assume that a->garray is sorted, otherwise the following does not make sense */
    for (i=0;i<a->B->cmap->n;i++) {
      key  = a->garray[i]/dof;
      PetscCall(PetscHMapIHas(hamp,key,&has));
      if (!has) {
        mapping[i] = count;
        PetscCall(PetscHMapISet(hamp,key,count++));
      } else {
        /* Current 'i' has the same value as the previous step */
        mapping[i] = count-1;
      }
    }
    PetscCall(ISCreateGeneral(comm,a->B->cmap->n,mapping,PETSC_OWN_POINTER,&map));
    PetscCall(PetscHMapIGetSize(hamp,&htsize));
    PetscCheckFalse(htsize!=count,comm,PETSC_ERR_ARG_INCOMP," Size of hash map %" PetscInt_FMT " is inconsistent with count %" PetscInt_FMT " ",htsize,count);
    PetscCall(PetscCalloc1(htsize,&rowindices));
    off = 0;
    PetscCall(PetscHMapIGetKeys(hamp,&off,rowindices));
    PetscCall(PetscHMapIDestroy(&hamp));
    /* hash-map keys come out unordered; sort so the extracted rows are in ascending global order */
    PetscCall(PetscSortInt(htsize,rowindices));
    PetscCall(ISCreateGeneral(comm,htsize,rowindices,PETSC_OWN_POINTER,&rows));
    /* In case the matrix was already created but users want to recreate the matrix */
    PetscCall(MatDestroy(P_oth));
    PetscCall(MatCreateSeqSubMatrixWithRows_Private(P,rows,P_oth));
    PetscCall(PetscObjectCompose((PetscObject)*P_oth,"aoffdiagtopothmapping",(PetscObject)map));
    PetscCall(ISDestroy(&map));
    PetscCall(ISDestroy(&rows));
  } else if (reuse == MAT_REUSE_MATRIX) {
    /* If matrix was already created, we simply update values using SF objects
     * that are attached to the matrix earlier.
     */
    const PetscScalar *pd_a,*po_a;

    PetscCall(PetscObjectQuery((PetscObject)*P_oth,"diagsf",(PetscObject*)&sf));
    PetscCall(PetscObjectQuery((PetscObject)*P_oth,"offdiagsf",(PetscObject*)&osf));
    PetscCheckFalse(!sf || !osf,comm,PETSC_ERR_ARG_NULL,"Matrix is not initialized yet");
    p_oth = (Mat_SeqAIJ*) (*P_oth)->data;
    /* Update values in place */
    PetscCall(MatSeqAIJGetArrayRead(p->A,&pd_a));
    PetscCall(MatSeqAIJGetArrayRead(p->B,&po_a));
    PetscCall(PetscSFBcastBegin(sf,MPIU_SCALAR,pd_a,p_oth->a,MPI_REPLACE));
    PetscCall(PetscSFBcastBegin(osf,MPIU_SCALAR,po_a,p_oth->a,MPI_REPLACE));
    PetscCall(PetscSFBcastEnd(sf,MPIU_SCALAR,pd_a,p_oth->a,MPI_REPLACE));
    PetscCall(PetscSFBcastEnd(osf,MPIU_SCALAR,po_a,p_oth->a,MPI_REPLACE));
    PetscCall(MatSeqAIJRestoreArrayRead(p->A,&pd_a));
    PetscCall(MatSeqAIJRestoreArrayRead(p->B,&po_a));
  } else SETERRQ(comm,PETSC_ERR_ARG_UNKNOWN_TYPE,"Unknown reuse type");
  PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAocols,A,P,0,0));
  PetscFunctionReturn(0);
}

/*@C
   MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A

   Collective on Mat

   Input Parameters:
+  A - the first matrix in mpiaij format
.
B - the second matrix in mpiaij format
-  scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX

   Output Parameters:
+  rowb - On input index sets of rows of B to extract (or NULL), modified on output
.  colb - On input index sets of columns of B to extract (or NULL), modified on output
-  B_seq - the sequential matrix generated

   Level: developer

@*/
PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
{
  /* The rows of B taken are exactly the global columns where local A has nonzeros (owned columns
     plus ghost columns from garray); all of B's columns are kept. */
  Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data;
  PetscInt   *idx,i,start,ncols,nzA,nzB,*cmap,imark;
  IS         isrowb,iscolb;
  Mat        *bseq=NULL;

  PetscFunctionBegin;
  /* A's column layout must match B's row layout for A*B-style products */
  if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
    SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
  }
  PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0));

  if (scall == MAT_INITIAL_MATRIX) {
    /* Build the row IS in ascending global order: ghost cols below the owned range, owned cols, ghost cols above */
    start = A->cmap->rstart;
    cmap  = a->garray;
    nzA   = a->A->cmap->n;
    nzB   = a->B->cmap->n;
    PetscCall(PetscMalloc1(nzA+nzB, &idx));
    ncols = 0;
    for (i=0; i<nzB; i++) {  /* row < local row index */
      if (cmap[i] < start) idx[ncols++] = cmap[i];
      else break;
    }
    imark = i;
    for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
    for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
    PetscCall(ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb));
    PetscCall(ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb));
  } else {
    PetscCheckFalse(!rowb || !colb,PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
    isrowb = *rowb; iscolb = *colb;
    /* MatCreateSubMatrices() expects the caller-provided array of matrices on reuse */
    PetscCall(PetscMalloc1(1,&bseq));
    bseq[0] = *B_seq;
  }
  PetscCall(MatCreateSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq));
  *B_seq = bseq[0];
  PetscCall(PetscFree(bseq));
  /* Hand the index sets back to the caller (for reuse) or destroy the defaults we built */
  if (!rowb) {
    PetscCall(ISDestroy(&isrowb));
  } else {
    *rowb = isrowb;
  }
  if (!colb) {
    PetscCall(ISDestroy(&iscolb));
  } else {
    *colb = iscolb;
  }
  PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0));
  PetscFunctionReturn(0);
}

/*
    MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns
    of the OFF-DIAGONAL portion of local A

    Collective on Mat

   Input Parameters:
+    A,B - the matrices in mpiaij format
-    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX

   Output Parameter:
+    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
.    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
.    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
-    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N

   Developer Notes: This directly accesses information inside the VecScatter associated with the matrix-vector product
     for this matrix. This is not desirable..
   Level: developer

*/
PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
{
  /* Three message rounds over the Mvctx scatter's send/recv patterns: (1) per-row nonzero counts
     (i-array), (2) column indices (j-array, MAT_INITIAL only), (3) values (a-array). */
  Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ        *b_oth;
  VecScatter        ctx;
  MPI_Comm          comm;
  const PetscMPIInt *rprocs,*sprocs;
  const PetscInt    *srow,*rstarts,*sstarts;
  PetscInt          *rowlen,*bufj,*bufJ,ncols = 0,aBn=a->B->cmap->n,row,*b_othi,*b_othj,*rvalues=NULL,*svalues=NULL,*cols,sbs,rbs;
  PetscInt          i,j,k=0,l,ll,nrecvs,nsends,nrows,*rstartsj = NULL,*sstartsj,len;
  PetscScalar       *b_otha,*bufa,*bufA,*vals = NULL;
  MPI_Request       *reqs = NULL,*rwaits = NULL,*swaits = NULL;
  PetscMPIInt       size,tag,rank,nreqs;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)A,&comm));
  PetscCallMPI(MPI_Comm_size(comm,&size));

  if (PetscUnlikely(A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend)) {
    SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
  }
  PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0));
  PetscCallMPI(MPI_Comm_rank(comm,&rank));

  if (size == 1) {
    /* NOTE(review): these assign the local parameter copies, not *startsj_s / *bufa_ptr, so the
       caller's pointers are untouched in the uniprocess path — confirm this is intentional */
    startsj_s = NULL;
    bufa_ptr  = NULL;
    *B_oth    = NULL;
    PetscFunctionReturn(0);
  }

  ctx = a->Mvctx;
  tag = ((PetscObject)ctx)->tag;

  PetscCall(VecScatterGetRemote_Private(ctx,PETSC_TRUE/*send*/,&nsends,&sstarts,&srow,&sprocs,&sbs));
  /* rprocs[] must be ordered so that indices received from them are ordered in rvalues[], which is key to algorithms used in this subroutine */
  PetscCall(VecScatterGetRemoteOrdered_Private(ctx,PETSC_FALSE/*recv*/,&nrecvs,&rstarts,NULL/*indices not needed*/,&rprocs,&rbs));
  PetscCall(PetscMPIIntCast(nsends+nrecvs,&nreqs));
  PetscCall(PetscMalloc1(nreqs,&reqs));
  /* one contiguous request array: receives first, then sends */
  rwaits = reqs;
  swaits = reqs + nrecvs;

  if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
  if (scall == MAT_INITIAL_MATRIX) {
    /* i-array */
    /*---------*/
    /* post receives */
    if (nrecvs) PetscCall(PetscMalloc1(rbs*(rstarts[nrecvs] - rstarts[0]),&rvalues)); /* rstarts can be NULL when nrecvs=0 */
    for (i=0; i<nrecvs; i++) {
      /* NOTE(review): this offset uses rstarts[i]*rbs while the unpack loop below uses
         (rstarts[i]-rstarts[0])*rbs; they only agree when rstarts[0]==0 — verify */
      rowlen = rvalues + rstarts[i]*rbs;
      nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
      PetscCallMPI(MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i));
    }

    /* pack the outgoing message */
    PetscCall(PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj));

    sstartsj[0] = 0;
    rstartsj[0] = 0;
    len         = 0; /* total length of j or a array to be sent */
    if (nsends) {
      k = sstarts[0]; /* ATTENTION: sstarts[0] and rstarts[0] are not necessarily zero */
      PetscCall(PetscMalloc1(sbs*(sstarts[nsends]-sstarts[0]),&svalues));
    }
    for (i=0; i<nsends; i++) {
      rowlen = svalues + (sstarts[i]-sstarts[0])*sbs;
      nrows  = sstarts[i+1]-sstarts[i]; /* num of block rows */
      for (j=0; j<nrows; j++) {
        row = srow[k] + B->rmap->range[rank]; /* global row idx */
        for (l=0; l<sbs; l++) {
          PetscCall(MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL)); /* rowlength */

          rowlen[j*sbs+l] = ncols;

          len += ncols;
          PetscCall(MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL));
        }
        k++;
      }
      PetscCallMPI(MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i));

      sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
    }
    /* recvs and sends of i-array are completed */
    if (nreqs) PetscCallMPI(MPI_Waitall(nreqs,reqs,MPI_STATUSES_IGNORE));
    PetscCall(PetscFree(svalues));

    /* allocate buffers for sending j and a arrays */
    PetscCall(PetscMalloc1(len+1,&bufj));
    PetscCall(PetscMalloc1(len+1,&bufa));

    /* create i-array of B_oth */
    PetscCall(PetscMalloc1(aBn+2,&b_othi));

    b_othi[0] = 0;
    len = 0; /* total length of j or a array to be received */
    k = 0;
    for (i=0; i<nrecvs; i++) {
      rowlen = rvalues + (rstarts[i]-rstarts[0])*rbs;
      nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of rows to be received */
      for (j=0; j<nrows; j++) {
        b_othi[k+1] = b_othi[k] + rowlen[j];
        PetscCall(PetscIntSumError(rowlen[j],len,&len)); /* overflow-checked accumulation */
        k++;
      }
      rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
    }
    PetscCall(PetscFree(rvalues));

    /* allocate space for j and a arrays of B_oth */
    PetscCall(PetscMalloc1(b_othi[aBn]+1,&b_othj));
    PetscCall(PetscMalloc1(b_othi[aBn]+1,&b_otha));

    /* j-array */
    /*---------*/
    /* post receives of j-array */
    for (i=0; i<nrecvs; i++) {
      nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
      PetscCallMPI(MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i));
    }

    /* pack the outgoing message j-array */
    if (nsends) k = sstarts[0];
    for (i=0; i<nsends; i++) {
      nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
      bufJ  = bufj+sstartsj[i];
      for (j=0; j<nrows; j++) {
        row = srow[k++] + B->rmap->range[rank];  /* global row idx */
        for (ll=0; ll<sbs; ll++) {
          PetscCall(MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL));
          for (l=0; l<ncols; l++) {
            *bufJ++ = cols[l];
          }
          PetscCall(MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL));
        }
      }
      PetscCallMPI(MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i));
    }

    /* recvs and sends of j-array are completed */
    if (nreqs) PetscCallMPI(MPI_Waitall(nreqs,reqs,MPI_STATUSES_IGNORE));
  } else if (scall == MAT_REUSE_MATRIX) {
    /* pattern already known: reuse the saved offset arrays and send buffer, refresh values only */
    sstartsj = *startsj_s;
    rstartsj = *startsj_r;
    bufa     = *bufa_ptr;
    b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
    PetscCall(MatSeqAIJGetArrayWrite(*B_oth,&b_otha));
  } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not possess an object container");

  /* a-array */
  /*---------*/
  /* post receives of a-array */
  for (i=0; i<nrecvs; i++) {
    nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
    PetscCallMPI(MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i));
  }

  /* pack the outgoing message a-array */
  if (nsends) k = sstarts[0];
  for (i=0; i<nsends; i++) {
    nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
    bufA  = bufa+sstartsj[i];
    for (j=0; j<nrows; j++) {
      row = srow[k++] + B->rmap->range[rank];  /* global row idx */
      for (ll=0; ll<sbs; ll++) {
        PetscCall(MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals));
        for (l=0; l<ncols; l++) {
          *bufA++ = vals[l];
        }
        PetscCall(MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals));
      }
    }
    PetscCallMPI(MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i));
  }
  /* recvs and sends of a-array are completed */
  if (nreqs) PetscCallMPI(MPI_Waitall(nreqs,reqs,MPI_STATUSES_IGNORE));
  PetscCall(PetscFree(reqs));

  if (scall == MAT_INITIAL_MATRIX) {
    /* put together the new matrix */
    PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth));

    /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
    /* Since these are PETSc arrays, change flags to free them as necessary. */
    b_oth          = (Mat_SeqAIJ*)(*B_oth)->data;
    b_oth->free_a  = PETSC_TRUE;
    b_oth->free_ij = PETSC_TRUE;
    b_oth->nonew   = 0;

    PetscCall(PetscFree(bufj));
    if (!startsj_s || !bufa_ptr) {
      PetscCall(PetscFree2(sstartsj,rstartsj));
      /* NOTE(review): this frees the (NULL in this branch) pointer-to-pointer argument, not the
         allocated bufa buffer, which appears to leak here — confirm whether PetscFree(bufa) was intended */
      PetscCall(PetscFree(bufa_ptr));
    } else {
      *startsj_s = sstartsj;
      *startsj_r = rstartsj;
      *bufa_ptr  = bufa;
    }
  } else if (scall == MAT_REUSE_MATRIX) {
    PetscCall(MatSeqAIJRestoreArrayWrite(*B_oth,&b_otha));
  }

  PetscCall(VecScatterRestoreRemote_Private(ctx,PETSC_TRUE,&nsends,&sstarts,&srow,&sprocs,&sbs));
  PetscCall(VecScatterRestoreRemoteOrdered_Private(ctx,PETSC_FALSE,&nrecvs,&rstarts,NULL,&rprocs,&rbs));
  PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0));
  PetscFunctionReturn(0);
}

/* Conversion routines implemented in sibling translation units */
PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*);
PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*);
PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJSELL(Mat,MatType,MatReuse,Mat*);
#if defined(PETSC_HAVE_MKL_SPARSE)
PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat,MatType,MatReuse,Mat*);
#endif
PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIBAIJ(Mat,MatType,MatReuse,Mat*);
PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*);
#if defined(PETSC_HAVE_ELEMENTAL)
PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat,MatType,MatReuse,Mat*);
#endif
#if defined(PETSC_HAVE_SCALAPACK)
PETSC_INTERN PetscErrorCode MatConvert_AIJ_ScaLAPACK(Mat,MatType,MatReuse,Mat*);
#endif
#if defined(PETSC_HAVE_HYPRE)
PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*);
#endif
#if defined(PETSC_HAVE_CUDA)
PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCUSPARSE(Mat,MatType,MatReuse,Mat*);
#endif
#if defined(PETSC_HAVE_KOKKOS_KERNELS)
5858 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJKokkos(Mat,MatType,MatReuse,Mat*); 5859 #endif 5860 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat,MatType,MatReuse,Mat*); 5861 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat,MatType,MatReuse,Mat*); 5862 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_IS_XAIJ(Mat); 5863 5864 /* 5865 Computes (B'*A')' since computing B*A directly is untenable 5866 5867 n p p 5868 [ ] [ ] [ ] 5869 m [ A ] * n [ B ] = m [ C ] 5870 [ ] [ ] [ ] 5871 5872 */ 5873 static PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C) 5874 { 5875 Mat At,Bt,Ct; 5876 5877 PetscFunctionBegin; 5878 PetscCall(MatTranspose(A,MAT_INITIAL_MATRIX,&At)); 5879 PetscCall(MatTranspose(B,MAT_INITIAL_MATRIX,&Bt)); 5880 PetscCall(MatMatMult(Bt,At,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&Ct)); 5881 PetscCall(MatDestroy(&At)); 5882 PetscCall(MatDestroy(&Bt)); 5883 PetscCall(MatTranspose(Ct,MAT_REUSE_MATRIX,&C)); 5884 PetscCall(MatDestroy(&Ct)); 5885 PetscFunctionReturn(0); 5886 } 5887 5888 static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat C) 5889 { 5890 PetscBool cisdense; 5891 5892 PetscFunctionBegin; 5893 PetscCheckFalse(A->cmap->n != B->rmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %" PetscInt_FMT " != B->rmap->n %" PetscInt_FMT,A->cmap->n,B->rmap->n); 5894 PetscCall(MatSetSizes(C,A->rmap->n,B->cmap->n,A->rmap->N,B->cmap->N)); 5895 PetscCall(MatSetBlockSizesFromMats(C,A,B)); 5896 PetscCall(PetscObjectTypeCompareAny((PetscObject)C,&cisdense,MATMPIDENSE,MATMPIDENSECUDA,"")); 5897 if (!cisdense) { 5898 PetscCall(MatSetType(C,((PetscObject)A)->type_name)); 5899 } 5900 PetscCall(MatSetUp(C)); 5901 5902 C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ; 5903 PetscFunctionReturn(0); 5904 } 5905 5906 /* ----------------------------------------------------------------*/ 5907 static PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ_AB(Mat C) 5908 { 5909 Mat_Product *product = 
C->product; 5910 Mat A = product->A,B=product->B; 5911 5912 PetscFunctionBegin; 5913 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) 5914 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5915 5916 C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIAIJ; 5917 C->ops->productsymbolic = MatProductSymbolic_AB; 5918 PetscFunctionReturn(0); 5919 } 5920 5921 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ(Mat C) 5922 { 5923 Mat_Product *product = C->product; 5924 5925 PetscFunctionBegin; 5926 if (product->type == MATPRODUCT_AB) { 5927 PetscCall(MatProductSetFromOptions_MPIDense_MPIAIJ_AB(C)); 5928 } 5929 PetscFunctionReturn(0); 5930 } 5931 5932 /* std::upper_bound(): Given a sorted array, return index of the first element in range [first,last) whose value 5933 is greater than value, or last if there is no such element. 
5934 */ 5935 static inline PetscErrorCode PetscSortedIntUpperBound(PetscInt *array,PetscCount first,PetscCount last,PetscInt value,PetscCount *upper) 5936 { 5937 PetscCount it,step,count = last - first; 5938 5939 PetscFunctionBegin; 5940 while (count > 0) { 5941 it = first; 5942 step = count / 2; 5943 it += step; 5944 if (!(value < array[it])) { 5945 first = ++it; 5946 count -= step + 1; 5947 } else count = step; 5948 } 5949 *upper = first; 5950 PetscFunctionReturn(0); 5951 } 5952 5953 /* Merge two sets of sorted nonzero entries and return a CSR for the merged (sequential) matrix 5954 5955 Input Parameters: 5956 5957 j1,rowBegin1,rowEnd1,perm1,jmap1: describe the first set of nonzeros (Set1) 5958 j2,rowBegin2,rowEnd2,perm2,jmap2: describe the second set of nonzeros (Set2) 5959 5960 mat: both sets' entries are on m rows, where m is the number of local rows of the matrix mat 5961 5962 For Set1, j1[] contains column indices of the nonzeros. 5963 For the k-th row (0<=k<m), [rowBegin1[k],rowEnd1[k]) index into j1[] and point to the begin/end nonzero in row k 5964 respectively (note rowEnd1[k] is not necessarily equal to rwoBegin1[k+1]). Indices in this range of j1[] are sorted, 5965 but might have repeats. jmap1[t+1] - jmap1[t] is the number of repeats for the t-th unique nonzero in Set1. 5966 5967 Similar for Set2. 5968 5969 This routine merges the two sets of nonzeros row by row and removes repeats. 5970 5971 Output Parameters: (memories are allocated by the caller) 5972 5973 i[],j[]: the CSR of the merged matrix, which has m rows. 5974 imap1[]: the k-th unique nonzero in Set1 (k=0,1,...) corresponds to imap1[k]-th unique nonzero in the merged matrix. 5975 imap2[]: similar to imap1[], but for Set2. 5976 Note we order nonzeros row-by-row and from left to right. 
5977 */ 5978 static PetscErrorCode MatMergeEntries_Internal(Mat mat,const PetscInt j1[],const PetscInt j2[],const PetscCount rowBegin1[],const PetscCount rowEnd1[], 5979 const PetscCount rowBegin2[],const PetscCount rowEnd2[],const PetscCount jmap1[],const PetscCount jmap2[], 5980 PetscCount imap1[],PetscCount imap2[],PetscInt i[],PetscInt j[]) 5981 { 5982 PetscInt r,m; /* Row index of mat */ 5983 PetscCount t,t1,t2,b1,e1,b2,e2; 5984 5985 PetscFunctionBegin; 5986 PetscCall(MatGetLocalSize(mat,&m,NULL)); 5987 t1 = t2 = t = 0; /* Count unique nonzeros of in Set1, Set1 and the merged respectively */ 5988 i[0] = 0; 5989 for (r=0; r<m; r++) { /* Do row by row merging */ 5990 b1 = rowBegin1[r]; 5991 e1 = rowEnd1[r]; 5992 b2 = rowBegin2[r]; 5993 e2 = rowEnd2[r]; 5994 while (b1 < e1 && b2 < e2) { 5995 if (j1[b1] == j2[b2]) { /* Same column index and hence same nonzero */ 5996 j[t] = j1[b1]; 5997 imap1[t1] = t; 5998 imap2[t2] = t; 5999 b1 += jmap1[t1+1] - jmap1[t1]; /* Jump to next unique local nonzero */ 6000 b2 += jmap2[t2+1] - jmap2[t2]; /* Jump to next unique remote nonzero */ 6001 t1++; t2++; t++; 6002 } else if (j1[b1] < j2[b2]) { 6003 j[t] = j1[b1]; 6004 imap1[t1] = t; 6005 b1 += jmap1[t1+1] - jmap1[t1]; 6006 t1++; t++; 6007 } else { 6008 j[t] = j2[b2]; 6009 imap2[t2] = t; 6010 b2 += jmap2[t2+1] - jmap2[t2]; 6011 t2++; t++; 6012 } 6013 } 6014 /* Merge the remaining in either j1[] or j2[] */ 6015 while (b1 < e1) { 6016 j[t] = j1[b1]; 6017 imap1[t1] = t; 6018 b1 += jmap1[t1+1] - jmap1[t1]; 6019 t1++; t++; 6020 } 6021 while (b2 < e2) { 6022 j[t] = j2[b2]; 6023 imap2[t2] = t; 6024 b2 += jmap2[t2+1] - jmap2[t2]; 6025 t2++; t++; 6026 } 6027 i[r+1] = t; 6028 } 6029 PetscFunctionReturn(0); 6030 } 6031 6032 /* Split a set/group of local entries into two subsets: those in the diagonal block and those in the off-diagonal block 6033 6034 Input Parameters: 6035 mat: an MPI matrix that provides row and column layout information for splitting. 
Let's say its number of local rows is m. 6036 n,i[],j[],perm[]: there are n input entries, belonging to m rows. Row/col indices of the entries are stored in i[] and j[] 6037 respectively, along with a permutation array perm[]. Length of the i[],j[],perm[] arrays is n. 6038 6039 i[] is already sorted, but within a row, j[] is not sorted and might have repeats. 6040 i[] might contain negative indices at the beginning, which means the corresponding entries should be ignored in the splitting. 6041 6042 Output Parameters: 6043 j[],perm[]: the routine needs to sort j[] within each row along with perm[]. 6044 rowBegin[],rowMid[],rowEnd[]: of length m, and the memory is preallocated and zeroed by the caller. 6045 They contain indices pointing to j[]. For 0<=r<m, [rowBegin[r],rowMid[r]) point to begin/end entries of row r of the diagonal block, 6046 and [rowMid[r],rowEnd[r]) point to begin/end entries of row r of the off-diagonal block. 6047 6048 Aperm[],Ajmap[],Atot,Annz: Arrays are allocated by this routine. 6049 Aperm[Atot] stores values from perm[] for entries belonging to the diagonal block. Length of Aperm[] is Atot, though it may also count 6050 repeats (i.e., same 'i,j' pair). 6051 Ajmap[Annz+1] stores the number of repeats of each unique entry belonging to the diagonal block. More precisely, Ajmap[t+1] - Ajmap[t] 6052 is the number of repeats for the t-th unique entry in the diagonal block. Ajmap[0] is always 0. 6053 6054 Atot: number of entries belonging to the diagonal block 6055 Annz: number of unique nonzeros belonging to the diagonal block. 6056 6057 Bperm[], Bjmap[], Btot, Bnnz are similar but for the off-diagonal block. 6058 6059 Aperm[],Bperm[],Ajmap[],Bjmap[] are allocated by this routine with PetscMalloc4(). One has to free them with PetscFree4() in the exact order. 
6060 */ 6061 static PetscErrorCode MatSplitEntries_Internal(Mat mat,PetscCount n,const PetscInt i[],PetscInt j[], 6062 PetscCount perm[],PetscCount rowBegin[],PetscCount rowMid[],PetscCount rowEnd[], 6063 PetscCount *Atot_,PetscCount **Aperm_,PetscCount *Annz_,PetscCount **Ajmap_, 6064 PetscCount *Btot_,PetscCount **Bperm_,PetscCount *Bnnz_,PetscCount **Bjmap_) 6065 { 6066 PetscInt cstart,cend,rstart,rend,row,col; 6067 PetscCount Atot=0,Btot=0; /* Total number of nonzeros in the diagonal and off-diagonal blocks */ 6068 PetscCount Annz=0,Bnnz=0; /* Number of unique nonzeros in the diagonal and off-diagonal blocks */ 6069 PetscCount k,m,p,q,r,s,mid; 6070 PetscCount *Aperm,*Bperm,*Ajmap,*Bjmap; 6071 6072 PetscFunctionBegin; 6073 PetscCall(PetscLayoutGetRange(mat->rmap,&rstart,&rend)); 6074 PetscCall(PetscLayoutGetRange(mat->cmap,&cstart,&cend)); 6075 m = rend - rstart; 6076 6077 for (k=0; k<n; k++) {if (i[k]>=0) break;} /* Skip negative rows */ 6078 6079 /* Process [k,n): sort and partition each local row into diag and offdiag portions, 6080 fill rowBegin[], rowMid[], rowEnd[], and count Atot, Btot, Annz, Bnnz. 6081 */ 6082 while (k<n) { 6083 row = i[k]; 6084 /* Entries in [k,s) are in one row. 
Shift diagonal block col indices so that diag is ahead of offdiag after sorting the row */ 6085 for (s=k; s<n; s++) if (i[s] != row) break; 6086 for (p=k; p<s; p++) { 6087 if (j[p] >= cstart && j[p] < cend) j[p] -= PETSC_MAX_INT; /* Shift diag columns to range of [-PETSC_MAX_INT, -1] */ 6088 else PetscAssert((j[p] >= 0) && (j[p] <= mat->cmap->N),PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column index %" PetscInt_FMT " is out of range",j[p]); 6089 } 6090 PetscCall(PetscSortIntWithCountArray(s-k,j+k,perm+k)); 6091 PetscCall(PetscSortedIntUpperBound(j,k,s,-1,&mid)); /* Seperate [k,s) into [k,mid) for diag and [mid,s) for offdiag */ 6092 rowBegin[row-rstart] = k; 6093 rowMid[row-rstart] = mid; 6094 rowEnd[row-rstart] = s; 6095 6096 /* Count nonzeros of this diag/offdiag row, which might have repeats */ 6097 Atot += mid - k; 6098 Btot += s - mid; 6099 6100 /* Count unique nonzeros of this diag/offdiag row */ 6101 for (p=k; p<mid;) { 6102 col = j[p]; 6103 do {j[p] += PETSC_MAX_INT; p++;} while (p<mid && j[p] == col); /* Revert the modified diagonal indices */ 6104 Annz++; 6105 } 6106 6107 for (p=mid; p<s;) { 6108 col = j[p]; 6109 do {p++;} while (p<s && j[p] == col); 6110 Bnnz++; 6111 } 6112 k = s; 6113 } 6114 6115 /* Allocation according to Atot, Btot, Annz, Bnnz */ 6116 PetscCall(PetscMalloc4(Atot,&Aperm,Btot,&Bperm,Annz+1,&Ajmap,Bnnz+1,&Bjmap)); 6117 6118 /* Re-scan indices and copy diag/offdiag permutation indices to Aperm, Bperm and also fill Ajmap and Bjmap */ 6119 Ajmap[0] = Bjmap[0] = Atot = Btot = Annz = Bnnz = 0; 6120 for (r=0; r<m; r++) { 6121 k = rowBegin[r]; 6122 mid = rowMid[r]; 6123 s = rowEnd[r]; 6124 PetscCall(PetscArraycpy(Aperm+Atot,perm+k, mid-k)); 6125 PetscCall(PetscArraycpy(Bperm+Btot,perm+mid,s-mid)); 6126 Atot += mid - k; 6127 Btot += s - mid; 6128 6129 /* Scan column indices in this row and find out how many repeats each unique nonzero has */ 6130 for (p=k; p<mid;) { 6131 col = j[p]; 6132 q = p; 6133 do {p++;} while (p<mid && j[p] == col); 6134 
Ajmap[Annz+1] = Ajmap[Annz] + (p - q); 6135 Annz++; 6136 } 6137 6138 for (p=mid; p<s;) { 6139 col = j[p]; 6140 q = p; 6141 do {p++;} while (p<s && j[p] == col); 6142 Bjmap[Bnnz+1] = Bjmap[Bnnz] + (p - q); 6143 Bnnz++; 6144 } 6145 } 6146 /* Output */ 6147 *Aperm_ = Aperm; 6148 *Annz_ = Annz; 6149 *Atot_ = Atot; 6150 *Ajmap_ = Ajmap; 6151 *Bperm_ = Bperm; 6152 *Bnnz_ = Bnnz; 6153 *Btot_ = Btot; 6154 *Bjmap_ = Bjmap; 6155 PetscFunctionReturn(0); 6156 } 6157 6158 PetscErrorCode MatSetPreallocationCOO_MPIAIJ(Mat mat, PetscCount coo_n, const PetscInt coo_i[], const PetscInt coo_j[]) 6159 { 6160 MPI_Comm comm; 6161 PetscMPIInt rank,size; 6162 PetscInt m,n,M,N,rstart,rend,cstart,cend; /* Sizes, indices of row/col, therefore with type PetscInt */ 6163 PetscCount k,p,q,rem; /* Loop variables over coo arrays */ 6164 Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ*)mat->data; 6165 6166 PetscFunctionBegin; 6167 PetscCall(PetscFree(mpiaij->garray)); 6168 PetscCall(VecDestroy(&mpiaij->lvec)); 6169 #if defined(PETSC_USE_CTABLE) 6170 PetscCall(PetscTableDestroy(&mpiaij->colmap)); 6171 #else 6172 PetscCall(PetscFree(mpiaij->colmap)); 6173 #endif 6174 PetscCall(VecScatterDestroy(&mpiaij->Mvctx)); 6175 mat->assembled = PETSC_FALSE; 6176 mat->was_assembled = PETSC_FALSE; 6177 PetscCall(MatResetPreallocationCOO_MPIAIJ(mat)); 6178 6179 PetscCall(PetscObjectGetComm((PetscObject)mat,&comm)); 6180 PetscCallMPI(MPI_Comm_size(comm,&size)); 6181 PetscCallMPI(MPI_Comm_rank(comm,&rank)); 6182 PetscCall(PetscLayoutSetUp(mat->rmap)); 6183 PetscCall(PetscLayoutSetUp(mat->cmap)); 6184 PetscCall(PetscLayoutGetRange(mat->rmap,&rstart,&rend)); 6185 PetscCall(PetscLayoutGetRange(mat->cmap,&cstart,&cend)); 6186 PetscCall(MatGetLocalSize(mat,&m,&n)); 6187 PetscCall(MatGetSize(mat,&M,&N)); 6188 6189 /* ---------------------------------------------------------------------------*/ 6190 /* Sort (i,j) by row along with a permutation array, so that the to-be-ignored */ 6191 /* entries come first, then local rows, then remote 
rows. */ 6192 /* ---------------------------------------------------------------------------*/ 6193 PetscCount n1 = coo_n,*perm1; 6194 PetscInt *i1,*j1; /* Copies of input COOs along with a permutation array */ 6195 PetscCall(PetscMalloc3(n1,&i1,n1,&j1,n1,&perm1)); 6196 PetscCall(PetscArraycpy(i1,coo_i,n1)); /* Make a copy since we'll modify it */ 6197 PetscCall(PetscArraycpy(j1,coo_j,n1)); 6198 for (k=0; k<n1; k++) perm1[k] = k; 6199 6200 /* Manipulate indices so that entries with negative row or col indices will have smallest 6201 row indices, local entries will have greater but negative row indices, and remote entries 6202 will have positive row indices. 6203 */ 6204 for (k=0; k<n1; k++) { 6205 if (i1[k] < 0 || j1[k] < 0) i1[k] = PETSC_MIN_INT; /* e.g., -2^31, minimal to move them ahead */ 6206 else if (i1[k] >= rstart && i1[k] < rend) i1[k] -= PETSC_MAX_INT; /* e.g., minus 2^31-1 to shift local rows to range of [-PETSC_MAX_INT, -1] */ 6207 else PetscCheck(!mat->nooffprocentries,PETSC_COMM_SELF,PETSC_ERR_USER_INPUT,"MAT_NO_OFF_PROC_ENTRIES is set but insert to remote rows"); 6208 else if (mpiaij->donotstash) i1[k] = PETSC_MIN_INT; /* Ignore offproc entries as if they had negative indices */ 6209 } 6210 6211 /* Sort by row; after that, [0,k) have ignored entires, [k,rem) have local rows and [rem,n1) have remote rows */ 6212 PetscCall(PetscSortIntWithIntCountArrayPair(n1,i1,j1,perm1)); 6213 for (k=0; k<n1; k++) {if (i1[k] > PETSC_MIN_INT) break;} /* Advance k to the first entry we need to take care of */ 6214 PetscCall(PetscSortedIntUpperBound(i1,k,n1,rend-1-PETSC_MAX_INT,&rem)); /* rem is upper bound of the last local row */ 6215 for (; k<rem; k++) i1[k] += PETSC_MAX_INT; /* Revert row indices of local rows*/ 6216 6217 /* ---------------------------------------------------------------------------*/ 6218 /* Split local rows into diag/offdiag portions */ 6219 /* ---------------------------------------------------------------------------*/ 6220 PetscCount 
*rowBegin1,*rowMid1,*rowEnd1; 6221 PetscCount *Ajmap1,*Aperm1,*Bjmap1,*Bperm1,*Cperm1; 6222 PetscCount Annz1,Bnnz1,Atot1,Btot1; 6223 6224 PetscCall(PetscCalloc3(m,&rowBegin1,m,&rowMid1,m,&rowEnd1)); 6225 PetscCall(PetscMalloc1(n1-rem,&Cperm1)); 6226 PetscCall(MatSplitEntries_Internal(mat,rem,i1,j1,perm1,rowBegin1,rowMid1,rowEnd1,&Atot1,&Aperm1,&Annz1,&Ajmap1,&Btot1,&Bperm1,&Bnnz1,&Bjmap1)); 6227 6228 /* ---------------------------------------------------------------------------*/ 6229 /* Send remote rows to their owner */ 6230 /* ---------------------------------------------------------------------------*/ 6231 /* Find which rows should be sent to which remote ranks*/ 6232 PetscInt nsend = 0; /* Number of MPI ranks to send data to */ 6233 PetscMPIInt *sendto; /* [nsend], storing remote ranks */ 6234 PetscInt *nentries; /* [nsend], storing number of entries sent to remote ranks; Assume PetscInt is big enough for this count, and error if not */ 6235 const PetscInt *ranges; 6236 PetscInt maxNsend = size >= 128? 128 : size; /* Assume max 128 neighbors; realloc when needed */ 6237 6238 PetscCall(PetscLayoutGetRanges(mat->rmap,&ranges)); 6239 PetscCall(PetscMalloc2(maxNsend,&sendto,maxNsend,&nentries)); 6240 for (k=rem; k<n1;) { 6241 PetscMPIInt owner; 6242 PetscInt firstRow,lastRow; 6243 6244 /* Locate a row range */ 6245 firstRow = i1[k]; /* first row of this owner */ 6246 PetscCall(PetscLayoutFindOwner(mat->rmap,firstRow,&owner)); 6247 lastRow = ranges[owner+1]-1; /* last row of this owner */ 6248 6249 /* Find the first index 'p' in [k,n) with i[p] belonging to next owner */ 6250 PetscCall(PetscSortedIntUpperBound(i1,k,n1,lastRow,&p)); 6251 6252 /* All entries in [k,p) belong to this remote owner */ 6253 if (nsend >= maxNsend) { /* Double the remote ranks arrays if not long enough */ 6254 PetscMPIInt *sendto2; 6255 PetscInt *nentries2; 6256 PetscInt maxNsend2 = (maxNsend <= size/2) ? 
maxNsend*2 : size; 6257 6258 PetscCall(PetscMalloc2(maxNsend2,&sendto2,maxNsend2,&nentries2)); 6259 PetscCall(PetscArraycpy(sendto2,sendto,maxNsend)); 6260 PetscCall(PetscArraycpy(nentries2,nentries2,maxNsend+1)); 6261 PetscCall(PetscFree2(sendto,nentries2)); 6262 sendto = sendto2; 6263 nentries = nentries2; 6264 maxNsend = maxNsend2; 6265 } 6266 sendto[nsend] = owner; 6267 nentries[nsend] = p - k; 6268 PetscCall(PetscCountCast(p-k,&nentries[nsend])); 6269 nsend++; 6270 k = p; 6271 } 6272 6273 /* Build 1st SF to know offsets on remote to send data */ 6274 PetscSF sf1; 6275 PetscInt nroots = 1,nroots2 = 0; 6276 PetscInt nleaves = nsend,nleaves2 = 0; 6277 PetscInt *offsets; 6278 PetscSFNode *iremote; 6279 6280 PetscCall(PetscSFCreate(comm,&sf1)); 6281 PetscCall(PetscMalloc1(nsend,&iremote)); 6282 PetscCall(PetscMalloc1(nsend,&offsets)); 6283 for (k=0; k<nsend; k++) { 6284 iremote[k].rank = sendto[k]; 6285 iremote[k].index = 0; 6286 nleaves2 += nentries[k]; 6287 PetscCheck(nleaves2 >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Number of SF leaves is too large for PetscInt"); 6288 } 6289 PetscCall(PetscSFSetGraph(sf1,nroots,nleaves,NULL,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER)); 6290 PetscCall(PetscSFFetchAndOpWithMemTypeBegin(sf1,MPIU_INT,PETSC_MEMTYPE_HOST,&nroots2/*rootdata*/,PETSC_MEMTYPE_HOST,nentries/*leafdata*/,PETSC_MEMTYPE_HOST,offsets/*leafupdate*/,MPI_SUM)); 6291 PetscCall(PetscSFFetchAndOpEnd(sf1,MPIU_INT,&nroots2,nentries,offsets,MPI_SUM)); /* Would nroots2 overflow, we check offsets[] below */ 6292 PetscCall(PetscSFDestroy(&sf1)); 6293 PetscAssert(nleaves2 == n1-rem,PETSC_COMM_SELF,PETSC_ERR_PLIB,"nleaves2 " PetscInt_FMT " != number of remote entries " PetscCount_FMT "",nleaves2,n1-rem); 6294 6295 /* Build 2nd SF to send remote COOs to their owner */ 6296 PetscSF sf2; 6297 nroots = nroots2; 6298 nleaves = nleaves2; 6299 PetscCall(PetscSFCreate(comm,&sf2)); 6300 PetscCall(PetscSFSetFromOptions(sf2)); 6301 PetscCall(PetscMalloc1(nleaves,&iremote)); 
6302 p = 0; 6303 for (k=0; k<nsend; k++) { 6304 PetscCheck(offsets[k] >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Number of SF roots is too large for PetscInt"); 6305 for (q=0; q<nentries[k]; q++,p++) { 6306 iremote[p].rank = sendto[k]; 6307 iremote[p].index = offsets[k] + q; 6308 } 6309 } 6310 PetscCall(PetscSFSetGraph(sf2,nroots,nleaves,NULL,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER)); 6311 6312 /* sf2 only sends contiguous leafdata to contiguous rootdata. We record the permutation which will be used to fill leafdata */ 6313 PetscCall(PetscArraycpy(Cperm1,perm1+rem,n1-rem)); 6314 6315 /* Send the remote COOs to their owner */ 6316 PetscInt n2 = nroots,*i2,*j2; /* Buffers for received COOs from other ranks, along with a permutation array */ 6317 PetscCount *perm2; /* Though PetscInt is enough for remote entries, we use PetscCount here as we want to reuse MatSplitEntries_Internal() */ 6318 PetscCall(PetscMalloc3(n2,&i2,n2,&j2,n2,&perm2)); 6319 PetscCall(PetscSFReduceWithMemTypeBegin(sf2,MPIU_INT,PETSC_MEMTYPE_HOST,i1+rem,PETSC_MEMTYPE_HOST,i2,MPI_REPLACE)); 6320 PetscCall(PetscSFReduceEnd(sf2,MPIU_INT,i1+rem,i2,MPI_REPLACE)); 6321 PetscCall(PetscSFReduceWithMemTypeBegin(sf2,MPIU_INT,PETSC_MEMTYPE_HOST,j1+rem,PETSC_MEMTYPE_HOST,j2,MPI_REPLACE)); 6322 PetscCall(PetscSFReduceEnd(sf2,MPIU_INT,j1+rem,j2,MPI_REPLACE)); 6323 6324 PetscCall(PetscFree(offsets)); 6325 PetscCall(PetscFree2(sendto,nentries)); 6326 6327 /* ---------------------------------------------------------------*/ 6328 /* Sort received COOs by row along with the permutation array */ 6329 /* ---------------------------------------------------------------*/ 6330 for (k=0; k<n2; k++) perm2[k] = k; 6331 PetscCall(PetscSortIntWithIntCountArrayPair(n2,i2,j2,perm2)); 6332 6333 /* ---------------------------------------------------------------*/ 6334 /* Split received COOs into diag/offdiag portions */ 6335 /* ---------------------------------------------------------------*/ 6336 PetscCount 
*rowBegin2,*rowMid2,*rowEnd2; 6337 PetscCount *Ajmap2,*Aperm2,*Bjmap2,*Bperm2; 6338 PetscCount Annz2,Bnnz2,Atot2,Btot2; 6339 6340 PetscCall(PetscCalloc3(m,&rowBegin2,m,&rowMid2,m,&rowEnd2)); 6341 PetscCall(MatSplitEntries_Internal(mat,n2,i2,j2,perm2,rowBegin2,rowMid2,rowEnd2,&Atot2,&Aperm2,&Annz2,&Ajmap2,&Btot2,&Bperm2,&Bnnz2,&Bjmap2)); 6342 6343 /* --------------------------------------------------------------------------*/ 6344 /* Merge local COOs with received COOs: diag with diag, offdiag with offdiag */ 6345 /* --------------------------------------------------------------------------*/ 6346 PetscInt *Ai,*Bi; 6347 PetscInt *Aj,*Bj; 6348 6349 PetscCall(PetscMalloc1(m+1,&Ai)); 6350 PetscCall(PetscMalloc1(m+1,&Bi)); 6351 PetscCall(PetscMalloc1(Annz1+Annz2,&Aj)); /* Since local and remote entries might have dups, we might allocate excess memory */ 6352 PetscCall(PetscMalloc1(Bnnz1+Bnnz2,&Bj)); 6353 6354 PetscCount *Aimap1,*Bimap1,*Aimap2,*Bimap2; 6355 PetscCall(PetscMalloc4(Annz1,&Aimap1,Bnnz1,&Bimap1,Annz2,&Aimap2,Bnnz2,&Bimap2)); 6356 6357 PetscCall(MatMergeEntries_Internal(mat,j1,j2,rowBegin1,rowMid1,rowBegin2,rowMid2,Ajmap1,Ajmap2,Aimap1,Aimap2,Ai,Aj)); 6358 PetscCall(MatMergeEntries_Internal(mat,j1,j2,rowMid1, rowEnd1,rowMid2, rowEnd2,Bjmap1,Bjmap2,Bimap1,Bimap2,Bi,Bj)); 6359 PetscCall(PetscFree3(rowBegin1,rowMid1,rowEnd1)); 6360 PetscCall(PetscFree3(rowBegin2,rowMid2,rowEnd2)); 6361 PetscCall(PetscFree3(i1,j1,perm1)); 6362 PetscCall(PetscFree3(i2,j2,perm2)); 6363 6364 /* Reallocate Aj, Bj once we know actual numbers of unique nonzeros in A and B */ 6365 PetscInt Annz = Ai[m]; 6366 PetscInt Bnnz = Bi[m]; 6367 if (Annz < Annz1 + Annz2) { 6368 PetscInt *Aj_new; 6369 PetscCall(PetscMalloc1(Annz,&Aj_new)); 6370 PetscCall(PetscArraycpy(Aj_new,Aj,Annz)); 6371 PetscCall(PetscFree(Aj)); 6372 Aj = Aj_new; 6373 } 6374 6375 if (Bnnz < Bnnz1 + Bnnz2) { 6376 PetscInt *Bj_new; 6377 PetscCall(PetscMalloc1(Bnnz,&Bj_new)); 6378 PetscCall(PetscArraycpy(Bj_new,Bj,Bnnz)); 6379 
PetscCall(PetscFree(Bj)); 6380 Bj = Bj_new; 6381 } 6382 6383 /* --------------------------------------------------------------------------------*/ 6384 /* Create new submatrices for on-process and off-process coupling */ 6385 /* --------------------------------------------------------------------------------*/ 6386 PetscScalar *Aa,*Ba; 6387 MatType rtype; 6388 Mat_SeqAIJ *a,*b; 6389 PetscCall(PetscCalloc1(Annz,&Aa)); /* Zero matrix on device */ 6390 PetscCall(PetscCalloc1(Bnnz,&Ba)); 6391 /* make Aj[] local, i.e, based off the start column of the diagonal portion */ 6392 if (cstart) {for (k=0; k<Annz; k++) Aj[k] -= cstart;} 6393 PetscCall(MatDestroy(&mpiaij->A)); 6394 PetscCall(MatDestroy(&mpiaij->B)); 6395 PetscCall(MatGetRootType_Private(mat,&rtype)); 6396 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,Ai,Aj,Aa,&mpiaij->A)); 6397 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,mat->cmap->N,Bi,Bj,Ba,&mpiaij->B)); 6398 PetscCall(MatSetUpMultiply_MPIAIJ(mat)); 6399 6400 a = (Mat_SeqAIJ*)mpiaij->A->data; 6401 b = (Mat_SeqAIJ*)mpiaij->B->data; 6402 a->singlemalloc = b->singlemalloc = PETSC_FALSE; /* Let newmat own Ai,Aj,Aa,Bi,Bj,Ba */ 6403 a->free_a = b->free_a = PETSC_TRUE; 6404 a->free_ij = b->free_ij = PETSC_TRUE; 6405 6406 /* conversion must happen AFTER multiply setup */ 6407 PetscCall(MatConvert(mpiaij->A,rtype,MAT_INPLACE_MATRIX,&mpiaij->A)); 6408 PetscCall(MatConvert(mpiaij->B,rtype,MAT_INPLACE_MATRIX,&mpiaij->B)); 6409 PetscCall(VecDestroy(&mpiaij->lvec)); 6410 PetscCall(MatCreateVecs(mpiaij->B,&mpiaij->lvec,NULL)); 6411 PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)mpiaij->lvec)); 6412 6413 mpiaij->coo_n = coo_n; 6414 mpiaij->coo_sf = sf2; 6415 mpiaij->sendlen = nleaves; 6416 mpiaij->recvlen = nroots; 6417 6418 mpiaij->Annz1 = Annz1; 6419 mpiaij->Annz2 = Annz2; 6420 mpiaij->Bnnz1 = Bnnz1; 6421 mpiaij->Bnnz2 = Bnnz2; 6422 6423 mpiaij->Atot1 = Atot1; 6424 mpiaij->Atot2 = Atot2; 6425 mpiaij->Btot1 = Btot1; 6426 mpiaij->Btot2 = 
Btot2; 6427 6428 mpiaij->Aimap1 = Aimap1; 6429 mpiaij->Aimap2 = Aimap2; 6430 mpiaij->Bimap1 = Bimap1; 6431 mpiaij->Bimap2 = Bimap2; 6432 6433 mpiaij->Ajmap1 = Ajmap1; 6434 mpiaij->Ajmap2 = Ajmap2; 6435 mpiaij->Bjmap1 = Bjmap1; 6436 mpiaij->Bjmap2 = Bjmap2; 6437 6438 mpiaij->Aperm1 = Aperm1; 6439 mpiaij->Aperm2 = Aperm2; 6440 mpiaij->Bperm1 = Bperm1; 6441 mpiaij->Bperm2 = Bperm2; 6442 6443 mpiaij->Cperm1 = Cperm1; 6444 6445 /* Allocate in preallocation. If not used, it has zero cost on host */ 6446 PetscCall(PetscMalloc2(mpiaij->sendlen,&mpiaij->sendbuf,mpiaij->recvlen,&mpiaij->recvbuf)); 6447 PetscFunctionReturn(0); 6448 } 6449 6450 static PetscErrorCode MatSetValuesCOO_MPIAIJ(Mat mat,const PetscScalar v[],InsertMode imode) 6451 { 6452 Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ*)mat->data; 6453 Mat A = mpiaij->A,B = mpiaij->B; 6454 PetscCount Annz1 = mpiaij->Annz1,Annz2 = mpiaij->Annz2,Bnnz1 = mpiaij->Bnnz1,Bnnz2 = mpiaij->Bnnz2; 6455 PetscScalar *Aa,*Ba; 6456 PetscScalar *sendbuf = mpiaij->sendbuf; 6457 PetscScalar *recvbuf = mpiaij->recvbuf; 6458 const PetscCount *Ajmap1 = mpiaij->Ajmap1,*Ajmap2 = mpiaij->Ajmap2,*Aimap1 = mpiaij->Aimap1,*Aimap2 = mpiaij->Aimap2; 6459 const PetscCount *Bjmap1 = mpiaij->Bjmap1,*Bjmap2 = mpiaij->Bjmap2,*Bimap1 = mpiaij->Bimap1,*Bimap2 = mpiaij->Bimap2; 6460 const PetscCount *Aperm1 = mpiaij->Aperm1,*Aperm2 = mpiaij->Aperm2,*Bperm1 = mpiaij->Bperm1,*Bperm2 = mpiaij->Bperm2; 6461 const PetscCount *Cperm1 = mpiaij->Cperm1; 6462 6463 PetscFunctionBegin; 6464 PetscCall(MatSeqAIJGetArray(A,&Aa)); /* Might read and write matrix values */ 6465 PetscCall(MatSeqAIJGetArray(B,&Ba)); 6466 if (imode == INSERT_VALUES) { 6467 PetscCall(PetscMemzero(Aa,((Mat_SeqAIJ*)A->data)->nz*sizeof(PetscScalar))); 6468 PetscCall(PetscMemzero(Ba,((Mat_SeqAIJ*)B->data)->nz*sizeof(PetscScalar))); 6469 } 6470 6471 /* Pack entries to be sent to remote */ 6472 for (PetscCount i=0; i<mpiaij->sendlen; i++) sendbuf[i] = v[Cperm1[i]]; 6473 6474 /* Send remote entries to their 
owner and overlap the communication with local computation */ 6475 PetscCall(PetscSFReduceWithMemTypeBegin(mpiaij->coo_sf,MPIU_SCALAR,PETSC_MEMTYPE_HOST,sendbuf,PETSC_MEMTYPE_HOST,recvbuf,MPI_REPLACE)); 6476 /* Add local entries to A and B */ 6477 for (PetscCount i=0; i<Annz1; i++) { 6478 for (PetscCount k=Ajmap1[i]; k<Ajmap1[i+1]; k++) Aa[Aimap1[i]] += v[Aperm1[k]]; 6479 } 6480 for (PetscCount i=0; i<Bnnz1; i++) { 6481 for (PetscCount k=Bjmap1[i]; k<Bjmap1[i+1]; k++) Ba[Bimap1[i]] += v[Bperm1[k]]; 6482 } 6483 PetscCall(PetscSFReduceEnd(mpiaij->coo_sf,MPIU_SCALAR,sendbuf,recvbuf,MPI_REPLACE)); 6484 6485 /* Add received remote entries to A and B */ 6486 for (PetscCount i=0; i<Annz2; i++) { 6487 for (PetscCount k=Ajmap2[i]; k<Ajmap2[i+1]; k++) Aa[Aimap2[i]] += recvbuf[Aperm2[k]]; 6488 } 6489 for (PetscCount i=0; i<Bnnz2; i++) { 6490 for (PetscCount k=Bjmap2[i]; k<Bjmap2[i+1]; k++) Ba[Bimap2[i]] += recvbuf[Bperm2[k]]; 6491 } 6492 PetscCall(MatSeqAIJRestoreArray(A,&Aa)); 6493 PetscCall(MatSeqAIJRestoreArray(B,&Ba)); 6494 PetscFunctionReturn(0); 6495 } 6496 6497 /* ----------------------------------------------------------------*/ 6498 6499 /*MC 6500 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 6501 6502 Options Database Keys: 6503 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 6504 6505 Level: beginner 6506 6507 Notes: 6508 MatSetValues() may be called for this matrix type with a NULL argument for the numerical values, 6509 in this case the values associated with the rows and columns one passes in are set to zero 6510 in the matrix 6511 6512 MatSetOptions(,MAT_STRUCTURE_ONLY,PETSC_TRUE) may be called for this matrix type. 
In this case no
    space is allocated for the nonzero entries and any entries passed with MatSetValues() are ignored

.seealso: MatCreateAIJ()
M*/

/* Constructor for the "mpiaij" matrix type: allocates the Mat_MPIAIJ data structure, installs the
   operation table, creates the off-process value stash, and registers all the composed-function
   entry points (preallocation, conversions, products, COO assembly) for this type. */
PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
{
  Mat_MPIAIJ  *b;
  PetscMPIInt size;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B),&size));

  PetscCall(PetscNewLog(B,&b));
  B->data = (void*)b;
  PetscCall(PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps)));
  B->assembled = PETSC_FALSE;
  B->insertmode = NOT_SET_VALUES;
  b->size = size;

  PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank));

  /* build cache for off array entries formed */
  PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash));

  b->donotstash = PETSC_FALSE;
  b->colmap = NULL;
  b->garray = NULL;
  b->roworiented = PETSC_TRUE;

  /* stuff used for matrix vector multiply */
  b->lvec = NULL;
  b->Mvctx = NULL;

  /* stuff for MatGetRow() */
  b->rowindices = NULL;
  b->rowvalues = NULL;
  b->getrowactive = PETSC_FALSE;

  /* flexible pointer used in CUSPARSE classes */
  b->spptr = NULL;

  /* Type-specific entry points queried via PetscObjectQueryFunction() */
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetUseScalableIncreaseOverlap_C",MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatResetPreallocation_C",MatResetPreallocation_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ));
  /* Conversions to sibling AIJ formats and other matrix types (some available only with optional packages) */
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijsell_C",MatConvert_MPIAIJ_MPIAIJSELL));
#if defined(PETSC_HAVE_CUDA)
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcusparse_C",MatConvert_MPIAIJ_MPIAIJCUSPARSE));
#endif
#if defined(PETSC_HAVE_KOKKOS_KERNELS)
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijkokkos_C",MatConvert_MPIAIJ_MPIAIJKokkos));
#endif
#if defined(PETSC_HAVE_MKL_SPARSE)
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijmkl_C",MatConvert_MPIAIJ_MPIAIJMKL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpibaij_C",MatConvert_MPIAIJ_MPIBAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpidense_C",MatConvert_MPIAIJ_MPIDense));
#if defined(PETSC_HAVE_ELEMENTAL)
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_elemental_C",MatConvert_MPIAIJ_Elemental));
#endif
#if defined(PETSC_HAVE_SCALAPACK)
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_scalapack_C",MatConvert_AIJ_ScaLAPACK));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_is_C",MatConvert_XAIJ_IS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisell_C",MatConvert_MPIAIJ_MPISELL));
#if defined(PETSC_HAVE_HYPRE)
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_hypre_C",MatConvert_AIJ_HYPRE));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_transpose_mpiaij_mpiaij_C",MatProductSetFromOptions_Transpose_AIJ_AIJ));
#endif
  /* Matrix products and COO assembly */
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_is_mpiaij_C",MatProductSetFromOptions_IS_XAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaij_mpiaij_C",MatProductSetFromOptions_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatSetPreallocationCOO_C",MatSetPreallocationCOO_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatSetValuesCOO_C",MatSetValuesCOO_MPIAIJ));
  PetscCall(PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ));
  PetscFunctionReturn(0);
}

/*@C
     MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal"
         and "off-diagonal" part of the matrix in CSR format.

   Collective

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (Cannot be PETSC_DECIDE)
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
       calculated if N is given) For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.  i - row indices for "diagonal" portion of matrix; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
.  j - column indices, which must be local, i.e., based off the start column of the diagonal portion
.
a - matrix values 6615 . oi - row indices for "off-diagonal" portion of matrix; that is oi[0] = 0, oi[row] = oi[row-1] + number of elements in that row of the matrix 6616 . oj - column indices, which must be global, representing global columns in the MPIAIJ matrix 6617 - oa - matrix values 6618 6619 Output Parameter: 6620 . mat - the matrix 6621 6622 Level: advanced 6623 6624 Notes: 6625 The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user 6626 must free the arrays once the matrix has been destroyed and not before. 6627 6628 The i and j indices are 0 based 6629 6630 See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix 6631 6632 This sets local rows and cannot be used to set off-processor values. 6633 6634 Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a 6635 legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does 6636 not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because 6637 the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to 6638 keep track of the underlying array. Use MatSetOption(A,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all 6639 communication if it is known that only local entries will be set. 
6640 6641 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 6642 MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays() 6643 @*/ 6644 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat) 6645 { 6646 Mat_MPIAIJ *maij; 6647 6648 PetscFunctionBegin; 6649 PetscCheckFalse(m < 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 6650 PetscCheckFalse(i[0],PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 6651 PetscCheckFalse(oi[0],PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0"); 6652 PetscCall(MatCreate(comm,mat)); 6653 PetscCall(MatSetSizes(*mat,m,n,M,N)); 6654 PetscCall(MatSetType(*mat,MATMPIAIJ)); 6655 maij = (Mat_MPIAIJ*) (*mat)->data; 6656 6657 (*mat)->preallocated = PETSC_TRUE; 6658 6659 PetscCall(PetscLayoutSetUp((*mat)->rmap)); 6660 PetscCall(PetscLayoutSetUp((*mat)->cmap)); 6661 6662 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A)); 6663 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B)); 6664 6665 PetscCall(MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE)); 6666 PetscCall(MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY)); 6667 PetscCall(MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY)); 6668 PetscCall(MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE)); 6669 PetscCall(MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE)); 6670 PetscFunctionReturn(0); 6671 } 6672 6673 typedef struct { 6674 Mat *mp; /* intermediate products */ 6675 PetscBool *mptmp; /* is the intermediate product temporary ? 
*/ 6676 PetscInt cp; /* number of intermediate products */ 6677 6678 /* support for MatGetBrowsOfAoCols_MPIAIJ for P_oth */ 6679 PetscInt *startsj_s,*startsj_r; 6680 PetscScalar *bufa; 6681 Mat P_oth; 6682 6683 /* may take advantage of merging product->B */ 6684 Mat Bloc; /* B-local by merging diag and off-diag */ 6685 6686 /* cusparse does not have support to split between symbolic and numeric phases. 6687 When api_user is true, we don't need to update the numerical values 6688 of the temporary storage */ 6689 PetscBool reusesym; 6690 6691 /* support for COO values insertion */ 6692 PetscScalar *coo_v,*coo_w; /* store on-process and off-process COO scalars, and used as MPI recv/send buffers respectively */ 6693 PetscInt **own; /* own[i] points to address of on-process COO indices for Mat mp[i] */ 6694 PetscInt **off; /* off[i] points to address of off-process COO indices for Mat mp[i] */ 6695 PetscBool hasoffproc; /* if true, have off-process values insertion (i.e. AtB or PtAP) */ 6696 PetscSF sf; /* used for non-local values insertion and memory malloc */ 6697 PetscMemType mtype; 6698 6699 /* customization */ 6700 PetscBool abmerge; 6701 PetscBool P_oth_bind; 6702 } MatMatMPIAIJBACKEND; 6703 6704 PetscErrorCode MatDestroy_MatMatMPIAIJBACKEND(void *data) 6705 { 6706 MatMatMPIAIJBACKEND *mmdata = (MatMatMPIAIJBACKEND*)data; 6707 PetscInt i; 6708 6709 PetscFunctionBegin; 6710 PetscCall(PetscFree2(mmdata->startsj_s,mmdata->startsj_r)); 6711 PetscCall(PetscFree(mmdata->bufa)); 6712 PetscCall(PetscSFFree(mmdata->sf,mmdata->mtype,mmdata->coo_v)); 6713 PetscCall(PetscSFFree(mmdata->sf,mmdata->mtype,mmdata->coo_w)); 6714 PetscCall(MatDestroy(&mmdata->P_oth)); 6715 PetscCall(MatDestroy(&mmdata->Bloc)); 6716 PetscCall(PetscSFDestroy(&mmdata->sf)); 6717 for (i = 0; i < mmdata->cp; i++) { 6718 PetscCall(MatDestroy(&mmdata->mp[i])); 6719 } 6720 PetscCall(PetscFree2(mmdata->mp,mmdata->mptmp)); 6721 PetscCall(PetscFree(mmdata->own[0])); 6722 PetscCall(PetscFree(mmdata->own)); 
6723 PetscCall(PetscFree(mmdata->off[0])); 6724 PetscCall(PetscFree(mmdata->off)); 6725 PetscCall(PetscFree(mmdata)); 6726 PetscFunctionReturn(0); 6727 } 6728 6729 /* Copy selected n entries with indices in idx[] of A to v[]. 6730 If idx is NULL, copy the whole data array of A to v[] 6731 */ 6732 static PetscErrorCode MatSeqAIJCopySubArray(Mat A, PetscInt n, const PetscInt idx[], PetscScalar v[]) 6733 { 6734 PetscErrorCode (*f)(Mat,PetscInt,const PetscInt[],PetscScalar[]); 6735 6736 PetscFunctionBegin; 6737 PetscCall(PetscObjectQueryFunction((PetscObject)A,"MatSeqAIJCopySubArray_C",&f)); 6738 if (f) { 6739 PetscCall((*f)(A,n,idx,v)); 6740 } else { 6741 const PetscScalar *vv; 6742 6743 PetscCall(MatSeqAIJGetArrayRead(A,&vv)); 6744 if (n && idx) { 6745 PetscScalar *w = v; 6746 const PetscInt *oi = idx; 6747 PetscInt j; 6748 6749 for (j = 0; j < n; j++) *w++ = vv[*oi++]; 6750 } else { 6751 PetscCall(PetscArraycpy(v,vv,n)); 6752 } 6753 PetscCall(MatSeqAIJRestoreArrayRead(A,&vv)); 6754 } 6755 PetscFunctionReturn(0); 6756 } 6757 6758 static PetscErrorCode MatProductNumeric_MPIAIJBACKEND(Mat C) 6759 { 6760 MatMatMPIAIJBACKEND *mmdata; 6761 PetscInt i,n_d,n_o; 6762 6763 PetscFunctionBegin; 6764 MatCheckProduct(C,1); 6765 PetscCheck(C->product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty"); 6766 mmdata = (MatMatMPIAIJBACKEND*)C->product->data; 6767 if (!mmdata->reusesym) { /* update temporary matrices */ 6768 if (mmdata->P_oth) { 6769 PetscCall(MatGetBrowsOfAoCols_MPIAIJ(C->product->A,C->product->B,MAT_REUSE_MATRIX,&mmdata->startsj_s,&mmdata->startsj_r,&mmdata->bufa,&mmdata->P_oth)); 6770 } 6771 if (mmdata->Bloc) { 6772 PetscCall(MatMPIAIJGetLocalMatMerge(C->product->B,MAT_REUSE_MATRIX,NULL,&mmdata->Bloc)); 6773 } 6774 } 6775 mmdata->reusesym = PETSC_FALSE; 6776 6777 for (i = 0; i < mmdata->cp; i++) { 6778 PetscCheckFalse(!mmdata->mp[i]->ops->productnumeric,PetscObjectComm((PetscObject)mmdata->mp[i]),PETSC_ERR_PLIB,"Missing numeric op for 
%s",MatProductTypes[mmdata->mp[i]->product->type]); 6779 PetscCall((*mmdata->mp[i]->ops->productnumeric)(mmdata->mp[i])); 6780 } 6781 for (i = 0, n_d = 0, n_o = 0; i < mmdata->cp; i++) { 6782 PetscInt noff = mmdata->off[i+1] - mmdata->off[i]; 6783 6784 if (mmdata->mptmp[i]) continue; 6785 if (noff) { 6786 PetscInt nown = mmdata->own[i+1] - mmdata->own[i]; 6787 6788 PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i],noff,mmdata->off[i],mmdata->coo_w + n_o)); 6789 PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i],nown,mmdata->own[i],mmdata->coo_v + n_d)); 6790 n_o += noff; 6791 n_d += nown; 6792 } else { 6793 Mat_SeqAIJ *mm = (Mat_SeqAIJ*)mmdata->mp[i]->data; 6794 6795 PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i],mm->nz,NULL,mmdata->coo_v + n_d)); 6796 n_d += mm->nz; 6797 } 6798 } 6799 if (mmdata->hasoffproc) { /* offprocess insertion */ 6800 PetscCall(PetscSFGatherBegin(mmdata->sf,MPIU_SCALAR,mmdata->coo_w,mmdata->coo_v+n_d)); 6801 PetscCall(PetscSFGatherEnd(mmdata->sf,MPIU_SCALAR,mmdata->coo_w,mmdata->coo_v+n_d)); 6802 } 6803 PetscCall(MatSetValuesCOO(C,mmdata->coo_v,INSERT_VALUES)); 6804 PetscFunctionReturn(0); 6805 } 6806 6807 /* Support for Pt * A, A * P, or Pt * A * P */ 6808 #define MAX_NUMBER_INTERMEDIATE 4 6809 PetscErrorCode MatProductSymbolic_MPIAIJBACKEND(Mat C) 6810 { 6811 Mat_Product *product = C->product; 6812 Mat A,P,mp[MAX_NUMBER_INTERMEDIATE]; /* A, P and a series of intermediate matrices */ 6813 Mat_MPIAIJ *a,*p; 6814 MatMatMPIAIJBACKEND *mmdata; 6815 ISLocalToGlobalMapping P_oth_l2g = NULL; 6816 IS glob = NULL; 6817 const char *prefix; 6818 char pprefix[256]; 6819 const PetscInt *globidx,*P_oth_idx; 6820 PetscInt i,j,cp,m,n,M,N,*coo_i,*coo_j; 6821 PetscCount ncoo,ncoo_d,ncoo_o,ncoo_oown; 6822 PetscInt cmapt[MAX_NUMBER_INTERMEDIATE],rmapt[MAX_NUMBER_INTERMEDIATE]; /* col/row map type for each Mat in mp[]. 
*/ 6823 /* type-0: consecutive, start from 0; type-1: consecutive with */ 6824 /* a base offset; type-2: sparse with a local to global map table */ 6825 const PetscInt *cmapa[MAX_NUMBER_INTERMEDIATE],*rmapa[MAX_NUMBER_INTERMEDIATE]; /* col/row local to global map array (table) for type-2 map type */ 6826 6827 MatProductType ptype; 6828 PetscBool mptmp[MAX_NUMBER_INTERMEDIATE],hasoffproc = PETSC_FALSE,iscuda,iskokk; 6829 PetscMPIInt size; 6830 PetscErrorCode ierr; 6831 6832 PetscFunctionBegin; 6833 MatCheckProduct(C,1); 6834 PetscCheck(!product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data not empty"); 6835 ptype = product->type; 6836 if (product->A->symmetric && ptype == MATPRODUCT_AtB) { 6837 ptype = MATPRODUCT_AB; 6838 product->symbolic_used_the_fact_A_is_symmetric = PETSC_TRUE; 6839 } 6840 switch (ptype) { 6841 case MATPRODUCT_AB: 6842 A = product->A; 6843 P = product->B; 6844 m = A->rmap->n; 6845 n = P->cmap->n; 6846 M = A->rmap->N; 6847 N = P->cmap->N; 6848 hasoffproc = PETSC_FALSE; /* will not scatter mat product values to other processes */ 6849 break; 6850 case MATPRODUCT_AtB: 6851 P = product->A; 6852 A = product->B; 6853 m = P->cmap->n; 6854 n = A->cmap->n; 6855 M = P->cmap->N; 6856 N = A->cmap->N; 6857 hasoffproc = PETSC_TRUE; 6858 break; 6859 case MATPRODUCT_PtAP: 6860 A = product->A; 6861 P = product->B; 6862 m = P->cmap->n; 6863 n = P->cmap->n; 6864 M = P->cmap->N; 6865 N = P->cmap->N; 6866 hasoffproc = PETSC_TRUE; 6867 break; 6868 default: 6869 SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Not for product type %s",MatProductTypes[ptype]); 6870 } 6871 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)C),&size)); 6872 if (size == 1) hasoffproc = PETSC_FALSE; 6873 6874 /* defaults */ 6875 for (i=0;i<MAX_NUMBER_INTERMEDIATE;i++) { 6876 mp[i] = NULL; 6877 mptmp[i] = PETSC_FALSE; 6878 rmapt[i] = -1; 6879 cmapt[i] = -1; 6880 rmapa[i] = NULL; 6881 cmapa[i] = NULL; 6882 } 6883 6884 /* customization */ 6885 
PetscCall(PetscNew(&mmdata)); 6886 mmdata->reusesym = product->api_user; 6887 if (ptype == MATPRODUCT_AB) { 6888 if (product->api_user) { 6889 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatMatMult","Mat");PetscCall(ierr); 6890 PetscCall(PetscOptionsBool("-matmatmult_backend_mergeB","Merge product->B local matrices","MatMatMult",mmdata->abmerge,&mmdata->abmerge,NULL)); 6891 PetscCall(PetscOptionsBool("-matmatmult_backend_pothbind","Bind P_oth to CPU","MatBindToCPU",mmdata->P_oth_bind,&mmdata->P_oth_bind,NULL)); 6892 ierr = PetscOptionsEnd();PetscCall(ierr); 6893 } else { 6894 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_AB","Mat");PetscCall(ierr); 6895 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_mergeB","Merge product->B local matrices","MatMatMult",mmdata->abmerge,&mmdata->abmerge,NULL)); 6896 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_pothbind","Bind P_oth to CPU","MatBindToCPU",mmdata->P_oth_bind,&mmdata->P_oth_bind,NULL)); 6897 ierr = PetscOptionsEnd();PetscCall(ierr); 6898 } 6899 } else if (ptype == MATPRODUCT_PtAP) { 6900 if (product->api_user) { 6901 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatPtAP","Mat");PetscCall(ierr); 6902 PetscCall(PetscOptionsBool("-matptap_backend_pothbind","Bind P_oth to CPU","MatBindToCPU",mmdata->P_oth_bind,&mmdata->P_oth_bind,NULL)); 6903 ierr = PetscOptionsEnd();PetscCall(ierr); 6904 } else { 6905 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_PtAP","Mat");PetscCall(ierr); 6906 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_pothbind","Bind P_oth to CPU","MatBindToCPU",mmdata->P_oth_bind,&mmdata->P_oth_bind,NULL)); 6907 ierr = PetscOptionsEnd();PetscCall(ierr); 6908 } 6909 } 6910 a = (Mat_MPIAIJ*)A->data; 6911 p = (Mat_MPIAIJ*)P->data; 6912 PetscCall(MatSetSizes(C,m,n,M,N)); 6913 
PetscCall(PetscLayoutSetUp(C->rmap)); 6914 PetscCall(PetscLayoutSetUp(C->cmap)); 6915 PetscCall(MatSetType(C,((PetscObject)A)->type_name)); 6916 PetscCall(MatGetOptionsPrefix(C,&prefix)); 6917 6918 cp = 0; 6919 switch (ptype) { 6920 case MATPRODUCT_AB: /* A * P */ 6921 PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A,P,MAT_INITIAL_MATRIX,&mmdata->startsj_s,&mmdata->startsj_r,&mmdata->bufa,&mmdata->P_oth)); 6922 6923 /* A_diag * P_local (merged or not) */ 6924 if (mmdata->abmerge) { /* P's diagonal and off-diag blocks are merged to one matrix, then multiplied by A_diag */ 6925 /* P is product->B */ 6926 PetscCall(MatMPIAIJGetLocalMatMerge(P,MAT_INITIAL_MATRIX,&glob,&mmdata->Bloc)); 6927 PetscCall(MatProductCreate(a->A,mmdata->Bloc,NULL,&mp[cp])); 6928 PetscCall(MatProductSetType(mp[cp],MATPRODUCT_AB)); 6929 PetscCall(MatProductSetFill(mp[cp],product->fill)); 6930 PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp)); 6931 PetscCall(MatSetOptionsPrefix(mp[cp],prefix)); 6932 PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix)); 6933 mp[cp]->product->api_user = product->api_user; 6934 PetscCall(MatProductSetFromOptions(mp[cp])); 6935 PetscCheckFalse(!mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]); 6936 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 6937 PetscCall(ISGetIndices(glob,&globidx)); 6938 rmapt[cp] = 1; 6939 cmapt[cp] = 2; 6940 cmapa[cp] = globidx; 6941 mptmp[cp] = PETSC_FALSE; 6942 cp++; 6943 } else { /* A_diag * P_diag and A_diag * P_off */ 6944 PetscCall(MatProductCreate(a->A,p->A,NULL,&mp[cp])); 6945 PetscCall(MatProductSetType(mp[cp],MATPRODUCT_AB)); 6946 PetscCall(MatProductSetFill(mp[cp],product->fill)); 6947 PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp)); 6948 PetscCall(MatSetOptionsPrefix(mp[cp],prefix)); 6949 PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix)); 6950 mp[cp]->product->api_user = 
product->api_user; 6951 PetscCall(MatProductSetFromOptions(mp[cp])); 6952 PetscCheckFalse(!mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]); 6953 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 6954 rmapt[cp] = 1; 6955 cmapt[cp] = 1; 6956 mptmp[cp] = PETSC_FALSE; 6957 cp++; 6958 PetscCall(MatProductCreate(a->A,p->B,NULL,&mp[cp])); 6959 PetscCall(MatProductSetType(mp[cp],MATPRODUCT_AB)); 6960 PetscCall(MatProductSetFill(mp[cp],product->fill)); 6961 PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp)); 6962 PetscCall(MatSetOptionsPrefix(mp[cp],prefix)); 6963 PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix)); 6964 mp[cp]->product->api_user = product->api_user; 6965 PetscCall(MatProductSetFromOptions(mp[cp])); 6966 PetscCheckFalse(!mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]); 6967 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 6968 rmapt[cp] = 1; 6969 cmapt[cp] = 2; 6970 cmapa[cp] = p->garray; 6971 mptmp[cp] = PETSC_FALSE; 6972 cp++; 6973 } 6974 6975 /* A_off * P_other */ 6976 if (mmdata->P_oth) { 6977 PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth,&P_oth_l2g)); /* make P_oth use local col ids */ 6978 PetscCall(ISLocalToGlobalMappingGetIndices(P_oth_l2g,&P_oth_idx)); 6979 PetscCall(MatSetType(mmdata->P_oth,((PetscObject)(a->B))->type_name)); 6980 PetscCall(MatBindToCPU(mmdata->P_oth,mmdata->P_oth_bind)); 6981 PetscCall(MatProductCreate(a->B,mmdata->P_oth,NULL,&mp[cp])); 6982 PetscCall(MatProductSetType(mp[cp],MATPRODUCT_AB)); 6983 PetscCall(MatProductSetFill(mp[cp],product->fill)); 6984 PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp)); 6985 PetscCall(MatSetOptionsPrefix(mp[cp],prefix)); 6986 PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix)); 6987 mp[cp]->product->api_user = 
product->api_user; 6988 PetscCall(MatProductSetFromOptions(mp[cp])); 6989 PetscCheckFalse(!mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]); 6990 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 6991 rmapt[cp] = 1; 6992 cmapt[cp] = 2; 6993 cmapa[cp] = P_oth_idx; 6994 mptmp[cp] = PETSC_FALSE; 6995 cp++; 6996 } 6997 break; 6998 6999 case MATPRODUCT_AtB: /* (P^t * A): P_diag * A_loc + P_off * A_loc */ 7000 /* A is product->B */ 7001 PetscCall(MatMPIAIJGetLocalMatMerge(A,MAT_INITIAL_MATRIX,&glob,&mmdata->Bloc)); 7002 if (A == P) { /* when A==P, we can take advantage of the already merged mmdata->Bloc */ 7003 PetscCall(MatProductCreate(mmdata->Bloc,mmdata->Bloc,NULL,&mp[cp])); 7004 PetscCall(MatProductSetType(mp[cp],MATPRODUCT_AtB)); 7005 PetscCall(MatProductSetFill(mp[cp],product->fill)); 7006 PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp)); 7007 PetscCall(MatSetOptionsPrefix(mp[cp],prefix)); 7008 PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix)); 7009 mp[cp]->product->api_user = product->api_user; 7010 PetscCall(MatProductSetFromOptions(mp[cp])); 7011 PetscCheckFalse(!mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]); 7012 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7013 PetscCall(ISGetIndices(glob,&globidx)); 7014 rmapt[cp] = 2; 7015 rmapa[cp] = globidx; 7016 cmapt[cp] = 2; 7017 cmapa[cp] = globidx; 7018 mptmp[cp] = PETSC_FALSE; 7019 cp++; 7020 } else { 7021 PetscCall(MatProductCreate(p->A,mmdata->Bloc,NULL,&mp[cp])); 7022 PetscCall(MatProductSetType(mp[cp],MATPRODUCT_AtB)); 7023 PetscCall(MatProductSetFill(mp[cp],product->fill)); 7024 PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp)); 7025 PetscCall(MatSetOptionsPrefix(mp[cp],prefix)); 7026 PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix)); 
7027 mp[cp]->product->api_user = product->api_user; 7028 PetscCall(MatProductSetFromOptions(mp[cp])); 7029 PetscCheckFalse(!mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]); 7030 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7031 PetscCall(ISGetIndices(glob,&globidx)); 7032 rmapt[cp] = 1; 7033 cmapt[cp] = 2; 7034 cmapa[cp] = globidx; 7035 mptmp[cp] = PETSC_FALSE; 7036 cp++; 7037 PetscCall(MatProductCreate(p->B,mmdata->Bloc,NULL,&mp[cp])); 7038 PetscCall(MatProductSetType(mp[cp],MATPRODUCT_AtB)); 7039 PetscCall(MatProductSetFill(mp[cp],product->fill)); 7040 PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp)); 7041 PetscCall(MatSetOptionsPrefix(mp[cp],prefix)); 7042 PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix)); 7043 mp[cp]->product->api_user = product->api_user; 7044 PetscCall(MatProductSetFromOptions(mp[cp])); 7045 PetscCheckFalse(!mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]); 7046 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7047 rmapt[cp] = 2; 7048 rmapa[cp] = p->garray; 7049 cmapt[cp] = 2; 7050 cmapa[cp] = globidx; 7051 mptmp[cp] = PETSC_FALSE; 7052 cp++; 7053 } 7054 break; 7055 case MATPRODUCT_PtAP: 7056 PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A,P,MAT_INITIAL_MATRIX,&mmdata->startsj_s,&mmdata->startsj_r,&mmdata->bufa,&mmdata->P_oth)); 7057 /* P is product->B */ 7058 PetscCall(MatMPIAIJGetLocalMatMerge(P,MAT_INITIAL_MATRIX,&glob,&mmdata->Bloc)); 7059 PetscCall(MatProductCreate(a->A,mmdata->Bloc,NULL,&mp[cp])); 7060 PetscCall(MatProductSetType(mp[cp],MATPRODUCT_PtAP)); 7061 PetscCall(MatProductSetFill(mp[cp],product->fill)); 7062 PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp)); 7063 PetscCall(MatSetOptionsPrefix(mp[cp],prefix)); 7064 
PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix)); 7065 mp[cp]->product->api_user = product->api_user; 7066 PetscCall(MatProductSetFromOptions(mp[cp])); 7067 PetscCheckFalse(!mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]); 7068 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7069 PetscCall(ISGetIndices(glob,&globidx)); 7070 rmapt[cp] = 2; 7071 rmapa[cp] = globidx; 7072 cmapt[cp] = 2; 7073 cmapa[cp] = globidx; 7074 mptmp[cp] = PETSC_FALSE; 7075 cp++; 7076 if (mmdata->P_oth) { 7077 PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth,&P_oth_l2g)); 7078 PetscCall(ISLocalToGlobalMappingGetIndices(P_oth_l2g,&P_oth_idx)); 7079 PetscCall(MatSetType(mmdata->P_oth,((PetscObject)(a->B))->type_name)); 7080 PetscCall(MatBindToCPU(mmdata->P_oth,mmdata->P_oth_bind)); 7081 PetscCall(MatProductCreate(a->B,mmdata->P_oth,NULL,&mp[cp])); 7082 PetscCall(MatProductSetType(mp[cp],MATPRODUCT_AB)); 7083 PetscCall(MatProductSetFill(mp[cp],product->fill)); 7084 PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp)); 7085 PetscCall(MatSetOptionsPrefix(mp[cp],prefix)); 7086 PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix)); 7087 mp[cp]->product->api_user = product->api_user; 7088 PetscCall(MatProductSetFromOptions(mp[cp])); 7089 PetscCheckFalse(!mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]); 7090 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7091 mptmp[cp] = PETSC_TRUE; 7092 cp++; 7093 PetscCall(MatProductCreate(mmdata->Bloc,mp[1],NULL,&mp[cp])); 7094 PetscCall(MatProductSetType(mp[cp],MATPRODUCT_AtB)); 7095 PetscCall(MatProductSetFill(mp[cp],product->fill)); 7096 PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp)); 7097 PetscCall(MatSetOptionsPrefix(mp[cp],prefix)); 7098 
PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix)); 7099 mp[cp]->product->api_user = product->api_user; 7100 PetscCall(MatProductSetFromOptions(mp[cp])); 7101 PetscCheckFalse(!mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]); 7102 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7103 rmapt[cp] = 2; 7104 rmapa[cp] = globidx; 7105 cmapt[cp] = 2; 7106 cmapa[cp] = P_oth_idx; 7107 mptmp[cp] = PETSC_FALSE; 7108 cp++; 7109 } 7110 break; 7111 default: 7112 SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Not for product type %s",MatProductTypes[ptype]); 7113 } 7114 /* sanity check */ 7115 if (size > 1) for (i = 0; i < cp; i++) PetscCheckFalse(rmapt[i] == 2 && !hasoffproc,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected offproc map type for product %" PetscInt_FMT,i); 7116 7117 PetscCall(PetscMalloc2(cp,&mmdata->mp,cp,&mmdata->mptmp)); 7118 for (i = 0; i < cp; i++) { 7119 mmdata->mp[i] = mp[i]; 7120 mmdata->mptmp[i] = mptmp[i]; 7121 } 7122 mmdata->cp = cp; 7123 C->product->data = mmdata; 7124 C->product->destroy = MatDestroy_MatMatMPIAIJBACKEND; 7125 C->ops->productnumeric = MatProductNumeric_MPIAIJBACKEND; 7126 7127 /* memory type */ 7128 mmdata->mtype = PETSC_MEMTYPE_HOST; 7129 PetscCall(PetscObjectTypeCompareAny((PetscObject)C,&iscuda,MATSEQAIJCUSPARSE,MATMPIAIJCUSPARSE,"")); 7130 PetscCall(PetscObjectTypeCompareAny((PetscObject)C,&iskokk,MATSEQAIJKOKKOS,MATMPIAIJKOKKOS,"")); 7131 if (iscuda) mmdata->mtype = PETSC_MEMTYPE_CUDA; 7132 else if (iskokk) mmdata->mtype = PETSC_MEMTYPE_KOKKOS; 7133 7134 /* prepare coo coordinates for values insertion */ 7135 7136 /* count total nonzeros of those intermediate seqaij Mats 7137 ncoo_d: # of nonzeros of matrices that do not have offproc entries 7138 ncoo_o: # of nonzeros (of matrices that might have offproc entries) that will be inserted to remote procs 7139 ncoo_oown: # of nonzeros (of matrices that might have offproc entries) 
that will be inserted locally 7140 */ 7141 for (cp = 0, ncoo_d = 0, ncoo_o = 0, ncoo_oown = 0; cp < mmdata->cp; cp++) { 7142 Mat_SeqAIJ *mm = (Mat_SeqAIJ*)mp[cp]->data; 7143 if (mptmp[cp]) continue; 7144 if (rmapt[cp] == 2 && hasoffproc) { /* the rows need to be scatter to all processes (might include self) */ 7145 const PetscInt *rmap = rmapa[cp]; 7146 const PetscInt mr = mp[cp]->rmap->n; 7147 const PetscInt rs = C->rmap->rstart; 7148 const PetscInt re = C->rmap->rend; 7149 const PetscInt *ii = mm->i; 7150 for (i = 0; i < mr; i++) { 7151 const PetscInt gr = rmap[i]; 7152 const PetscInt nz = ii[i+1] - ii[i]; 7153 if (gr < rs || gr >= re) ncoo_o += nz; /* this row is offproc */ 7154 else ncoo_oown += nz; /* this row is local */ 7155 } 7156 } else ncoo_d += mm->nz; 7157 } 7158 7159 /* 7160 ncoo: total number of nonzeros (including those inserted by remote procs) belonging to this proc 7161 7162 ncoo = ncoo_d + ncoo_oown + ncoo2, which ncoo2 is number of nonzeros inserted to me by other procs. 7163 7164 off[0] points to a big index array, which is shared by off[1,2,...]. Similarily, for own[0]. 7165 7166 off[p]: points to the segment for matrix mp[p], storing location of nonzeros that mp[p] will insert to others 7167 own[p]: points to the segment for matrix mp[p], storing location of nonzeros that mp[p] will insert locally 7168 so, off[p+1]-off[p] is the number of nonzeros that mp[p] will send to others. 7169 7170 coo_i/j/v[]: [ncoo] row/col/val of nonzeros belonging to this proc. 7171 Ex. coo_i[]: the beginning part (of size ncoo_d + ncoo_oown) stores i of local nonzeros, and the remaing part stores i of nonzeros I will receive. 
7172 */ 7173 PetscCall(PetscCalloc1(mmdata->cp+1,&mmdata->off)); /* +1 to make a csr-like data structure */ 7174 PetscCall(PetscCalloc1(mmdata->cp+1,&mmdata->own)); 7175 7176 /* gather (i,j) of nonzeros inserted by remote procs */ 7177 if (hasoffproc) { 7178 PetscSF msf; 7179 PetscInt ncoo2,*coo_i2,*coo_j2; 7180 7181 PetscCall(PetscMalloc1(ncoo_o,&mmdata->off[0])); 7182 PetscCall(PetscMalloc1(ncoo_oown,&mmdata->own[0])); 7183 PetscCall(PetscMalloc2(ncoo_o,&coo_i,ncoo_o,&coo_j)); /* to collect (i,j) of entries to be sent to others */ 7184 7185 for (cp = 0, ncoo_o = 0; cp < mmdata->cp; cp++) { 7186 Mat_SeqAIJ *mm = (Mat_SeqAIJ*)mp[cp]->data; 7187 PetscInt *idxoff = mmdata->off[cp]; 7188 PetscInt *idxown = mmdata->own[cp]; 7189 if (!mptmp[cp] && rmapt[cp] == 2) { /* row map is sparse */ 7190 const PetscInt *rmap = rmapa[cp]; 7191 const PetscInt *cmap = cmapa[cp]; 7192 const PetscInt *ii = mm->i; 7193 PetscInt *coi = coo_i + ncoo_o; 7194 PetscInt *coj = coo_j + ncoo_o; 7195 const PetscInt mr = mp[cp]->rmap->n; 7196 const PetscInt rs = C->rmap->rstart; 7197 const PetscInt re = C->rmap->rend; 7198 const PetscInt cs = C->cmap->rstart; 7199 for (i = 0; i < mr; i++) { 7200 const PetscInt *jj = mm->j + ii[i]; 7201 const PetscInt gr = rmap[i]; 7202 const PetscInt nz = ii[i+1] - ii[i]; 7203 if (gr < rs || gr >= re) { /* this is an offproc row */ 7204 for (j = ii[i]; j < ii[i+1]; j++) { 7205 *coi++ = gr; 7206 *idxoff++ = j; 7207 } 7208 if (!cmapt[cp]) { /* already global */ 7209 for (j = 0; j < nz; j++) *coj++ = jj[j]; 7210 } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */ 7211 for (j = 0; j < nz; j++) *coj++ = jj[j] + cs; 7212 } else { /* offdiag */ 7213 for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]]; 7214 } 7215 ncoo_o += nz; 7216 } else { /* this is a local row */ 7217 for (j = ii[i]; j < ii[i+1]; j++) *idxown++ = j; 7218 } 7219 } 7220 } 7221 mmdata->off[cp + 1] = idxoff; 7222 mmdata->own[cp + 1] = idxown; 7223 } 7224 7225 
PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)C),&mmdata->sf)); 7226 PetscCall(PetscSFSetGraphLayout(mmdata->sf,C->rmap,ncoo_o/*nleaves*/,NULL/*ilocal*/,PETSC_OWN_POINTER,coo_i)); 7227 PetscCall(PetscSFGetMultiSF(mmdata->sf,&msf)); 7228 PetscCall(PetscSFGetGraph(msf,&ncoo2/*nroots*/,NULL,NULL,NULL)); 7229 ncoo = ncoo_d + ncoo_oown + ncoo2; 7230 PetscCall(PetscMalloc2(ncoo,&coo_i2,ncoo,&coo_j2)); 7231 PetscCall(PetscSFGatherBegin(mmdata->sf,MPIU_INT,coo_i,coo_i2 + ncoo_d + ncoo_oown)); /* put (i,j) of remote nonzeros at back */ 7232 PetscCall(PetscSFGatherEnd(mmdata->sf,MPIU_INT,coo_i,coo_i2 + ncoo_d + ncoo_oown)); 7233 PetscCall(PetscSFGatherBegin(mmdata->sf,MPIU_INT,coo_j,coo_j2 + ncoo_d + ncoo_oown)); 7234 PetscCall(PetscSFGatherEnd(mmdata->sf,MPIU_INT,coo_j,coo_j2 + ncoo_d + ncoo_oown)); 7235 PetscCall(PetscFree2(coo_i,coo_j)); 7236 /* allocate MPI send buffer to collect nonzero values to be sent to remote procs */ 7237 PetscCall(PetscSFMalloc(mmdata->sf,mmdata->mtype,ncoo_o*sizeof(PetscScalar),(void**)&mmdata->coo_w)); 7238 coo_i = coo_i2; 7239 coo_j = coo_j2; 7240 } else { /* no offproc values insertion */ 7241 ncoo = ncoo_d; 7242 PetscCall(PetscMalloc2(ncoo,&coo_i,ncoo,&coo_j)); 7243 7244 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)C),&mmdata->sf)); 7245 PetscCall(PetscSFSetGraph(mmdata->sf,0,0,NULL,PETSC_OWN_POINTER,NULL,PETSC_OWN_POINTER)); 7246 PetscCall(PetscSFSetUp(mmdata->sf)); 7247 } 7248 mmdata->hasoffproc = hasoffproc; 7249 7250 /* gather (i,j) of nonzeros inserted locally */ 7251 for (cp = 0, ncoo_d = 0; cp < mmdata->cp; cp++) { 7252 Mat_SeqAIJ *mm = (Mat_SeqAIJ*)mp[cp]->data; 7253 PetscInt *coi = coo_i + ncoo_d; 7254 PetscInt *coj = coo_j + ncoo_d; 7255 const PetscInt *jj = mm->j; 7256 const PetscInt *ii = mm->i; 7257 const PetscInt *cmap = cmapa[cp]; 7258 const PetscInt *rmap = rmapa[cp]; 7259 const PetscInt mr = mp[cp]->rmap->n; 7260 const PetscInt rs = C->rmap->rstart; 7261 const PetscInt re = C->rmap->rend; 7262 const PetscInt 
cs = C->cmap->rstart; 7263 7264 if (mptmp[cp]) continue; 7265 if (rmapt[cp] == 1) { /* consecutive rows */ 7266 /* fill coo_i */ 7267 for (i = 0; i < mr; i++) { 7268 const PetscInt gr = i + rs; 7269 for (j = ii[i]; j < ii[i+1]; j++) coi[j] = gr; 7270 } 7271 /* fill coo_j */ 7272 if (!cmapt[cp]) { /* type-0, already global */ 7273 PetscCall(PetscArraycpy(coj,jj,mm->nz)); 7274 } else if (cmapt[cp] == 1) { /* type-1, local to global for consecutive columns of C */ 7275 for (j = 0; j < mm->nz; j++) coj[j] = jj[j] + cs; /* lid + col start */ 7276 } else { /* type-2, local to global for sparse columns */ 7277 for (j = 0; j < mm->nz; j++) coj[j] = cmap[jj[j]]; 7278 } 7279 ncoo_d += mm->nz; 7280 } else if (rmapt[cp] == 2) { /* sparse rows */ 7281 for (i = 0; i < mr; i++) { 7282 const PetscInt *jj = mm->j + ii[i]; 7283 const PetscInt gr = rmap[i]; 7284 const PetscInt nz = ii[i+1] - ii[i]; 7285 if (gr >= rs && gr < re) { /* local rows */ 7286 for (j = ii[i]; j < ii[i+1]; j++) *coi++ = gr; 7287 if (!cmapt[cp]) { /* type-0, already global */ 7288 for (j = 0; j < nz; j++) *coj++ = jj[j]; 7289 } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */ 7290 for (j = 0; j < nz; j++) *coj++ = jj[j] + cs; 7291 } else { /* type-2, local to global for sparse columns */ 7292 for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]]; 7293 } 7294 ncoo_d += nz; 7295 } 7296 } 7297 } 7298 } 7299 if (glob) { 7300 PetscCall(ISRestoreIndices(glob,&globidx)); 7301 } 7302 PetscCall(ISDestroy(&glob)); 7303 if (P_oth_l2g) { 7304 PetscCall(ISLocalToGlobalMappingRestoreIndices(P_oth_l2g,&P_oth_idx)); 7305 } 7306 PetscCall(ISLocalToGlobalMappingDestroy(&P_oth_l2g)); 7307 /* allocate an array to store all nonzeros (inserted locally or remotely) belonging to this proc */ 7308 PetscCall(PetscSFMalloc(mmdata->sf,mmdata->mtype,ncoo*sizeof(PetscScalar),(void**)&mmdata->coo_v)); 7309 7310 /* preallocate with COO data */ 7311 PetscCall(MatSetPreallocationCOO(C,ncoo,coo_i,coo_j)); 7312 
/* Tail of the preceding COO product-symbolic routine (its head is above):
   free the temporary global (i,j) COO index arrays now that the matrix has
   been preallocated from them. */
  PetscCall(PetscFree2(coo_i,coo_j));
  PetscFunctionReturn(0);
}

/*
   MatProductSetFromOptions_MPIAIJBACKEND - selects the device-backend product
   symbolic routine for AB, AtB and PtAP products when both operands share the
   same (non CPU-bound) type; otherwise falls back to the plain MPIAIJ path.

   With PETSC_HAVE_DEVICE, a per-product-type options database flag
   (e.g. -matmatmult_backend_cpu) lets the user force the CPU implementation;
   without device support, match starts PETSC_TRUE and no options are queried.
*/
PetscErrorCode MatProductSetFromOptions_MPIAIJBACKEND(Mat mat)
{
  Mat_Product *product = mat->product;
#if defined(PETSC_HAVE_DEVICE)
  PetscBool match = PETSC_FALSE;
  PetscBool usecpu = PETSC_FALSE;
#else
  PetscBool match = PETSC_TRUE;
#endif

  PetscFunctionBegin;
  MatCheckProduct(mat,1);
#if defined(PETSC_HAVE_DEVICE)
  /* only use the backend path if A and B have the same concrete type and neither is bound to the CPU */
  if (!product->A->boundtocpu && !product->B->boundtocpu) {
    PetscCall(PetscObjectTypeCompare((PetscObject)product->B,((PetscObject)product->A)->type_name,&match));
  }
  if (match) { /* we can always fallback to the CPU if requested */
    PetscErrorCode ierr;
    /* the option name (and the help strings) depend on whether the user called the
       high-level API (MatMatMult() etc.) or the MatProduct interface directly */
    switch (product->type) {
    case MATPRODUCT_AB:
      if (product->api_user) {
        ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatMatMult","Mat");PetscCall(ierr);
        PetscCall(PetscOptionsBool("-matmatmult_backend_cpu","Use CPU code","MatMatMult",usecpu,&usecpu,NULL));
        ierr = PetscOptionsEnd();PetscCall(ierr);
      } else {
        ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatProduct_AB","Mat");PetscCall(ierr);
        PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu","Use CPU code","MatMatMult",usecpu,&usecpu,NULL));
        ierr = PetscOptionsEnd();PetscCall(ierr);
      }
      break;
    case MATPRODUCT_AtB:
      if (product->api_user) {
        ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatTransposeMatMult","Mat");PetscCall(ierr);
        PetscCall(PetscOptionsBool("-mattransposematmult_backend_cpu","Use CPU code","MatTransposeMatMult",usecpu,&usecpu,NULL));
        ierr = PetscOptionsEnd();PetscCall(ierr);
      } else {
        ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatProduct_AtB","Mat");PetscCall(ierr);
        PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu","Use CPU code","MatTransposeMatMult",usecpu,&usecpu,NULL));
        ierr = PetscOptionsEnd();PetscCall(ierr);
      }
      break;
    case MATPRODUCT_PtAP:
      if (product->api_user) {
        ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatPtAP","Mat");PetscCall(ierr);
        PetscCall(PetscOptionsBool("-matptap_backend_cpu","Use CPU code","MatPtAP",usecpu,&usecpu,NULL));
        ierr = PetscOptionsEnd();PetscCall(ierr);
      } else {
        ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatProduct_PtAP","Mat");PetscCall(ierr);
        PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu","Use CPU code","MatPtAP",usecpu,&usecpu,NULL));
        ierr = PetscOptionsEnd();PetscCall(ierr);
      }
      break;
    default:
      break;
    }
    match = (PetscBool)!usecpu; /* user asked for the CPU path: disable the backend symbolic routine */
  }
#endif
  if (match) {
    /* the backend symbolic routine only supports these three product types */
    switch (product->type) {
    case MATPRODUCT_AB:
    case MATPRODUCT_AtB:
    case MATPRODUCT_PtAP:
      mat->ops->productsymbolic = MatProductSymbolic_MPIAIJBACKEND;
      break;
    default:
      break;
    }
  }
  /* fallback to MPIAIJ ops */
  if (!mat->ops->productsymbolic) PetscCall(MatProductSetFromOptions_MPIAIJ(mat));
  PetscFunctionReturn(0);
}

/*
  Special version for direct calls from Fortran
*/
#include <petsc/private/fortranimpl.h>

/* Change these macros so can be used in void function */
/* Identical to PetscCallVoid, except it assigns to *_ierr (the Fortran error
 * output argument) instead of returning the error code, since the Fortran
 * binding below has a void return type. */
#undef PetscCall
#define PetscCall(...) do { \
    PetscErrorCode ierr_msv_mpiaij = __VA_ARGS__; \
    if (PetscUnlikely(ierr_msv_mpiaij)) { \
      *_ierr = PetscError(PETSC_COMM_SELF,__LINE__,PETSC_FUNCTION_NAME,__FILE__,ierr_msv_mpiaij,PETSC_ERROR_REPEAT," "); \
      return; \
    } \
  } while (0)

/* Same idea for SETERRQ: record the error in *_ierr and return from the void function */
#undef SETERRQ
#define SETERRQ(comm,ierr,...) do { \
    *_ierr = PetscError(comm,__LINE__,PETSC_FUNCTION_NAME,__FILE__,ierr,PETSC_ERROR_INITIAL,__VA_ARGS__); \
    return; \
  } while (0)

/* map the C symbol to the name-mangling convention of the Fortran compiler */
#if defined(PETSC_HAVE_FORTRAN_CAPS)
#define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
#elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
#define matsetvaluesmpiaij_ matsetvaluesmpiaij
#else
#endif
/*
   matsetvaluesmpiaij_ - Fortran-callable fast path for MatSetValues() on an
   MPIAIJ matrix, bypassing the function-pointer dispatch of MatSetValues().

   All scalar arguments arrive by reference (Fortran convention); errors are
   reported through the trailing _ierr argument via the redefined PetscCall/
   SETERRQ macros above.

   For each (im[i], in[j]) entry:
     - locally owned row, owned column      -> inserted into the diagonal block aij->A
     - locally owned row, off-process column -> mapped through aij->colmap and
       inserted into the off-diagonal block aij->B (possibly disassembling the
       matrix first if the column is not yet present and new nonzeros are allowed)
     - non-owned row -> stashed for communication at assembly time (unless
       aij->donotstash is set)
*/
PETSC_EXTERN void matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
{
  Mat mat = *mmat;
  PetscInt m = *mm, n = *mn;
  InsertMode addv = *maddv;
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar value;

  MatCheckPreallocated(mat,1);
  /* once an insert mode is chosen it must stay consistent until assembly */
  if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
  else PetscCheck(mat->insertmode == addv,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
  {
    PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
    PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
    PetscBool roworiented = aij->roworiented;

    /* Some Variables required in the macro */
    /* The MatSetValues_SeqAIJ_{A,B}_Private() macros below read these exact
     * names (aimax/ai/ailen/aj/aa for A; bimax/bi/bilen/bj/ba for B), so the
     * declarations must not be renamed or reordered. */
    Mat A = aij->A;
    Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
    PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
    MatScalar *aa;
    /* zero values can be dropped only when adding, never when inserting */
    PetscBool ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
    Mat B = aij->B;
    Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data;
    PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
    MatScalar *ba;
    /* This variable below is only for the PETSC_HAVE_VIENNACL or PETSC_HAVE_CUDA cases, but we define it in all cases because we
     * cannot use "#if defined" inside a macro. */
    PETSC_UNUSED PetscBool inserted = PETSC_FALSE;

    /* per-row search state used by the insertion macros (binary-search bounds,
     * row pointers, last inserted column, etc.) */
    PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
    PetscInt nonew = a->nonew;
    MatScalar *ap1,*ap2;

    PetscFunctionBegin;
    PetscCall(MatSeqAIJGetArray(A,&aa));
    PetscCall(MatSeqAIJGetArray(B,&ba));
    for (i=0; i<m; i++) {
      if (im[i] < 0) continue; /* negative row indices are silently ignored */
      PetscCheck(im[i] < mat->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT,im[i],mat->rmap->N-1);
      if (im[i] >= rstart && im[i] < rend) {
        /* locally owned row: set up search state for both the diagonal (1) and
         * off-diagonal (2) blocks of this local row */
        row = im[i] - rstart;
        lastcol1 = -1;
        rp1 = aj + ai[row];
        ap1 = aa + ai[row];
        rmax1 = aimax[row];
        nrow1 = ailen[row];
        low1 = 0;
        high1 = nrow1;
        lastcol2 = -1;
        rp2 = bj + bi[row];
        ap2 = ba + bi[row];
        rmax2 = bimax[row];
        nrow2 = bilen[row];
        low2 = 0;
        high2 = nrow2;

        for (j=0; j<n; j++) {
          /* v is row-major when roworiented, column-major otherwise */
          if (roworiented) value = v[i*n+j];
          else value = v[i+j*m];
          /* never drop a zero on the diagonal: it keeps the sparsity pattern stable */
          if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
          if (in[j] >= cstart && in[j] < cend) {
            /* owned column -> diagonal block */
            col = in[j] - cstart;
            MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
          } else if (in[j] < 0) continue; /* negative column indices are silently ignored */
          else if (PetscUnlikelyDebug(in[j] >= mat->cmap->N)) {
            /* extra brace on SETERRQ() is required for --with-errorchecking=0 - due to the next 'else' clause */
            /* NOTE(review): "%D" is the legacy PETSc PetscInt format; the row check
             * above uses PetscInt_FMT -- confirm which the target PETSc version expects */
            SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
          } else {
            /* off-process column -> off-diagonal block; needs global-to-local column map */
            if (mat->was_assembled) {
              if (!aij->colmap) {
                PetscCall(MatCreateColmap_MPIAIJ_Private(mat));
              }
#if defined(PETSC_USE_CTABLE)
              PetscCall(PetscTableFind(aij->colmap,in[j]+1,&col));
              col--; /* colmap stores index+1 so 0 can mean "absent" */
#else
              col = aij->colmap[in[j]] - 1;
#endif
              if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
                /* column not in the assembled pattern and new nonzeros are allowed:
                 * disassemble back to global column indexing and insert there */
                PetscCall(MatDisAssemble_MPIAIJ(mat));
                col = in[j];
                /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
                B = aij->B;
                b = (Mat_SeqAIJ*)B->data;
                bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
                rp2 = bj + bi[row];
                ap2 = ba + bi[row];
                rmax2 = bimax[row];
                nrow2 = bilen[row];
                low2 = 0;
                high2 = nrow2;
                bm = aij->B->rmap->n;
                ba = b->a;
                inserted = PETSC_FALSE;
              }
            } else col = in[j]; /* not yet assembled: B still uses global column indices */
            MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
          }
        }
      } else if (!aij->donotstash) {
        /* row owned by another process: stash the values for AssemblyBegin/End */
        if (roworiented) {
          PetscCall(MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
        } else {
          PetscCall(MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
        }
      }
    }
    PetscCall(MatSeqAIJRestoreArray(A,&aa));
    PetscCall(MatSeqAIJRestoreArray(B,&ba));
  }
  PetscFunctionReturnVoid();
}
/* Undefining these here since they were redefined from their original definition above! No
 * other PETSc functions should be defined past this point, as it is impossible to recover the
 * original definitions */
#undef PetscCall
#undef SETERRQ