1 #include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/ 2 #include <petsc/private/vecimpl.h> 3 #include <petsc/private/sfimpl.h> 4 #include <petsc/private/isimpl.h> 5 #include <petscblaslapack.h> 6 #include <petscsf.h> 7 #include <petsc/private/hashmapi.h> 8 9 /*MC 10 MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices. 11 12 This matrix type is identical to MATSEQAIJ when constructed with a single process communicator, 13 and MATMPIAIJ otherwise. As a result, for single process communicators, 14 MatSeqAIJSetPreallocation is supported, and similarly MatMPIAIJSetPreallocation() is supported 15 for communicators controlling multiple processes. It is recommended that you call both of 16 the above preallocation routines for simplicity. 17 18 Options Database Keys: 19 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions() 20 21 Developer Notes: 22 Subclasses include MATAIJCUSPARSE, MATAIJPERM, MATAIJSELL, MATAIJMKL, MATAIJCRL, and also automatically switches over to use inodes when 23 enough exist. 24 25 Level: beginner 26 27 .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ 28 M*/ 29 30 /*MC 31 MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices. 32 33 This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator, 34 and MATMPIAIJCRL otherwise. As a result, for single process communicators, 35 MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported 36 for communicators controlling multiple processes. It is recommended that you call both of 37 the above preallocation routines for simplicity. 38 39 Options Database Keys: 40 . 
-mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()

  Level: beginner

.seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
M*/

/*
  MatBindToCPU_MPIAIJ - Bind (flg == PETSC_TRUE) or unbind this parallel matrix to/from
  CPU-only computation by forwarding the request to the diagonal (a->A) and off-diagonal
  (a->B) sequential blocks.
*/
static PetscErrorCode MatBindToCPU_MPIAIJ(Mat A,PetscBool flg)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
#if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_VIENNACL)
  /* the bound state is only recorded when a GPU backend is configured */
  A->boundtocpu = flg;
#endif
  if (a->A) {
    PetscCall(MatBindToCPU(a->A,flg));
  }
  if (a->B) {
    PetscCall(MatBindToCPU(a->B,flg));
  }

  /* In addition to binding the diagonal and off-diagonal matrices, bind the local vectors used for matrix-vector products.
     This maybe seems a little odd for a MatBindToCPU() call to do, but it makes no sense for the binding of these vectors
     to differ from the parent matrix. */
  if (a->lvec) {
    PetscCall(VecBindToCPU(a->lvec,flg));
  }
  if (a->diag) {
    PetscCall(VecBindToCPU(a->diag,flg));
  }

  PetscFunctionReturn(0);
}

/*
  MatSetBlockSizes_MPIAIJ - Sets the row/column block sizes on the diagonal block;
  the off-diagonal block keeps a column block size of 1 (its column space is the
  compressed set of ghost columns, which presumably cannot be assumed blocked -- NOTE(review): confirm).
*/
PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ*)M->data;

  PetscFunctionBegin;
  if (mat->A) {
    PetscCall(MatSetBlockSizes(mat->A,rbs,cbs));
    PetscCall(MatSetBlockSizes(mat->B,rbs,1));
  }
  PetscFunctionReturn(0);
}

/*
  MatFindNonzeroRows_MPIAIJ - Builds an index set of the global indices of the locally
  owned rows that contain at least one stored value different from 0.0.

  Output: *keptrows is left NULL on every rank when the global (summed) count of
  all-zero rows is zero, so callers can cheaply detect "all rows kept".
*/
PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
{
  Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
  Mat_SeqAIJ      *a = (Mat_SeqAIJ*)mat->A->data;
  Mat_SeqAIJ      *b = (Mat_SeqAIJ*)mat->B->data;
  const PetscInt  *ia,*ib;
  const MatScalar *aa,*bb,*aav,*bav;
  PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
  PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;

  PetscFunctionBegin;
  *keptrows = NULL;

  ia = a->i;
  ib = b->i;
  PetscCall(MatSeqAIJGetArrayRead(mat->A,&aav));
  PetscCall(MatSeqAIJGetArrayRead(mat->B,&bav));
  /* first pass: cnt = number of local rows that are entirely zero (structurally empty
     or containing only explicit zeros) */
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) {
      cnt++;
      goto ok1;
    }
    aa = aav + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) goto ok1; /* row has a nonzero in the diagonal block */
    }
    bb = bav + ib[i];
    for (j=0; j <nb; j++) {
      if (bb[j] != 0.0) goto ok1; /* row has a nonzero in the off-diagonal block */
    }
    cnt++;
ok1:;
  }
  /* n0rows = global number of all-zero rows; if none, no IS needs to be built */
  PetscCall(MPIU_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M)));
  if (!n0rows) {
    PetscCall(MatSeqAIJRestoreArrayRead(mat->A,&aav));
    PetscCall(MatSeqAIJRestoreArrayRead(mat->B,&bav));
    PetscFunctionReturn(0);
  }
  /* second pass: collect the global indices of the rows that are NOT all zero */
  PetscCall(PetscMalloc1(M->rmap->n-cnt,&rows));
  cnt = 0;
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) continue;
    aa = aav + ia[i];
    for (j=0; j<na;j++) {
      if (aa[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
    bb = bav + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
ok2:;
  }
  PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows));
  PetscCall(MatSeqAIJRestoreArrayRead(mat->A,&aav));
  PetscCall(MatSeqAIJRestoreArrayRead(mat->B,&bav));
  PetscFunctionReturn(0);
}

/*
  MatDiagonalSet_MPIAIJ - Inserts/adds the vector D onto the diagonal of Y.
  Fast path: when Y is assembled and has congruent row/column layouts, the whole
  diagonal lives in the local diagonal block, so forward directly to it.
*/
PetscErrorCode MatDiagonalSet_MPIAIJ(Mat Y,Vec D,InsertMode is)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*) Y->data;
  PetscBool  cong;

  PetscFunctionBegin;
  PetscCall(MatHasCongruentLayouts(Y,&cong));
  if (Y->assembled && cong) {
    PetscCall(MatDiagonalSet(aij->A,D,is));
  } else {
    PetscCall(MatDiagonalSet_Default(Y,D,is)); /* generic MatSetValues()-based fallback */
  }
  PetscFunctionReturn(0);
}

/*
  MatFindZeroDiagonals_MPIAIJ - Builds an index set with the global indices of locally
  owned rows whose diagonal entry is missing or zero; the search itself is delegated
  to the sequential diagonal block.
*/
PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)M->data;
  PetscInt   i,rstart,nrows,*rows;

  PetscFunctionBegin;
  *zrows = NULL;
  PetscCall(MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows));
  PetscCall(MatGetOwnershipRange(M,&rstart,NULL));
  /* shift the local row numbers returned by the SeqAIJ helper to global numbering */
  for (i=0; i<nrows; i++) rows[i] += rstart;
  PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows));
  PetscFunctionReturn(0);
}

/*
  MatGetColumnReductions_MPIAIJ - Computes a per-column reduction over the whole matrix.

  Input:
.   type - NORM_1, NORM_2, NORM_INFINITY, or one of the REDUCTION_{SUM,MEAN}_{REAL,IMAGINARY}PART values

  Output:
.   reductions - caller-provided array of global length n (the global number of columns)

  Each rank accumulates its stored entries into a global-length work array (off-diagonal
  entries are mapped to global columns through garray), then a single MPI_Allreduce
  (MAX for NORM_INFINITY, SUM otherwise) combines the ranks. NORM_2 takes the square
  root afterwards; the MEAN variants divide by the global number of rows m.
*/
PetscErrorCode MatGetColumnReductions_MPIAIJ(Mat A,PetscInt type,PetscReal *reductions)
{
  Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)A->data;
  PetscInt          i,m,n,*garray = aij->garray;
  Mat_SeqAIJ        *a_aij = (Mat_SeqAIJ*) aij->A->data;
  Mat_SeqAIJ        *b_aij = (Mat_SeqAIJ*) aij->B->data;
  PetscReal         *work;
  const PetscScalar *dummy;

  PetscFunctionBegin;
  PetscCall(MatGetSize(A,&m,&n));
  PetscCall(PetscCalloc1(n,&work));
  /* Get/Restore pairs with an unused pointer: presumably done only to trigger a
     device-to-host sync so a_aij->a / b_aij->a read below are current -- NOTE(review): confirm */
  PetscCall(MatSeqAIJGetArrayRead(aij->A,&dummy));
  PetscCall(MatSeqAIJRestoreArrayRead(aij->A,&dummy));
  PetscCall(MatSeqAIJGetArrayRead(aij->B,&dummy));
  PetscCall(MatSeqAIJRestoreArrayRead(aij->B,&dummy));
  if (type == NORM_2) {
    /* accumulate |a_ij|^2 per global column */
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
    }
  } else if (type == NORM_1) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
    }
  } else if (type == NORM_INFINITY) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
    }
  } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] += PetscRealPart(a_aij->a[i]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] += PetscRealPart(b_aij->a[i]);
    }
  } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] += PetscImaginaryPart(a_aij->a[i]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] += PetscImaginaryPart(b_aij->a[i]);
    }
  } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown reduction type");
  /* combine the per-rank partial reductions into the caller's array */
  if (type == NORM_INFINITY) {
    PetscCall(MPIU_Allreduce(work,reductions,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A)));
  } else {
    PetscCall(MPIU_Allreduce(work,reductions,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A)));
  }
  PetscCall(PetscFree(work));
  if (type == NORM_2) {
    for (i=0; i<n; i++) reductions[i] = PetscSqrtReal(reductions[i]);
  } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) {
    for (i=0; i<n; i++) reductions[i] /= m; /* mean over the global number of rows */
  }
  PetscFunctionReturn(0);
}

/*
  MatFindOffBlockDiagonalEntries_MPIAIJ - Builds an index set of the global indices of
  locally owned rows that have at least one entry outside the (block) diagonal:
  the union of the diagonal block's off-block-diagonal rows and every row with an
  entry in the off-diagonal block.
*/
PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A,IS *is)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  IS             sis,gis;
  const PetscInt *isis,*igis;
  PetscInt       n,*iis,nsis,ngis,rstart,i;

  PetscFunctionBegin;
  PetscCall(MatFindOffBlockDiagonalEntries(a->A,&sis));
  PetscCall(MatFindNonzeroRows(a->B,&gis));
  PetscCall(ISGetSize(gis,&ngis));
  PetscCall(ISGetSize(sis,&nsis));
  PetscCall(ISGetIndices(sis,&isis));
  PetscCall(ISGetIndices(gis,&igis));

  /* concatenate the two (local-numbered) lists, then sort and remove duplicates */
  PetscCall(PetscMalloc1(ngis+nsis,&iis));
  PetscCall(PetscArraycpy(iis,igis,ngis));
  PetscCall(PetscArraycpy(iis+ngis,isis,nsis));
  n = ngis + nsis;
  PetscCall(PetscSortRemoveDupsInt(&n,iis));
  PetscCall(MatGetOwnershipRange(A,&rstart,NULL));
  for (i=0; i<n; i++) iis[i] += rstart; /* shift to global numbering */
  PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)A),n,iis,PETSC_OWN_POINTER,is));
  PetscCall(ISRestoreIndices(sis,&isis));
  PetscCall(ISRestoreIndices(gis,&igis));
  PetscCall(ISDestroy(&sis));
  PetscCall(ISDestroy(&gis));
  PetscFunctionReturn(0);
}

/*
   Local utility routine that creates a mapping from the global column
   number to the local number in the off-diagonal part of the local
   storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at
   a slightly higher hash table cost; without it it is not scalable (each process
   has an order-N integer array) but is fast to access.
*/
PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
  PetscInt   n = aij->B->cmap->n,i;

  PetscFunctionBegin;
  PetscCheckFalse(n && !aij->garray,PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
#if defined(PETSC_USE_CTABLE)
  /* hash-table colmap: stores (global column + 1) -> (local column + 1); the +1 shifts
     avoid the table's reserved zero key/value */
  PetscCall(PetscTableCreate(n,mat->cmap->N+1,&aij->colmap));
  for (i=0; i<n; i++) {
    PetscCall(PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES));
  }
#else
  /* dense colmap: entry 0 means "column not present locally", hence the stored value is local+1 */
  PetscCall(PetscCalloc1(mat->cmap->N+1,&aij->colmap));
  PetscCall(PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt)));
  for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
#endif
  PetscFunctionReturn(0);
}

/*
  Insert (value,col) into row `row` of the diagonal block A. Expects the caller
  (MatSetValues_MPIAIJ) to have set up rp1/ap1/nrow1/low1/high1/lastcol1/rmax1/nonew
  and the orow/ocol global indices used only for error messages. Performs a bounded
  binary search (window shrinks while > 5) followed by a linear scan, and reallocates
  the row via MatSeqXAIJReallocateAIJ when a new nonzero must be inserted.
*/
#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv,orow,ocol) \
{ \
    if (col <= lastcol1) low1 = 0; \
    else high1 = nrow1; \
    lastcol1 = col;\
    while (high1-low1 > 5) { \
      t = (low1+high1)/2; \
      if (rp1[t] > col) high1 = t; \
      else low1 = t; \
    } \
    for (_i=low1; _i<high1; _i++) { \
      if (rp1[_i] > col) break; \
      if (rp1[_i] == col) { \
        if (addv == ADD_VALUES) { \
          ap1[_i] += value; \
          /* Not sure LogFlops will slow down the code or not */ \
          (void)PetscLogFlops(1.0); \
        } \
        else ap1[_i] = value; \
        goto a_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries && row != col) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
    if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \
    PetscCheckFalse(nonew == -1,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
    N = nrow1++ - 1; a->nz++; high1++; \
    /* shift up all the later entries in this row */ \
    PetscCall(PetscArraymove(rp1+_i+1,rp1+_i,N-_i+1));\
    PetscCall(PetscArraymove(ap1+_i+1,ap1+_i,N-_i+1));\
    rp1[_i] = col; \
    ap1[_i] = value; \
    A->nonzerostate++;\
    a_noinsert: ; \
    ailen[row] = nrow1; \
}

/*
  Same as MatSetValues_SeqAIJ_A_Private() but for the off-diagonal block B, using the
  rp2/ap2/nrow2/... working set. Note: no `row != col` exception for zero values here,
  since B never holds diagonal entries.
*/
#define MatSetValues_SeqAIJ_B_Private(row,col,value,addv,orow,ocol) \
  { \
    if (col <= lastcol2) low2 = 0; \
    else high2 = nrow2; \
    lastcol2 = col; \
    while (high2-low2 > 5) { \
      t = (low2+high2)/2; \
      if (rp2[t] > col) high2 = t; \
      else low2 = t; \
    } \
    for (_i=low2; _i<high2; _i++) { \
      if (rp2[_i] > col) break; \
      if (rp2[_i] == col) { \
        if (addv == ADD_VALUES) { \
          ap2[_i] += value; \
          (void)PetscLogFlops(1.0); \
        } \
        else ap2[_i] = value; \
        goto b_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
    if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
    PetscCheckFalse(nonew == -1,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
    N = nrow2++ - 1; b->nz++; high2++; \
    /* shift up all the later entries in this row */ \
    PetscCall(PetscArraymove(rp2+_i+1,rp2+_i,N-_i+1));\
    PetscCall(PetscArraymove(ap2+_i+1,ap2+_i,N-_i+1));\
    rp2[_i] = col; \
    ap2[_i] = value; \
    B->nonzerostate++; \
    b_noinsert: ; \
    bilen[row] = nrow2; \
  }

/*
  MatSetValuesRow_MPIAIJ - Replaces all stored values of one locally owned row, taking
  the values `v` ordered by global column. The row is split into three segments:
  off-diagonal entries left of the diagonal block (stored in B), the diagonal block
  entries (stored in A), and off-diagonal entries right of the diagonal block (B again).

  NOTE (from original): code only works for square matrices A.
*/
PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
{
  Mat_MPIAIJ  *mat = (Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ  *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
  PetscInt    l,*garray = mat->garray,diag;
  PetscScalar *aa,*ba;

  PetscFunctionBegin;
  /* code only works for square matrices A */

  /* find size of row to the left of the diagonal part */
  PetscCall(MatGetOwnershipRange(A,&diag,NULL));
  row = row - diag; /* convert to local row number */
  for (l=0; l<b->i[row+1]-b->i[row]; l++) {
    if (garray[b->j[b->i[row]+l]] > diag) break; /* l = number of B entries with global column < rstart */
  }
  if (l) {
    PetscCall(MatSeqAIJGetArray(mat->B,&ba));
    PetscCall(PetscArraycpy(ba+b->i[row],v,l));
    PetscCall(MatSeqAIJRestoreArray(mat->B,&ba));
  }

  /* diagonal part */
  if (a->i[row+1]-a->i[row]) {
    PetscCall(MatSeqAIJGetArray(mat->A,&aa));
    PetscCall(PetscArraycpy(aa+a->i[row],v+l,(a->i[row+1]-a->i[row])));
    PetscCall(MatSeqAIJRestoreArray(mat->A,&aa));
  }

  /* right of diagonal part */
  if (b->i[row+1]-b->i[row]-l) {
    PetscCall(MatSeqAIJGetArray(mat->B,&ba));
    PetscCall(PetscArraycpy(ba+b->i[row]+l,v+l+a->i[row+1]-a->i[row],b->i[row+1]-b->i[row]-l));
    PetscCall(MatSeqAIJRestoreArray(mat->B,&ba));
  }
  PetscFunctionReturn(0);
}

/*
  MatSetValues_MPIAIJ - Inserts or adds a logically dense block of values given in
  global numbering. Locally owned rows go directly into the diagonal (A) or
  off-diagonal (B) sequential block via the MatSetValues_SeqAIJ_{A,B}_Private macros;
  off-process rows are queued in the stash for communication at assembly time
  (unless donotstash/nooffprocentries is set).
*/
PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIAIJ  *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar value = 0.0;
  PetscInt    i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt    cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
  PetscBool   roworiented = aij->roworiented;

  /* Some Variables required in the macro */
  Mat         A = aij->A;
  Mat_SeqAIJ  *a = (Mat_SeqAIJ*)A->data;
  PetscInt    *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
  PetscBool   ignorezeroentries = a->ignorezeroentries;
  Mat         B = aij->B;
  Mat_SeqAIJ  *b = (Mat_SeqAIJ*)B->data;
  PetscInt    *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
  MatScalar   *aa,*ba;
  /* scratch used inside MatSetValues_SeqAIJ_{A,B}_Private */
  PetscInt    *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
  PetscInt    nonew;
  MatScalar   *ap1,*ap2;

  PetscFunctionBegin;
  PetscCall(MatSeqAIJGetArray(A,&aa));
  PetscCall(MatSeqAIJGetArray(B,&ba));
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue; /* negative rows are ignored by convention */
    PetscCheckFalse(im[i] >= mat->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT,im[i],mat->rmap->N-1);
    if (im[i] >= rstart && im[i] < rend) {
      /* locally owned row: prime the search windows for both blocks */
      row      = im[i] - rstart;
      lastcol1 = -1;
      rp1      = aj + ai[row];
      ap1      = aa + ai[row];
      rmax1    = aimax[row];
      nrow1    = ailen[row];
      low1     = 0;
      high1    = nrow1;
      lastcol2 = -1;
      rp2      = bj + bi[row];
      ap2      = ba + bi[row];
      rmax2    = bimax[row];
      nrow2    = bilen[row];
      low2     = 0;
      high2    = nrow2;

      for (j=0; j<n; j++) {
        if (v) value = roworiented ? v[i*n+j] : v[i+j*m];
        if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
        if (in[j] >= cstart && in[j] < cend) {
          /* column in the diagonal block */
          col   = in[j] - cstart;
          nonew = a->nonew;
          MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
        } else if (in[j] < 0) continue;
        /* PetscCheckFalse expands to a brace-less `if`, so the following `else` chains to it */
        else PetscCheckFalse(in[j] >= mat->cmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT,in[j],mat->cmap->N-1);
        else {
          /* column in the off-diagonal block */
          if (mat->was_assembled) {
            if (!aij->colmap) {
              PetscCall(MatCreateColmap_MPIAIJ_Private(mat));
            }
#if defined(PETSC_USE_CTABLE)
            PetscCall(PetscTableFind(aij->colmap,in[j]+1,&col)); /* map global col ids to local ones */
            col--;
#else
            col = aij->colmap[in[j]] - 1;
#endif
            if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) { /* col < 0 means in[j] is a new col for B */
              PetscCall(MatDisAssemble_MPIAIJ(mat));               /* Change aij->B from reduced/local format to expanded/global format */
              col = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
              B     = aij->B;
              b     = (Mat_SeqAIJ*)B->data;
              bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
              rp2   = bj + bi[row];
              ap2   = ba + bi[row];
              rmax2 = bimax[row];
              nrow2 = bilen[row];
              low2  = 0;
              high2 = nrow2;
              bm    = aij->B->rmap->n;
              ba    = b->a;
            } else if (col < 0 && !(ignorezeroentries && value == 0.0)) {
              /* new off-diagonal nonzero but insertions are disallowed */
              if (1 == ((Mat_SeqAIJ*)(aij->B->data))->nonew) {
                PetscCall(PetscInfo(mat,"Skipping of insertion of new nonzero location in off-diagonal portion of matrix %g(%" PetscInt_FMT ",%" PetscInt_FMT ")\n",(double)PetscRealPart(value),im[i],in[j]));
              } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", im[i], in[j]);
            }
          } else col = in[j]; /* not yet assembled: B still uses global column numbering */
          nonew = b->nonew;
          MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
        }
      }
    } else {
      /* off-process row: stash it for exchange during assembly */
      PetscCheck(!mat->nooffprocentries,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!aij->donotstash) {
        mat->assembled = PETSC_FALSE;
        if (roworiented) {
          PetscCall(MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
        } else {
          PetscCall(MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
        }
      }
    }
  }
  PetscCall(MatSeqAIJRestoreArray(A,&aa));
  PetscCall(MatSeqAIJRestoreArray(B,&ba));
  PetscFunctionReturn(0);
}

/*
  This function sets the j and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
  The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
  No off-processor parts off the matrix are allowed here and mat->was_assembled has to be PETSC_FALSE.
*/
PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat_Symbolic(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[])
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
  Mat        A = aij->A; /* diagonal part of the matrix */
  Mat        B = aij->B; /* offdiagonal part of the matrix */
  Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
  Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data;
  PetscInt   cstart = mat->cmap->rstart,cend = mat->cmap->rend,col;
  PetscInt   *ailen = a->ilen,*aj = a->j;
  PetscInt   *bilen = b->ilen,*bj = b->j;
  PetscInt   am = aij->A->rmap->n,j;
  PetscInt   diag_so_far = 0,dnz; /* running write positions into aj/bj */
  PetscInt   offd_so_far = 0,onz;

  PetscFunctionBegin;
  /* Iterate over all rows of the matrix */
  for (j=0; j<am; j++) {
    dnz = onz = 0;
    /* Iterate over all non-zero columns of the current row */
    for (col=mat_i[j]; col<mat_i[j+1]; col++) {
      /* If column is in the diagonal */
      if (mat_j[col] >= cstart && mat_j[col] < cend) {
        aj[diag_so_far++] = mat_j[col] - cstart; /* diagonal block stores local column numbers */
        dnz++;
      } else { /* off-diagonal entries */
        bj[offd_so_far++] = mat_j[col]; /* global numbering; compacted later during assembly */
        onz++;
      }
    }
    ailen[j] = dnz;
    bilen[j] = onz;
  }
  PetscFunctionReturn(0);
}

/*
  This function sets the local j, a and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
  The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
  No off-processor parts off the matrix are allowed here, they are set at a later point by MatSetValues_MPIAIJ.
  Also, mat->was_assembled has to be false, otherwise the statement aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
  would not be true and the more complex MatSetValues_MPIAIJ has to be used.
*/
PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[],const PetscScalar mat_a[])
{
  Mat_MPIAIJ  *aij = (Mat_MPIAIJ*)mat->data;
  Mat         A = aij->A; /* diagonal part of the matrix */
  Mat         B = aij->B; /* offdiagonal part of the matrix */
  Mat_SeqAIJ  *aijd =(Mat_SeqAIJ*)(aij->A)->data,*aijo=(Mat_SeqAIJ*)(aij->B)->data;
  Mat_SeqAIJ  *a = (Mat_SeqAIJ*)A->data;
  Mat_SeqAIJ  *b = (Mat_SeqAIJ*)B->data;
  PetscInt    cstart = mat->cmap->rstart,cend = mat->cmap->rend;
  PetscInt    *ailen = a->ilen,*aj = a->j;
  PetscInt    *bilen = b->ilen,*bj = b->j;
  PetscInt    am = aij->A->rmap->n,j;
  PetscInt    *full_diag_i=aijd->i,*full_offd_i=aijo->i; /* These variables can also include non-local elements, which are set at a later point. */
  PetscInt    col,dnz_row,onz_row,rowstart_diag,rowstart_offd;
  PetscScalar *aa = a->a,*ba = b->a;

  PetscFunctionBegin;
  /* Iterate over all rows of the matrix */
  for (j=0; j<am; j++) {
    dnz_row = onz_row = 0;
    rowstart_offd = full_offd_i[j];
    rowstart_diag = full_diag_i[j];
    /* Iterate over all non-zero columns of the current row */
    for (col=mat_i[j]; col<mat_i[j+1]; col++) {
      /* If column is in the diagonal */
      if (mat_j[col] >= cstart && mat_j[col] < cend) {
        aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
        aa[rowstart_diag+dnz_row] = mat_a[col];
        dnz_row++;
      } else { /* off-diagonal entries */
        bj[rowstart_offd+onz_row] = mat_j[col];
        ba[rowstart_offd+onz_row] = mat_a[col];
        onz_row++;
      }
    }
    ailen[j] = dnz_row;
    bilen[j] = onz_row;
  }
  PetscFunctionReturn(0);
}

/*
  MatGetValues_MPIAIJ - Retrieves a logically dense block of values, given in global
  numbering, into v (row-major, v[i*n+j]). Only locally owned rows are supported;
  off-diagonal columns not present in this rank's B block come back as 0.0.
*/
PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
  PetscInt   i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt   cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* negative row */
    PetscCheck(idxm[i] < mat->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT,idxm[i],mat->rmap->N-1);
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* negative column */
        PetscCheck(idxn[j] < mat->cmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT,idxn[j],mat->cmap->N-1);
        if (idxn[j] >= cstart && idxn[j] < cend) {
          /* entry lives in the diagonal block */
          col = idxn[j] - cstart;
          PetscCall(MatGetValues(aij->A,1,&row,1,&col,v+i*n+j));
        } else {
          /* entry (if stored) lives in the off-diagonal block: translate via colmap */
          if (!aij->colmap) {
            PetscCall(MatCreateColmap_MPIAIJ_Private(mat));
          }
#if defined(PETSC_USE_CTABLE)
          PetscCall(PetscTableFind(aij->colmap,idxn[j]+1,&col));
          col--;
#else
          col = aij->colmap[idxn[j]] - 1;
#endif
          if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; /* column not stored locally */
          else {
            PetscCall(MatGetValues(aij->B,1,&row,1,&col,v+i*n+j));
          }
        }
      }
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
  }
  PetscFunctionReturn(0);
}

/*
  MatAssemblyBegin_MPIAIJ - Starts communication of stashed off-process entries;
  a no-op when stashing is disabled or no off-process entries are allowed.
*/
PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
  PetscInt   nstash,reallocs;

  PetscFunctionBegin;
  if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);

  PetscCall(MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range));
  PetscCall(MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs));
  PetscCall(PetscInfo(aij->A,"Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n",nstash,reallocs));
  PetscFunctionReturn(0);
}

/*
  MatAssemblyEnd_MPIAIJ - Drains the stash of off-process entries into the local
  blocks, assembles the diagonal (A) and off-diagonal (B) sequential blocks, keeps
  all ranks consistent about disassembly, builds the multiply machinery on first
  final assembly, and reduces the collective nonzero state.
*/
PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ  *aij = (Mat_MPIAIJ*)mat->data;
  PetscMPIInt n;
  PetscInt    i,j,rstart,ncols,flg;
  PetscInt    *row,*col;
  PetscBool   other_disassembled;
  PetscScalar *val;

  /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */

  PetscFunctionBegin;
  if (!aij->donotstash && !mat->nooffprocentries) {
    /* receive stashed entries from other ranks and insert them */
    while (1) {
      PetscCall(MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg));
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else ncols = n-i;
        /* Now assemble all these values with a single function call */
        PetscCall(MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode));
        i = j;
      }
    }
    PetscCall(MatStashScatterEnd_Private(&mat->stash));
  }
#if defined(PETSC_HAVE_DEVICE)
  if (mat->offloadmask == PETSC_OFFLOAD_CPU) aij->A->offloadmask = PETSC_OFFLOAD_CPU;
  /* We call MatBindToCPU() on aij->A and aij->B here, because if MatBindToCPU_MPIAIJ() is called before assembly, it cannot bind these. */
  if (mat->boundtocpu) {
    PetscCall(MatBindToCPU(aij->A,PETSC_TRUE));
    PetscCall(MatBindToCPU(aij->B,PETSC_TRUE));
  }
#endif
  PetscCall(MatAssemblyBegin(aij->A,mode));
  PetscCall(MatAssemblyEnd(aij->A,mode));

  /* determine if any processor has disassembled, if so we must
     also disassemble ourself, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
    PetscCall(MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat)));
    if (mat->was_assembled && !other_disassembled) { /* mat on this rank has reduced off-diag B with local col ids, but globally it does not */
      PetscCall(MatDisAssemble_MPIAIJ(mat));
    }
  }
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    PetscCall(MatSetUpMultiply_MPIAIJ(mat)); /* builds lvec/Mvctx and compresses B's columns */
  }
  PetscCall(MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE));
#if defined(PETSC_HAVE_DEVICE)
  if (mat->offloadmask == PETSC_OFFLOAD_CPU && aij->B->offloadmask != PETSC_OFFLOAD_UNALLOCATED) aij->B->offloadmask = PETSC_OFFLOAD_CPU;
#endif
  PetscCall(MatAssemblyBegin(aij->B,mode));
  PetscCall(MatAssemblyEnd(aij->B,mode));

  /* cached row workspace and diagonal are stale after assembly */
  PetscCall(PetscFree2(aij->rowvalues,aij->rowindices));

  aij->rowvalues = NULL;

  PetscCall(VecDestroy(&aij->diag));

  /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
  if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
    PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
    PetscCall(MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat)));
  }
#if defined(PETSC_HAVE_DEVICE)
  mat->offloadmask = PETSC_OFFLOAD_BOTH;
#endif
  PetscFunctionReturn(0);
}

/* Zero all stored values of both the diagonal and off-diagonal blocks (pattern kept) */
PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
{
  Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  PetscCall(MatZeroEntries(l->A));
  PetscCall(MatZeroEntries(l->B));
  PetscFunctionReturn(0);
}

/*
  MatZeroRows_MPIAIJ - Zeros the given global rows (any rank may list any row; rows are
  first mapped to their owners), optionally places `diag` on the diagonal, and fixes the
  right-hand side b = diag*x for the zeroed rows when x and b are provided.
*/
PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIAIJ       *mat = (Mat_MPIAIJ *) A->data;
  PetscObjectState sA, sB;
  PetscInt         *lrows;
  PetscInt         r, len;
  PetscBool        cong, lch, gch;

  PetscFunctionBegin;
  /* get locally owned rows */
  PetscCall(MatZeroRowsMapLocal_Private(A,N,rows,&len,&lrows));
  PetscCall(MatHasCongruentLayouts(A,&cong));
  /* fix right hand side if needed */
  if (x && b) {
    const PetscScalar *xx;
    PetscScalar       *bb;

    PetscCheck(cong,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Need matching row/col layout");
    PetscCall(VecGetArrayRead(x, &xx));
    PetscCall(VecGetArray(b, &bb));
    for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
    PetscCall(VecRestoreArrayRead(x, &xx));
    PetscCall(VecRestoreArray(b, &bb));
  }

  /* remember the per-block nonzero states so we can detect pattern changes below */
  sA = mat->A->nonzerostate;
  sB = mat->B->nonzerostate;

  if (diag != 0.0 && cong) {
    /* congruent layouts: the diagonal entry is guaranteed to be in the diagonal block */
    PetscCall(MatZeroRows(mat->A, len, lrows, diag, NULL, NULL));
    PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL));
  } else if (diag != 0.0) { /* non-square or non congruent layouts -> if keepnonzeropattern is false, we allow for new insertion */
    Mat_SeqAIJ *aijA = (Mat_SeqAIJ*)mat->A->data;
    Mat_SeqAIJ *aijB = (Mat_SeqAIJ*)mat->B->data;
    PetscInt   nnwA, nnwB;
    PetscBool  nnzA, nnzB;

    nnwA = aijA->nonew;
    nnwB = aijB->nonew;
    nnzA = aijA->keepnonzeropattern;
    nnzB = aijB->keepnonzeropattern;
    if (!nnzA) {
      PetscCall(PetscInfo(mat->A,"Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on diagonal block.\n"));
      aijA->nonew = 0;
    }
    if (!nnzB) {
      PetscCall(PetscInfo(mat->B,"Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on off-diagonal block.\n"));
      aijB->nonew = 0;
    }
    /* Must zero here before the next loop */
    PetscCall(MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL));
    PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL));
    /* insert the diagonal entries one by one via MatSetValues() (they may fall in A or B) */
    for (r = 0; r < len; ++r) {
      const PetscInt row = lrows[r] + A->rmap->rstart;
      if (row >= A->cmap->N) continue; /* no diagonal entry exists past the last global column */
      PetscCall(MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES));
    }
    /* restore the original insertion policies */
    aijA->nonew = nnwA;
    aijB->nonew = nnwB;
  } else {
    PetscCall(MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL));
    PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL));
  }
  PetscCall(PetscFree(lrows));
  PetscCall(MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY));

  /* reduce nonzerostate */
  lch = (PetscBool)(sA != mat->A->nonzerostate || sB != mat->B->nonzerostate);
  PetscCall(MPIU_Allreduce(&lch,&gch,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)A)));
  if (gch) A->nonzerostate++;
  PetscFunctionReturn(0);
}

/*
  MatZeroRowsColumns_MPIAIJ - Zeros the given global rows AND the matching columns,
  optionally setting `diag` on the diagonal and adjusting b -= a_ij*x_j for the
  eliminated columns. Ownership of the (arbitrarily distributed) row list is resolved
  with a PetscSF reduction; the column part uses a 0/1 mask vector scattered into
  the ghost layout to find affected off-diagonal entries.
*/
PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
  PetscMPIInt       n = A->rmap->n;
  PetscInt          i,j,r,m,len = 0;
  PetscInt          *lrows,*owners = A->rmap->range;
  PetscMPIInt       p = 0;
  PetscSFNode       *rrows;
  PetscSF           sf;
  const PetscScalar *xx;
  PetscScalar       *bb,*mask,*aij_a;
  Vec               xmask,lmask;
  Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
  const PetscInt    *aj, *ii,*ridx;
  PetscScalar       *aa;

  PetscFunctionBegin;
  /* Create SF where leaves are input rows and roots are owned rows */
  PetscCall(PetscMalloc1(n, &lrows));
  for (r = 0; r < n; ++r) lrows[r] = -1; /* -1 = row not selected */
  PetscCall(PetscMalloc1(N, &rrows));
  for (r = 0; r < N; ++r) {
    const PetscInt idx = rows[r];
    PetscCheckFalse(idx < 0 || A->rmap->N <= idx,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %" PetscInt_FMT " out of range [0,%" PetscInt_FMT ")",idx,A->rmap->N);
    if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
      PetscCall(PetscLayoutFindOwner(A->rmap,idx,&p));
    }
    rrows[r].rank  = p;
    rrows[r].index = rows[r] - owners[p];
  }
862 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject) A), &sf)); 863 PetscCall(PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER)); 864 /* Collect flags for rows to be zeroed */ 865 PetscCall(PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR)); 866 PetscCall(PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR)); 867 PetscCall(PetscSFDestroy(&sf)); 868 /* Compress and put in row numbers */ 869 for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r; 870 /* zero diagonal part of matrix */ 871 PetscCall(MatZeroRowsColumns(l->A,len,lrows,diag,x,b)); 872 /* handle off diagonal part of matrix */ 873 PetscCall(MatCreateVecs(A,&xmask,NULL)); 874 PetscCall(VecDuplicate(l->lvec,&lmask)); 875 PetscCall(VecGetArray(xmask,&bb)); 876 for (i=0; i<len; i++) bb[lrows[i]] = 1; 877 PetscCall(VecRestoreArray(xmask,&bb)); 878 PetscCall(VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD)); 879 PetscCall(VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD)); 880 PetscCall(VecDestroy(&xmask)); 881 if (x && b) { /* this code is buggy when the row and column layout don't match */ 882 PetscBool cong; 883 884 PetscCall(MatHasCongruentLayouts(A,&cong)); 885 PetscCheck(cong,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Need matching row/col layout"); 886 PetscCall(VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD)); 887 PetscCall(VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD)); 888 PetscCall(VecGetArrayRead(l->lvec,&xx)); 889 PetscCall(VecGetArray(b,&bb)); 890 } 891 PetscCall(VecGetArray(lmask,&mask)); 892 /* remove zeroed rows of off diagonal matrix */ 893 PetscCall(MatSeqAIJGetArray(l->B,&aij_a)); 894 ii = aij->i; 895 for (i=0; i<len; i++) { 896 PetscCall(PetscArrayzero(aij_a + ii[lrows[i]],ii[lrows[i]+1] - ii[lrows[i]])); 897 } 898 /* loop over all elements of off process part of matrix zeroing removed columns*/ 899 if (aij->compressedrow.use) { 900 m = 
aij->compressedrow.nrows; 901 ii = aij->compressedrow.i; 902 ridx = aij->compressedrow.rindex; 903 for (i=0; i<m; i++) { 904 n = ii[i+1] - ii[i]; 905 aj = aij->j + ii[i]; 906 aa = aij_a + ii[i]; 907 908 for (j=0; j<n; j++) { 909 if (PetscAbsScalar(mask[*aj])) { 910 if (b) bb[*ridx] -= *aa*xx[*aj]; 911 *aa = 0.0; 912 } 913 aa++; 914 aj++; 915 } 916 ridx++; 917 } 918 } else { /* do not use compressed row format */ 919 m = l->B->rmap->n; 920 for (i=0; i<m; i++) { 921 n = ii[i+1] - ii[i]; 922 aj = aij->j + ii[i]; 923 aa = aij_a + ii[i]; 924 for (j=0; j<n; j++) { 925 if (PetscAbsScalar(mask[*aj])) { 926 if (b) bb[i] -= *aa*xx[*aj]; 927 *aa = 0.0; 928 } 929 aa++; 930 aj++; 931 } 932 } 933 } 934 if (x && b) { 935 PetscCall(VecRestoreArray(b,&bb)); 936 PetscCall(VecRestoreArrayRead(l->lvec,&xx)); 937 } 938 PetscCall(MatSeqAIJRestoreArray(l->B,&aij_a)); 939 PetscCall(VecRestoreArray(lmask,&mask)); 940 PetscCall(VecDestroy(&lmask)); 941 PetscCall(PetscFree(lrows)); 942 943 /* only change matrix nonzero state if pattern was allowed to be changed */ 944 if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) { 945 PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate; 946 PetscCall(MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A))); 947 } 948 PetscFunctionReturn(0); 949 } 950 951 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 952 { 953 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 954 PetscInt nt; 955 VecScatter Mvctx = a->Mvctx; 956 957 PetscFunctionBegin; 958 PetscCall(VecGetLocalSize(xx,&nt)); 959 PetscCheckFalse(nt != A->cmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%" PetscInt_FMT ") and xx (%" PetscInt_FMT ")",A->cmap->n,nt); 960 PetscCall(VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD)); 961 PetscCall((*a->A->ops->mult)(a->A,xx,yy)); 962 PetscCall(VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD)); 963 PetscCall((*a->B->ops->multadd)(a->B,a->lvec,yy,yy)); 964 
PetscFunctionReturn(0); 965 } 966 967 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx) 968 { 969 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 970 971 PetscFunctionBegin; 972 PetscCall(MatMultDiagonalBlock(a->A,bb,xx)); 973 PetscFunctionReturn(0); 974 } 975 976 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 977 { 978 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 979 VecScatter Mvctx = a->Mvctx; 980 981 PetscFunctionBegin; 982 PetscCall(VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD)); 983 PetscCall((*a->A->ops->multadd)(a->A,xx,yy,zz)); 984 PetscCall(VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD)); 985 PetscCall((*a->B->ops->multadd)(a->B,a->lvec,zz,zz)); 986 PetscFunctionReturn(0); 987 } 988 989 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy) 990 { 991 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 992 993 PetscFunctionBegin; 994 /* do nondiagonal part */ 995 PetscCall((*a->B->ops->multtranspose)(a->B,xx,a->lvec)); 996 /* do local part */ 997 PetscCall((*a->A->ops->multtranspose)(a->A,xx,yy)); 998 /* add partial results together */ 999 PetscCall(VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE)); 1000 PetscCall(VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE)); 1001 PetscFunctionReturn(0); 1002 } 1003 1004 PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool *f) 1005 { 1006 MPI_Comm comm; 1007 Mat_MPIAIJ *Aij = (Mat_MPIAIJ*) Amat->data, *Bij; 1008 Mat Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs; 1009 IS Me,Notme; 1010 PetscInt M,N,first,last,*notme,i; 1011 PetscBool lf; 1012 PetscMPIInt size; 1013 1014 PetscFunctionBegin; 1015 /* Easy test: symmetric diagonal block */ 1016 Bij = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A; 1017 PetscCall(MatIsTranspose(Adia,Bdia,tol,&lf)); 1018 PetscCall(MPIU_Allreduce(&lf,f,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)Amat))); 1019 if (!*f) PetscFunctionReturn(0); 1020 PetscCall(PetscObjectGetComm((PetscObject)Amat,&comm)); 1021 
PetscCallMPI(MPI_Comm_size(comm,&size)); 1022 if (size == 1) PetscFunctionReturn(0); 1023 1024 /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. */ 1025 PetscCall(MatGetSize(Amat,&M,&N)); 1026 PetscCall(MatGetOwnershipRange(Amat,&first,&last)); 1027 PetscCall(PetscMalloc1(N-last+first,¬me)); 1028 for (i=0; i<first; i++) notme[i] = i; 1029 for (i=last; i<M; i++) notme[i-last+first] = i; 1030 PetscCall(ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme)); 1031 PetscCall(ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me)); 1032 PetscCall(MatCreateSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs)); 1033 Aoff = Aoffs[0]; 1034 PetscCall(MatCreateSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs)); 1035 Boff = Boffs[0]; 1036 PetscCall(MatIsTranspose(Aoff,Boff,tol,f)); 1037 PetscCall(MatDestroyMatrices(1,&Aoffs)); 1038 PetscCall(MatDestroyMatrices(1,&Boffs)); 1039 PetscCall(ISDestroy(&Me)); 1040 PetscCall(ISDestroy(&Notme)); 1041 PetscCall(PetscFree(notme)); 1042 PetscFunctionReturn(0); 1043 } 1044 1045 PetscErrorCode MatIsSymmetric_MPIAIJ(Mat A,PetscReal tol,PetscBool *f) 1046 { 1047 PetscFunctionBegin; 1048 PetscCall(MatIsTranspose_MPIAIJ(A,A,tol,f)); 1049 PetscFunctionReturn(0); 1050 } 1051 1052 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1053 { 1054 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1055 1056 PetscFunctionBegin; 1057 /* do nondiagonal part */ 1058 PetscCall((*a->B->ops->multtranspose)(a->B,xx,a->lvec)); 1059 /* do local part */ 1060 PetscCall((*a->A->ops->multtransposeadd)(a->A,xx,yy,zz)); 1061 /* add partial results together */ 1062 PetscCall(VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE)); 1063 PetscCall(VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE)); 1064 PetscFunctionReturn(0); 1065 } 1066 1067 /* 1068 This only works correctly for square matrices where the subblock A->A is the 1069 diagonal block 1070 */ 1071 PetscErrorCode 
MatGetDiagonal_MPIAIJ(Mat A,Vec v)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  /* valid only when the row and column partitions coincide, so a->A holds the diagonal */
  PetscCheckFalse(A->rmap->N != A->cmap->N,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
  PetscCheckFalse(A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
  PetscCall(MatGetDiagonal(a->A,v));
  PetscFunctionReturn(0);
}

/* Scales every entry of the matrix by aa (both local blocks). */
PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  PetscCall(MatScale(a->A,aa));
  PetscCall(MatScale(a->B,aa));
  PetscFunctionReturn(0);
}

/* Free COO stuff; must match allocation methods in MatSetPreallocationCOO_MPIAIJ() */
PETSC_INTERN PetscErrorCode MatResetPreallocationCOO_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;

  PetscFunctionBegin;
  PetscCall(PetscSFDestroy(&aij->coo_sf));
  PetscCall(PetscFree4(aij->Aperm1,aij->Bperm1,aij->Ajmap1,aij->Bjmap1));
  PetscCall(PetscFree4(aij->Aperm2,aij->Bperm2,aij->Ajmap2,aij->Bjmap2));
  PetscCall(PetscFree4(aij->Aimap1,aij->Bimap1,aij->Aimap2,aij->Bimap2));
  PetscCall(PetscFree2(aij->sendbuf,aij->recvbuf));
  PetscCall(PetscFree(aij->Cperm1));
  PetscFunctionReturn(0);
}

/* Destroys all resources of the parallel matrix and unregisters every composed method. */
PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT,mat->rmap->N,mat->cmap->N);
#endif
  PetscCall(MatStashDestroy_Private(&mat->stash));
  PetscCall(VecDestroy(&aij->diag));
  PetscCall(MatDestroy(&aij->A));
  PetscCall(MatDestroy(&aij->B));
#if defined(PETSC_USE_CTABLE)
  PetscCall(PetscTableDestroy(&aij->colmap));
#else
  PetscCall(PetscFree(aij->colmap));
#endif
  PetscCall(PetscFree(aij->garray));
  PetscCall(VecDestroy(&aij->lvec));
  PetscCall(VecScatterDestroy(&aij->Mvctx));
  PetscCall(PetscFree2(aij->rowvalues,aij->rowindices));
  PetscCall(PetscFree(aij->ld));

  /* Free COO */
  PetscCall(MatResetPreallocationCOO_MPIAIJ(mat));

  PetscCall(PetscFree(mat->data));

  /* may be created by MatCreateMPIAIJSumSeqAIJSymbolic */
  PetscCall(PetscObjectCompose((PetscObject)mat,"MatMergeSeqsToMPI",NULL));

  PetscCall(PetscObjectChangeTypeName((PetscObject)mat,NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatResetPreallocation_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpibaij_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL));
#if defined(PETSC_HAVE_CUDA)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijcusparse_C",NULL));
#endif
#if defined(PETSC_HAVE_KOKKOS_KERNELS)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijkokkos_C",NULL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpidense_C",NULL));
#if defined(PETSC_HAVE_ELEMENTAL)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_elemental_C",NULL));
#endif
#if defined(PETSC_HAVE_SCALAPACK)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_scalapack_C",NULL));
#endif
#if defined(PETSC_HAVE_HYPRE)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_hypre_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_transpose_mpiaij_mpiaij_C",NULL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_is_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_is_mpiaij_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaij_mpiaij_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetUseScalableIncreaseOverlap_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijperm_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijsell_C",NULL));
#if defined(PETSC_HAVE_MKL_SPARSE)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijmkl_C",NULL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijcrl_C",NULL));
  /* NOTE(review): "MatConvert_mpiaij_is_C" is also cleared above — this line is redundant but harmless */
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_is_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisell_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatSetPreallocationCOO_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatSetValuesCOO_C",NULL));
  PetscFunctionReturn(0);
}

/*
  Writes the parallel matrix to a binary viewer in PETSc's native format:
  header, row lengths, column indices, then values — each gathered with
  collective PetscViewerBinaryWriteAll calls, rows in global column order
  (off-diagonal entries left of the diagonal block first, then diagonal
  block, then the remaining off-diagonal entries).
*/
PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ        *A = (Mat_SeqAIJ*)aij->A->data;
  Mat_SeqAIJ        *B = (Mat_SeqAIJ*)aij->B->data;
  const PetscInt    *garray = aij->garray;  /* local-to-global map for B's columns */
  const PetscScalar *aa,*ba;
  PetscInt          header[4],M,N,m,rs,cs,nz,cnt,i,ja,jb;
  PetscInt          *rowlens;
  PetscInt          *colidxs;
  PetscScalar       *matvals;

  PetscFunctionBegin;
  PetscCall(PetscViewerSetUp(viewer));

  M  = mat->rmap->N;
  N  = mat->cmap->N;
  m  = mat->rmap->n;
  rs = mat->rmap->rstart;
  cs = mat->cmap->rstart;
  nz = A->nz + B->nz;

  /* write matrix header */
  header[0] = MAT_FILE_CLASSID;
  header[1] = M; header[2] = N; header[3] = nz;
  PetscCallMPI(MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat)));
  PetscCall(PetscViewerBinaryWrite(viewer,header,4,PETSC_INT));

  /* fill in and store row lengths */
  PetscCall(PetscMalloc1(m,&rowlens));
  for (i=0; i<m; i++) rowlens[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
  PetscCall(PetscViewerBinaryWriteAll(viewer,rowlens,m,rs,M,PETSC_INT));
  PetscCall(PetscFree(rowlens));

  /* fill in and store column indices */
  PetscCall(PetscMalloc1(nz,&colidxs));
  for (cnt=0, i=0; i<m; i++) {
    /* off-diagonal columns left of the diagonal block */
    for (jb=B->i[i]; jb<B->i[i+1]; jb++) {
      if (garray[B->j[jb]] > cs) break;
      colidxs[cnt++] = garray[B->j[jb]];
    }
    /* diagonal block, shifted to global numbering */
    for (ja=A->i[i]; ja<A->i[i+1]; ja++)
      colidxs[cnt++] = A->j[ja] + cs;
    /* remaining off-diagonal columns */
    for (; jb<B->i[i+1]; jb++)
      colidxs[cnt++] = garray[B->j[jb]];
  }
  PetscCheckFalse(cnt != nz,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt_FMT,cnt,nz);
  PetscCall(PetscViewerBinaryWriteAll(viewer,colidxs,nz,PETSC_DETERMINE,PETSC_DETERMINE,PETSC_INT));
  PetscCall(PetscFree(colidxs));

  /* fill in and store nonzero values (same traversal order as the indices) */
  PetscCall(MatSeqAIJGetArrayRead(aij->A,&aa));
  PetscCall(MatSeqAIJGetArrayRead(aij->B,&ba));
  PetscCall(PetscMalloc1(nz,&matvals));
  for (cnt=0, i=0; i<m; i++) {
    for (jb=B->i[i]; jb<B->i[i+1]; jb++) {
      if (garray[B->j[jb]] > cs) break;
      matvals[cnt++] = ba[jb];
    }
    for (ja=A->i[i]; ja<A->i[i+1]; ja++)
      matvals[cnt++] = aa[ja];
    for (; jb<B->i[i+1]; jb++)
      matvals[cnt++] = ba[jb];
  }
  PetscCall(MatSeqAIJRestoreArrayRead(aij->A,&aa));
  PetscCall(MatSeqAIJRestoreArrayRead(aij->B,&ba));
  /* NOTE(review): this check uses PETSC_ERR_LIB while the identical check above uses
     PETSC_ERR_PLIB — likely meant to be PETSC_ERR_PLIB here too; confirm upstream */
  PetscCheckFalse(cnt != nz,PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt_FMT,cnt,nz);
  PetscCall(PetscViewerBinaryWriteAll(viewer,matvals,nz,PETSC_DETERMINE,PETSC_DETERMINE,PETSC_SCALAR));
  PetscCall(PetscFree(matvals));

  /* write block size option to the viewer's .info file */
  PetscCall(MatView_Binary_BlockSizes(mat,viewer));
  PetscFunctionReturn(0);
}

#include <petscdraw.h>
/*
  Viewer dispatch for ASCII, draw, socket and binary viewers. Informational
  ASCII formats are handled per-format; anything else funnels into the
  "gather the whole matrix onto rank 0 and view it sequentially" fallback.
*/
PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
  PetscMPIInt       rank = aij->rank,size = aij->size;
  PetscBool         isdraw,iascii,isbinary;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary));
  if (iascii) {
    PetscCall(PetscViewerGetFormat(viewer,&format));
    if (format == PETSC_VIEWER_LOAD_BALANCE) {
      /* report min/avg/max local nonzero counts across ranks */
      PetscInt i,nmax = 0,nmin = PETSC_MAX_INT,navg = 0,*nz,nzlocal = ((Mat_SeqAIJ*) (aij->A->data))->nz + ((Mat_SeqAIJ*) (aij->B->data))->nz;
      PetscCall(PetscMalloc1(size,&nz));
      PetscCallMPI(MPI_Allgather(&nzlocal,1,MPIU_INT,nz,1,MPIU_INT,PetscObjectComm((PetscObject)mat)));
      for (i=0; i<(PetscInt)size; i++) {
        nmax = PetscMax(nmax,nz[i]);
        nmin = PetscMin(nmin,nz[i]);
        navg += nz[i];
      }
      PetscCall(PetscFree(nz));
      navg = navg/size;
      PetscCall(PetscViewerASCIIPrintf(viewer,"Load Balance - Nonzeros: Min %" PetscInt_FMT " avg %" PetscInt_FMT " max %" PetscInt_FMT "\n",nmin,navg,nmax));
      PetscFunctionReturn(0);
    }
    PetscCall(PetscViewerGetFormat(viewer,&format));
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo  info;
      PetscInt *inodes=NULL;

      PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank));
      PetscCall(MatGetInfo(mat,MAT_LOCAL,&info));
      PetscCall(MatInodeGetInodeSizes(aij->A,NULL,&inodes,NULL));
      PetscCall(PetscViewerASCIIPushSynchronized(viewer));
      if (!inodes) {
        PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %g, not using I-node routines\n",
                                                     rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory));
      } else {
        PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %g, using I-node routines\n",
                                                     rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory));
      }
      PetscCall(MatGetInfo(aij->A,MAT_LOCAL,&info));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %" PetscInt_FMT " \n",rank,(PetscInt)info.nz_used));
      PetscCall(MatGetInfo(aij->B,MAT_LOCAL,&info));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %" PetscInt_FMT " \n",rank,(PetscInt)info.nz_used));
      PetscCall(PetscViewerFlush(viewer));
      PetscCall(PetscViewerASCIIPopSynchronized(viewer));
      PetscCall(PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n"));
      PetscCall(VecScatterView(aij->Mvctx,viewer));
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscInt inodecount,inodelimit,*inodes;
      PetscCall(MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit));
      if (inodes) {
        PetscCall(PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %" PetscInt_FMT " nodes, limit used is %" PetscInt_FMT "\n",inodecount,inodelimit));
      } else {
        PetscCall(PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n"));
      }
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
  } else if (isbinary) {
    if (size == 1) {
      PetscCall(PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name));
      PetscCall(MatView(aij->A,viewer));
    } else {
      PetscCall(MatView_MPIAIJ_Binary(mat,viewer));
    }
    PetscFunctionReturn(0);
  } else if (iascii && size == 1) {
    /* NOTE(review): unreachable — the iascii case is fully handled by the first branch above */
    PetscCall(PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name));
    PetscCall(MatView(aij->A,viewer));
    PetscFunctionReturn(0);
  } else if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;
    PetscCall(PetscViewerDrawGetDraw(viewer,0,&draw));
    PetscCall(PetscDrawIsNull(draw,&isnull));
    if (isnull) PetscFunctionReturn(0);
  }

  { /* assemble the entire matrix onto first processor */
    Mat A = NULL, Av;
    IS  isrow,iscol;

    /* rank 0 requests all rows/cols; every other rank requests none */
    PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->rmap->N : 0,0,1,&isrow));
    PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->cmap->N : 0,0,1,&iscol));
    PetscCall(MatCreateSubMatrix(mat,isrow,iscol,MAT_INITIAL_MATRIX,&A));
    PetscCall(MatMPIAIJGetSeqAIJ(A,&Av,NULL,NULL));
    /* The commented code uses MatCreateSubMatrices instead */
    /*
      Mat *AA, A = NULL, Av;
      IS  isrow,iscol;

      PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->rmap->N : 0,0,1,&isrow));
      PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->cmap->N : 0,0,1,&iscol));
      PetscCall(MatCreateSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&AA));
      if (rank == 0) {
        PetscCall(PetscObjectReference((PetscObject)AA[0]));
        A    = AA[0];
        Av   = AA[0];
      }
      PetscCall(MatDestroySubMatrices(1,&AA));
    */
    PetscCall(ISDestroy(&iscol));
    PetscCall(ISDestroy(&isrow));
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    PetscCall(PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer));
    if (rank == 0) {
      if (((PetscObject)mat)->name) {
        PetscCall(PetscObjectSetName((PetscObject)Av,((PetscObject)mat)->name));
      }
      PetscCall(MatView_SeqAIJ(Av,sviewer));
    }
    PetscCall(PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer));
    PetscCall(PetscViewerFlush(viewer));
    PetscCall(MatDestroy(&A));
  }
  PetscFunctionReturn(0);
}

/* Top-level MatView entry: dispatches supported viewer types; others are ignored. */
PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
{
  PetscBool iascii,isdraw,issocket,isbinary;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket));
  if (iascii || isdraw || isbinary || issocket) {
    PetscCall(MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer));
  }
  PetscFunctionReturn(0);
}

/*
  Parallel (local-block) SOR: each outer iteration refreshes ghost values of xx,
  folds the off-diagonal contribution into a modified rhs bb1 = bb - B*x, and
  applies the sequential SOR of the diagonal block.
*/
PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data;
  Vec        bb1 = NULL;
  PetscBool  hasop;

  PetscFunctionBegin;
  if (flag == SOR_APPLY_UPPER) {
PetscCall((*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx)); 1410 PetscFunctionReturn(0); 1411 } 1412 1413 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) { 1414 PetscCall(VecDuplicate(bb,&bb1)); 1415 } 1416 1417 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 1418 if (flag & SOR_ZERO_INITIAL_GUESS) { 1419 PetscCall((*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx)); 1420 its--; 1421 } 1422 1423 while (its--) { 1424 PetscCall(VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD)); 1425 PetscCall(VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD)); 1426 1427 /* update rhs: bb1 = bb - B*x */ 1428 PetscCall(VecScale(mat->lvec,-1.0)); 1429 PetscCall((*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1)); 1430 1431 /* local sweep */ 1432 PetscCall((*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx)); 1433 } 1434 } else if (flag & SOR_LOCAL_FORWARD_SWEEP) { 1435 if (flag & SOR_ZERO_INITIAL_GUESS) { 1436 PetscCall((*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx)); 1437 its--; 1438 } 1439 while (its--) { 1440 PetscCall(VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD)); 1441 PetscCall(VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD)); 1442 1443 /* update rhs: bb1 = bb - B*x */ 1444 PetscCall(VecScale(mat->lvec,-1.0)); 1445 PetscCall((*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1)); 1446 1447 /* local sweep */ 1448 PetscCall((*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx)); 1449 } 1450 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) { 1451 if (flag & SOR_ZERO_INITIAL_GUESS) { 1452 PetscCall((*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx)); 1453 its--; 1454 } 1455 while (its--) { 1456 PetscCall(VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD)); 1457 PetscCall(VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD)); 1458 1459 /* update rhs: 
bb1 = bb - B*x */ 1460 PetscCall(VecScale(mat->lvec,-1.0)); 1461 PetscCall((*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1)); 1462 1463 /* local sweep */ 1464 PetscCall((*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx)); 1465 } 1466 } else if (flag & SOR_EISENSTAT) { 1467 Vec xx1; 1468 1469 PetscCall(VecDuplicate(bb,&xx1)); 1470 PetscCall((*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx)); 1471 1472 PetscCall(VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD)); 1473 PetscCall(VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD)); 1474 if (!mat->diag) { 1475 PetscCall(MatCreateVecs(matin,&mat->diag,NULL)); 1476 PetscCall(MatGetDiagonal(matin,mat->diag)); 1477 } 1478 PetscCall(MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop)); 1479 if (hasop) { 1480 PetscCall(MatMultDiagonalBlock(matin,xx,bb1)); 1481 } else { 1482 PetscCall(VecPointwiseMult(bb1,mat->diag,xx)); 1483 } 1484 PetscCall(VecAYPX(bb1,(omega-2.0)/omega,bb)); 1485 1486 PetscCall(MatMultAdd(mat->B,mat->lvec,bb1,bb1)); 1487 1488 /* local sweep */ 1489 PetscCall((*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1)); 1490 PetscCall(VecAXPY(xx,1.0,xx1)); 1491 PetscCall(VecDestroy(&xx1)); 1492 } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported"); 1493 1494 PetscCall(VecDestroy(&bb1)); 1495 1496 matin->factorerrortype = mat->A->factorerrortype; 1497 PetscFunctionReturn(0); 1498 } 1499 1500 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B) 1501 { 1502 Mat aA,aB,Aperm; 1503 const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj; 1504 PetscScalar *aa,*ba; 1505 PetscInt i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest; 1506 PetscSF rowsf,sf; 1507 IS parcolp = NULL; 1508 PetscBool done; 1509 1510 PetscFunctionBegin; 1511 
PetscCall(MatGetLocalSize(A,&m,&n)); 1512 PetscCall(ISGetIndices(rowp,&rwant)); 1513 PetscCall(ISGetIndices(colp,&cwant)); 1514 PetscCall(PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest)); 1515 1516 /* Invert row permutation to find out where my rows should go */ 1517 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf)); 1518 PetscCall(PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant)); 1519 PetscCall(PetscSFSetFromOptions(rowsf)); 1520 for (i=0; i<m; i++) work[i] = A->rmap->rstart + i; 1521 PetscCall(PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPI_REPLACE)); 1522 PetscCall(PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPI_REPLACE)); 1523 1524 /* Invert column permutation to find out where my columns should go */ 1525 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A),&sf)); 1526 PetscCall(PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant)); 1527 PetscCall(PetscSFSetFromOptions(sf)); 1528 for (i=0; i<n; i++) work[i] = A->cmap->rstart + i; 1529 PetscCall(PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPI_REPLACE)); 1530 PetscCall(PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPI_REPLACE)); 1531 PetscCall(PetscSFDestroy(&sf)); 1532 1533 PetscCall(ISRestoreIndices(rowp,&rwant)); 1534 PetscCall(ISRestoreIndices(colp,&cwant)); 1535 PetscCall(MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols)); 1536 1537 /* Find out where my gcols should go */ 1538 PetscCall(MatGetSize(aB,NULL,&ng)); 1539 PetscCall(PetscMalloc1(ng,&gcdest)); 1540 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A),&sf)); 1541 PetscCall(PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols)); 1542 PetscCall(PetscSFSetFromOptions(sf)); 1543 PetscCall(PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest,MPI_REPLACE)); 1544 PetscCall(PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest,MPI_REPLACE)); 1545 PetscCall(PetscSFDestroy(&sf)); 1546 1547 PetscCall(PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz)); 1548 
PetscCall(MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done)); 1549 PetscCall(MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done)); 1550 for (i=0; i<m; i++) { 1551 PetscInt row = rdest[i]; 1552 PetscMPIInt rowner; 1553 PetscCall(PetscLayoutFindOwner(A->rmap,row,&rowner)); 1554 for (j=ai[i]; j<ai[i+1]; j++) { 1555 PetscInt col = cdest[aj[j]]; 1556 PetscMPIInt cowner; 1557 PetscCall(PetscLayoutFindOwner(A->cmap,col,&cowner)); /* Could build an index for the columns to eliminate this search */ 1558 if (rowner == cowner) dnnz[i]++; 1559 else onnz[i]++; 1560 } 1561 for (j=bi[i]; j<bi[i+1]; j++) { 1562 PetscInt col = gcdest[bj[j]]; 1563 PetscMPIInt cowner; 1564 PetscCall(PetscLayoutFindOwner(A->cmap,col,&cowner)); 1565 if (rowner == cowner) dnnz[i]++; 1566 else onnz[i]++; 1567 } 1568 } 1569 PetscCall(PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz,MPI_REPLACE)); 1570 PetscCall(PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz,MPI_REPLACE)); 1571 PetscCall(PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz,MPI_REPLACE)); 1572 PetscCall(PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz,MPI_REPLACE)); 1573 PetscCall(PetscSFDestroy(&rowsf)); 1574 1575 PetscCall(MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm)); 1576 PetscCall(MatSeqAIJGetArray(aA,&aa)); 1577 PetscCall(MatSeqAIJGetArray(aB,&ba)); 1578 for (i=0; i<m; i++) { 1579 PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */ 1580 PetscInt j0,rowlen; 1581 rowlen = ai[i+1] - ai[i]; 1582 for (j0=j=0; j<rowlen; j0=j) { /* rowlen could be larger than number of rows m, so sum in batches */ 1583 for (; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]]; 1584 PetscCall(MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES)); 1585 } 1586 rowlen = bi[i+1] - bi[i]; 1587 for (j0=j=0; j<rowlen; j0=j) { 1588 for (; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]]; 1589 
PetscCall(MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES)); 1590 } 1591 } 1592 PetscCall(MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY)); 1593 PetscCall(MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY)); 1594 PetscCall(MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done)); 1595 PetscCall(MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done)); 1596 PetscCall(MatSeqAIJRestoreArray(aA,&aa)); 1597 PetscCall(MatSeqAIJRestoreArray(aB,&ba)); 1598 PetscCall(PetscFree4(dnnz,onnz,tdnnz,tonnz)); 1599 PetscCall(PetscFree3(work,rdest,cdest)); 1600 PetscCall(PetscFree(gcdest)); 1601 if (parcolp) PetscCall(ISDestroy(&colp)); 1602 *B = Aperm; 1603 PetscFunctionReturn(0); 1604 } 1605 1606 PetscErrorCode MatGetGhosts_MPIAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[]) 1607 { 1608 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1609 1610 PetscFunctionBegin; 1611 PetscCall(MatGetSize(aij->B,NULL,nghosts)); 1612 if (ghosts) *ghosts = aij->garray; 1613 PetscFunctionReturn(0); 1614 } 1615 1616 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1617 { 1618 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1619 Mat A = mat->A,B = mat->B; 1620 PetscLogDouble isend[5],irecv[5]; 1621 1622 PetscFunctionBegin; 1623 info->block_size = 1.0; 1624 PetscCall(MatGetInfo(A,MAT_LOCAL,info)); 1625 1626 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1627 isend[3] = info->memory; isend[4] = info->mallocs; 1628 1629 PetscCall(MatGetInfo(B,MAT_LOCAL,info)); 1630 1631 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1632 isend[3] += info->memory; isend[4] += info->mallocs; 1633 if (flag == MAT_LOCAL) { 1634 info->nz_used = isend[0]; 1635 info->nz_allocated = isend[1]; 1636 info->nz_unneeded = isend[2]; 1637 info->memory = isend[3]; 1638 info->mallocs = isend[4]; 1639 } else if (flag == MAT_GLOBAL_MAX) { 1640 
    /* MAT_GLOBAL_MAX: elementwise maximum of the per-rank statistics */
    PetscCall(MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_MAX,PetscObjectComm((PetscObject)matin)));

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    /* MAT_GLOBAL_SUM: totals over all ranks */
    PetscCall(MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_SUM,PetscObjectComm((PetscObject)matin)));

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(0);
}

/*
   MatSetOption_MPIAIJ - Sets an option on a parallel AIJ matrix.

   Most options are forwarded to both the diagonal (a->A) and off-diagonal (a->B)
   sequential blocks; a few only toggle flags stored on the MPI layer itself.
*/
PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  switch (op) {
  /* These options only make sense once the nonzero structure exists, hence the
     preallocation check; they are simply forwarded to both sequential blocks */
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_UNUSED_NONZERO_LOCATION_ERR:
  case MAT_KEEP_NONZERO_PATTERN:
  case MAT_NEW_NONZERO_LOCATION_ERR:
  case MAT_USE_INODES:
  case MAT_IGNORE_ZERO_ENTRIES:
  case MAT_FORM_EXPLICIT_TRANSPOSE:
    MatCheckPreallocated(A,1);
    PetscCall(MatSetOption(a->A,op,flg));
    PetscCall(MatSetOption(a->B,op,flg));
    break;
  case MAT_ROW_ORIENTED:
    MatCheckPreallocated(A,1);
    /* record orientation on the MPI layer too, so MatSetValues_MPIAIJ interprets
       incoming blocks of values consistently with the sequential parts */
    a->roworiented = flg;

    PetscCall(MatSetOption(a->A,op,flg));
    PetscCall(MatSetOption(a->B,op,flg));
    break;
  case MAT_FORCE_DIAGONAL_ENTRIES:
  case MAT_SORTED_FULL:
    PetscCall(PetscInfo(A,"Option %s ignored\n",MatOptions[op]));
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    /* only affects the stashing machinery of the MPI layer; nothing to forward */
    a->donotstash = flg;
    break;
  /* Symmetry flags are handled directly by MatSetOption() and they don't affect preallocation */
  case MAT_SPD:
  case MAT_SYMMETRIC:
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_SYMMETRY_ETERNAL:
    break;
  case MAT_SUBMAT_SINGLEIS:
    A->submat_singleis = flg;
    break;
  case MAT_STRUCTURE_ONLY:
    /* The option is handled directly by MatSetOption() */
    break;
  default:
    SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
  }
  PetscFunctionReturn(0);
}

/*
   MatGetRow_MPIAIJ - Returns one locally owned row of the parallel matrix with
   global column indices, merging the diagonal and off-diagonal blocks.

   Only rows in [rmap->rstart, rmap->rend) may be requested. idx and/or v may be
   NULL when the caller does not need indices/values. Must be paired with
   MatRestoreRow_MPIAIJ(); only one row may be active at a time (getrowactive).
*/
PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIAIJ  *mat = (Mat_MPIAIJ*)matin->data;
  PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p;
  PetscInt    i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
  PetscInt    nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
  PetscInt    *cmap,*idx_p;

  PetscFunctionBegin;
  PetscCheck(!mat->getrowactive,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
    PetscInt   max = 1,tmp;
    /* longest combined (diag + off-diag) row determines the scratch size */
    for (i=0; i<matin->rmap->n; i++) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
      if (max < tmp) max = tmp;
    }
    /* scratch buffers are cached on the matrix and reused across calls */
    PetscCall(PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices));
  }

  PetscCheckFalse(row < rstart || row >= rend,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
  lrow = row - rstart;

  /* pass NULL for whichever outputs the caller did not request */
  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = NULL; pvB = NULL;}
  if (!idx) {pcA = NULL; if (!v) pcB = NULL;}
  PetscCall((*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA));
  PetscCall((*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB));
  nztot = nzA + nzB;

  cmap = mat->garray; /* maps compressed off-diagonal column to global column */
  if (v  || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted.
         B entries with global column < cstart come first, then all of A
         (global columns cstart..cstart+n), then the remaining B entries.
         imark is the split point inside the B row. */
      PetscInt imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        for (i=0; i<nzB; i++) {
          if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
        for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          /* split point already computed in the value pass above */
          for (i=0; i<imark; i++) {
            idx_p[i] = cmap[cworkB[i]];
          }
        } else {
          for (i=0; i<nzB; i++) {
            if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
            else break;
          }
          imark = i;
        }
        for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
        for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
      }
    } else {
      /* empty row: no indices or values to report */
      if (idx) *idx = NULL;
      if (v)   *v   = NULL;
    }
  }
  *nz = nztot;
  /* the merged copies live in mat->rowvalues/rowindices, so the block rows can
     be returned immediately */
  PetscCall((*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA));
  PetscCall((*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB));
  PetscFunctionReturn(0);
}

/*
   MatRestoreRow_MPIAIJ - Releases the row obtained with MatGetRow_MPIAIJ().
   Only clears the active flag; the cached scratch buffers are kept for reuse.
*/
PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;

  PetscFunctionBegin;
  PetscCheck(aij->getrowactive,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
  aij->getrowactive = PETSC_FALSE;
  PetscFunctionReturn(0);
}

/*
   MatNorm_MPIAIJ - Computes a norm of a parallel AIJ matrix.

   Supports NORM_FROBENIUS, NORM_1 (max column sum) and NORM_INFINITY (max row
   sum); the two-norm is not supported. On one rank, delegates to the
   sequential implementation.
*/
PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
{
  Mat_MPIAIJ      *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ      *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
  PetscInt        i,j,cstart = mat->cmap->rstart;
  PetscReal       sum = 0.0;
  const MatScalar *v,*amata,*bmata;

  PetscFunctionBegin;
  if (aij->size == 1) {
    PetscCall(MatNorm(aij->A,type,norm));
  } else {
    PetscCall(MatSeqAIJGetArrayRead(aij->A,&amata));
    PetscCall(MatSeqAIJGetArrayRead(aij->B,&bmata));
    if (type == NORM_FROBENIUS) {
      /* sum of |a_ij|^2 over both blocks, reduced over all ranks */
      v = amata;
      for (i=0; i<amat->nz; i++) {
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
      }
      v = bmata;
      for (i=0; i<bmat->nz; i++) {
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
      }
      PetscCall(MPIU_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat)));
      *norm = PetscSqrtReal(*norm);
      PetscCall(PetscLogFlops(2.0*amat->nz+2.0*bmat->nz));
    } else if (type == NORM_1) { /* max column norm */
      /* accumulate per-global-column absolute sums, then reduce; note this
         allocates O(global columns) per rank */
      PetscReal *tmp,*tmp2;
      PetscInt  *jj,*garray = aij->garray;
      PetscCall(PetscCalloc1(mat->cmap->N+1,&tmp));
      PetscCall(PetscMalloc1(mat->cmap->N+1,&tmp2));
      *norm = 0.0;
      v = amata; jj = amat->j;
      for (j=0; j<amat->nz; j++) {
        tmp[cstart + *jj++] += PetscAbsScalar(*v); v++;
      }
      v = bmata; jj = bmat->j;
      for (j=0; j<bmat->nz; j++) {
        tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
      }
      PetscCall(MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat)));
      for (j=0; j<mat->cmap->N; j++) {
        if (tmp2[j] > *norm) *norm =
tmp2[j];
      }
      PetscCall(PetscFree(tmp));
      PetscCall(PetscFree(tmp2));
      PetscCall(PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0)));
    } else if (type == NORM_INFINITY) { /* max row norm */
      PetscReal ntemp = 0.0;
      /* rows are entirely local, so only a MAX reduction is needed at the end */
      for (j=0; j<aij->A->rmap->n; j++) {
        v   = amata + amat->i[j];
        sum = 0.0;
        for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
          sum += PetscAbsScalar(*v); v++;
        }
        v = bmata + bmat->i[j];
        for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
          sum += PetscAbsScalar(*v); v++;
        }
        if (sum > ntemp) ntemp = sum;
      }
      PetscCall(MPIU_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat)));
      PetscCall(PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0)));
    } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm");
    PetscCall(MatSeqAIJRestoreArrayRead(aij->A,&amata));
    PetscCall(MatSeqAIJRestoreArrayRead(aij->B,&bmata));
  }
  PetscFunctionReturn(0);
}

/*
   MatTranspose_MPIAIJ - Forms the transpose of a parallel AIJ matrix.

   For MAT_INITIAL_MATRIX (or in-place reuse where *matout == A) a new matrix
   with exact preallocation is created; for MAT_REUSE_MATRIX the existing
   nonzero pattern of *matout is reused. The diagonal block is transposed
   locally; off-diagonal entries are communicated via MatSetValues().
*/
PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
{
  Mat_MPIAIJ      *a =(Mat_MPIAIJ*)A->data,*b;
  Mat_SeqAIJ      *Aloc =(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data,*sub_B_diag;
  PetscInt        M = A->rmap->N,N=A->cmap->N,ma,na,mb,nb,row,*cols,*cols_tmp,*B_diag_ilen,i,ncol,A_diag_ncol;
  const PetscInt  *ai,*aj,*bi,*bj,*B_diag_i;
  Mat             B,A_diag,*B_diag;
  const MatScalar *pbv,*bv;

  PetscFunctionBegin;
  ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n;
  ai = Aloc->i; aj = Aloc->j;
  bi = Bloc->i; bj = Bloc->j;
  if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
    PetscInt             *d_nnz,*g_nnz,*o_nnz;
    PetscSFNode          *oloc;
    PETSC_UNUSED PetscSF sf;

    PetscCall(PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc));
    /* compute d_nnz for preallocation: column counts of A become row counts of A^T */
    PetscCall(PetscArrayzero(d_nnz,na));
    for (i=0; i<ai[ma]; i++) d_nnz[aj[i]]++;
    /* compute local off-diagonal contributions */
    PetscCall(PetscArrayzero(g_nnz,nb));
    for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++;
    /* map those to global: reduce per-compressed-column counts onto the
       owning ranks of the corresponding global columns */
    PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A),&sf));
    PetscCall(PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray));
    PetscCall(PetscSFSetFromOptions(sf));
    PetscCall(PetscArrayzero(o_nnz,na));
    PetscCall(PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM));
    PetscCall(PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM));
    PetscCall(PetscSFDestroy(&sf));

    /* B has the transposed layout: local sizes (n x m), global (N x M) */
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A),&B));
    PetscCall(MatSetSizes(B,A->cmap->n,A->rmap->n,N,M));
    PetscCall(MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs)));
    PetscCall(MatSetType(B,((PetscObject)A)->type_name));
    PetscCall(MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz));
    PetscCall(PetscFree4(d_nnz,o_nnz,g_nnz,oloc));
  } else {
    B = *matout;
    /* reused pattern must already contain all entries of the transpose */
    PetscCall(MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE));
  }

  b           = (Mat_MPIAIJ*)B->data;
  A_diag      = a->A;
  B_diag      = &b->A;
  sub_B_diag  = (Mat_SeqAIJ*)(*B_diag)->data;
  A_diag_ncol = A_diag->cmap->N;
  B_diag_ilen = sub_B_diag->ilen;
  B_diag_i    = sub_B_diag->i;

  /* Set ilen for diagonal of B */
  for (i=0; i<A_diag_ncol; i++) {
    B_diag_ilen[i] = B_diag_i[i+1] - B_diag_i[i];
  }

  /* Transpose the diagonal part of the matrix. In contrast to the offdiagonal part, this can be done
  very quickly (=without using MatSetValues), because all writes are local. */
  PetscCall(MatTranspose(A_diag,MAT_REUSE_MATRIX,B_diag));

  /* copy over the B part: set one column of A^T at a time using the rows of
     the local off-diagonal block with globalized column indices */
  PetscCall(PetscMalloc1(bi[mb],&cols));
  PetscCall(MatSeqAIJGetArrayRead(a->B,&bv));
  pbv = bv;
  row = A->rmap->rstart;
  for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]];
  cols_tmp = cols;
  for (i=0; i<mb; i++) {
    ncol = bi[i+1]-bi[i];
    /* row i of A contributes column `row` of A^T */
    PetscCall(MatSetValues(B,ncol,cols_tmp,1,&row,pbv,INSERT_VALUES));
    row++;
    pbv += ncol; cols_tmp += ncol;
  }
  PetscCall(PetscFree(cols));
  PetscCall(MatSeqAIJRestoreArrayRead(a->B,&bv));

  PetscCall(MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY));
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
    *matout = B;
  } else {
    /* MAT_INPLACE_MATRIX: replace A's innards with B's */
    PetscCall(MatHeaderMerge(A,&B));
  }
  PetscFunctionReturn(0);
}

/*
   MatDiagonalScale_MPIAIJ - Computes diag(ll)*mat*diag(rr).

   ll (left scaling) has the row layout, rr (right scaling) the column layout.
   Either may be NULL to skip that side.
*/
PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
  Mat        a = aij->A,b = aij->B;
  PetscInt   s1,s2,s3;

  PetscFunctionBegin;
  PetscCall(MatGetLocalSize(mat,&s2,&s3));
  if (rr) {
    PetscCall(VecGetLocalSize(rr,&s1));
    PetscCheckFalse(s1!=s3,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
    /* Overlap communication with computation.
     */
    PetscCall(VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD));
  }
  if (ll) {
    PetscCall(VecGetLocalSize(ll,&s1));
    PetscCheckFalse(s1!=s2,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
    /* left-scale the off-diagonal block while the rr scatter is in flight */
    PetscCall((*b->ops->diagonalscale)(b,ll,NULL));
  }
  /* scale the diagonal block */
  PetscCall((*a->ops->diagonalscale)(a,ll,rr));

  if (rr) {
    /* Do a scatter end and then right scale the off-diagonal block */
    PetscCall(VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD));
    PetscCall((*b->ops->diagonalscale)(b,NULL,aij->lvec));
  }
  PetscFunctionReturn(0);
}

/*
   MatSetUnfactored_MPIAIJ - Clears the factored state; only the diagonal block
   carries factorization data, so only a->A needs resetting.
*/
PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  PetscCall(MatSetUnfactored(a->A));
  PetscFunctionReturn(0);
}

/*
   MatEqual_MPIAIJ - Tests if two parallel AIJ matrices are equal: both the
   diagonal and off-diagonal blocks must match on every rank (logical AND
   reduction over the communicator).
*/
PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool *flag)
{
  Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
  Mat        a,b,c,d;
  PetscBool  flg;

  PetscFunctionBegin;
  a = matA->A; b = matA->B;
  c = matB->A; d = matB->B;

  PetscCall(MatEqual(a,c,&flg));
  if (flg) {
    /* only compare off-diagonal blocks if the diagonal blocks already match */
    PetscCall(MatEqual(b,d,&flg));
  }
  PetscCall(MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A)));
  PetscFunctionReturn(0);
}

/*
   MatCopy_MPIAIJ - Copies A into B. Uses the fast block-wise copy only when
   the patterns are declared identical and both matrices share the same copy
   implementation; otherwise falls back to MatCopy_Basic().
*/
PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
  Mat_MPIAIJ *b = (Mat_MPIAIJ*)B->data;

  PetscFunctionBegin;
  /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
  if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
    /* because of the column compression in the off-processor part of the matrix a->B,
       the number of columns in a->B and b->B may be different, hence we cannot call
       the MatCopy() directly on the two parts. If need be, we can provide a more
       efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
       then copying the submatrices */
    PetscCall(MatCopy_Basic(A,B,str));
  } else {
    PetscCall(MatCopy(a->A,b->A,str));
    PetscCall(MatCopy(a->B,b->B,str));
  }
  PetscCall(PetscObjectStateIncrease((PetscObject)B));
  PetscFunctionReturn(0);
}

/*
   MatSetUp_MPIAIJ - Default setup: preallocate with default parameters so the
   matrix is usable even when the caller never called a preallocation routine.
*/
PetscErrorCode MatSetUp_MPIAIJ(Mat A)
{
  PetscFunctionBegin;
  PetscCall(MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,NULL,PETSC_DEFAULT,NULL));
  PetscFunctionReturn(0);
}

/*
   Computes the number of nonzeros per row needed for preallocation when X and Y
   have different nonzero structure.
*/
/*
   MatAXPYGetPreallocation_MPIX_private - For each of the m rows, counts the
   size of the union of the X row and the Y row, comparing columns after
   mapping through the respective local-to-global maps (xltog/yltog). Assumes
   each row's mapped column indices are sorted increasingly.
*/
PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz)
{
  PetscInt i,j,k,nzx,nzy;

  PetscFunctionBegin;
  /* Set the number of nonzeros in the new matrix */
  for (i=0; i<m; i++) {
    const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i];
    nzx = xi[i+1] - xi[i];
    nzy = yi[i+1] - yi[i];
    nnz[i] = 0;
    /* merge-count of two sorted index lists (columns compared in global numbering) */
    for (j=0,k=0; j<nzx; j++) {                                   /* Point in X */
      for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */
      if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++;             /* Skip duplicate */
      nnz[i]++;
    }
    for (; k<nzy; k++) nnz[i]++;                                  /* Tail of Y */
  }
  PetscFunctionReturn(0);
}

/* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz)
{
  PetscInt   m = Y->rmap->N; /* sequential block: global row count == local row count */
  Mat_SeqAIJ *x = (Mat_SeqAIJ*)X->data;
  Mat_SeqAIJ *y = (Mat_SeqAIJ*)Y->data;

  PetscFunctionBegin;
  PetscCall(MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz));
  PetscFunctionReturn(0);
}

/*
   MatAXPY_MPIAIJ - Computes Y = a*X + Y.

   SAME_NONZERO_PATTERN works block-wise; SUBSET_NONZERO_PATTERN uses the basic
   fallback; otherwise a new matrix with the union pattern is preallocated and
   Y's header is replaced by it.
*/
PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
{
  Mat_MPIAIJ *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data;

  PetscFunctionBegin;
  if (str == SAME_NONZERO_PATTERN) {
    PetscCall(MatAXPY(yy->A,a,xx->A,str));
    PetscCall(MatAXPY(yy->B,a,xx->B,str));
  } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
    PetscCall(MatAXPY_Basic(Y,a,X,str));
  } else {
    Mat      B;
    PetscInt *nnz_d,*nnz_o;

    PetscCall(PetscMalloc1(yy->A->rmap->N,&nnz_d));
    PetscCall(PetscMalloc1(yy->B->rmap->N,&nnz_o));
    PetscCall(MatCreate(PetscObjectComm((PetscObject)Y),&B));
2098 PetscCall(PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name)); 2099 PetscCall(MatSetLayouts(B,Y->rmap,Y->cmap)); 2100 PetscCall(MatSetType(B,((PetscObject)Y)->type_name)); 2101 PetscCall(MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d)); 2102 PetscCall(MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o)); 2103 PetscCall(MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o)); 2104 PetscCall(MatAXPY_BasicWithPreallocation(B,Y,a,X,str)); 2105 PetscCall(MatHeaderMerge(Y,&B)); 2106 PetscCall(PetscFree(nnz_d)); 2107 PetscCall(PetscFree(nnz_o)); 2108 } 2109 PetscFunctionReturn(0); 2110 } 2111 2112 PETSC_INTERN PetscErrorCode MatConjugate_SeqAIJ(Mat); 2113 2114 PetscErrorCode MatConjugate_MPIAIJ(Mat mat) 2115 { 2116 PetscFunctionBegin; 2117 if (PetscDefined(USE_COMPLEX)) { 2118 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2119 2120 PetscCall(MatConjugate_SeqAIJ(aij->A)); 2121 PetscCall(MatConjugate_SeqAIJ(aij->B)); 2122 } 2123 PetscFunctionReturn(0); 2124 } 2125 2126 PetscErrorCode MatRealPart_MPIAIJ(Mat A) 2127 { 2128 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2129 2130 PetscFunctionBegin; 2131 PetscCall(MatRealPart(a->A)); 2132 PetscCall(MatRealPart(a->B)); 2133 PetscFunctionReturn(0); 2134 } 2135 2136 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 2137 { 2138 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2139 2140 PetscFunctionBegin; 2141 PetscCall(MatImaginaryPart(a->A)); 2142 PetscCall(MatImaginaryPart(a->B)); 2143 PetscFunctionReturn(0); 2144 } 2145 2146 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A,Vec v,PetscInt idx[]) 2147 { 2148 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2149 PetscInt i,*idxb = NULL,m = A->rmap->n; 2150 PetscScalar *va,*vv; 2151 Vec vB,vA; 2152 const PetscScalar *vb; 2153 2154 PetscFunctionBegin; 2155 PetscCall(VecCreateSeq(PETSC_COMM_SELF,m,&vA)); 2156 PetscCall(MatGetRowMaxAbs(a->A,vA,idx)); 2157 2158 PetscCall(VecGetArrayWrite(vA,&va)); 2159 if (idx) { 2160 for (i=0; i<m; i++) { 2161 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2162 } 
2163 } 2164 2165 PetscCall(VecCreateSeq(PETSC_COMM_SELF,m,&vB)); 2166 PetscCall(PetscMalloc1(m,&idxb)); 2167 PetscCall(MatGetRowMaxAbs(a->B,vB,idxb)); 2168 2169 PetscCall(VecGetArrayWrite(v,&vv)); 2170 PetscCall(VecGetArrayRead(vB,&vb)); 2171 for (i=0; i<m; i++) { 2172 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 2173 vv[i] = vb[i]; 2174 if (idx) idx[i] = a->garray[idxb[i]]; 2175 } else { 2176 vv[i] = va[i]; 2177 if (idx && PetscAbsScalar(va[i]) == PetscAbsScalar(vb[i]) && idxb[i] != -1 && idx[i] > a->garray[idxb[i]]) 2178 idx[i] = a->garray[idxb[i]]; 2179 } 2180 } 2181 PetscCall(VecRestoreArrayWrite(vA,&vv)); 2182 PetscCall(VecRestoreArrayWrite(vA,&va)); 2183 PetscCall(VecRestoreArrayRead(vB,&vb)); 2184 PetscCall(PetscFree(idxb)); 2185 PetscCall(VecDestroy(&vA)); 2186 PetscCall(VecDestroy(&vB)); 2187 PetscFunctionReturn(0); 2188 } 2189 2190 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2191 { 2192 Mat_MPIAIJ *mat = (Mat_MPIAIJ*) A->data; 2193 PetscInt m = A->rmap->n,n = A->cmap->n; 2194 PetscInt cstart = A->cmap->rstart,cend = A->cmap->rend; 2195 PetscInt *cmap = mat->garray; 2196 PetscInt *diagIdx, *offdiagIdx; 2197 Vec diagV, offdiagV; 2198 PetscScalar *a, *diagA, *offdiagA; 2199 const PetscScalar *ba,*bav; 2200 PetscInt r,j,col,ncols,*bi,*bj; 2201 Mat B = mat->B; 2202 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 2203 2204 PetscFunctionBegin; 2205 /* When a process holds entire A and other processes have no entry */ 2206 if (A->cmap->N == n) { 2207 PetscCall(VecGetArrayWrite(v,&diagA)); 2208 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF,1,m,diagA,&diagV)); 2209 PetscCall(MatGetRowMinAbs(mat->A,diagV,idx)); 2210 PetscCall(VecDestroy(&diagV)); 2211 PetscCall(VecRestoreArrayWrite(v,&diagA)); 2212 PetscFunctionReturn(0); 2213 } else if (n == 0) { 2214 if (m) { 2215 PetscCall(VecGetArrayWrite(v,&a)); 2216 for (r = 0; r < m; r++) {a[r] = 0.0; if (idx) idx[r] = -1;} 2217 PetscCall(VecRestoreArrayWrite(v,&a)); 2218 } 2219 PetscFunctionReturn(0); 
  }

  PetscCall(PetscMalloc2(m,&diagIdx,m,&offdiagIdx));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV));
  PetscCall(MatGetRowMinAbs(mat->A, diagV, diagIdx));

  /* Get offdiagIdx[] for implicit 0.0 */
  PetscCall(MatSeqAIJGetArrayRead(B,&bav));
  ba = bav;
  bi = b->i;
  bj = b->j;
  PetscCall(VecGetArrayWrite(offdiagV, &offdiagA));
  for (r = 0; r < m; r++) {
    ncols = bi[r+1] - bi[r];
    if (ncols == A->cmap->N - n) { /* Brow is dense */
      offdiagA[r] = *ba; offdiagIdx[r] = cmap[0];
    } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */
      offdiagA[r] = 0.0;

      /* Find first hole in the cmap, i.e. the smallest global off-diagonal
         column with an implicit zero in this row */
      for (j=0; j<ncols; j++) {
        col = cmap[bj[j]]; /* global column number = cmap[B column number] */
        if (col > j && j < cstart) {
          offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
          break;
        } else if (col > j + n && j >= cstart) {
          offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
          break;
        }
      }
      if (j == ncols && ncols < A->cmap->N - n) {
        /* a hole is outside compressed Bcols */
        if (ncols == 0) {
          /* empty row: first off-diagonal global column is 0 (or cend if this
             rank owns the leading columns) */
          if (cstart) {
            offdiagIdx[r] = 0;
          } else offdiagIdx[r] = cend;
        } else { /* ncols > 0 */
          offdiagIdx[r] = cmap[ncols-1] + 1;
          if (offdiagIdx[r] == cstart) offdiagIdx[r] += n; /* skip over the diagonal column range */
        }
      }
    }

    /* compare the implicit zero against the explicit off-diagonal entries */
    for (j=0; j<ncols; j++) {
      if (PetscAbsScalar(offdiagA[r]) > PetscAbsScalar(*ba)) {offdiagA[r] = *ba; offdiagIdx[r] = cmap[*bj];}
      ba++; bj++;
    }
  }

  /* merge diagonal and off-diagonal winners; ties prefer the smaller global column */
  PetscCall(VecGetArrayWrite(v, &a));
  PetscCall(VecGetArrayRead(diagV, (const PetscScalar**)&diagA));
  for (r = 0; r < m; ++r) {
    if (PetscAbsScalar(diagA[r]) < PetscAbsScalar(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) idx[r] = cstart + diagIdx[r];
    } else if (PetscAbsScalar(diagA[r]) == PetscAbsScalar(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) {
        if (cstart + diagIdx[r] <= offdiagIdx[r]) {
          idx[r] = cstart + diagIdx[r];
        } else idx[r] = offdiagIdx[r];
      }
    } else {
      a[r] = offdiagA[r];
      if (idx) idx[r] = offdiagIdx[r];
    }
  }
  PetscCall(MatSeqAIJRestoreArrayRead(B,&bav));
  PetscCall(VecRestoreArrayWrite(v, &a));
  PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar**)&diagA));
  PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA));
  PetscCall(VecDestroy(&diagV));
  PetscCall(VecDestroy(&offdiagV));
  PetscCall(PetscFree2(diagIdx, offdiagIdx));
  PetscFunctionReturn(0);
}

/*
   MatGetRowMin_MPIAIJ - For each local row, returns the entry of minimum real
   part in v, and (optionally) its global column index in idx.

   Implicit zeros in the compressed off-diagonal block are treated as
   candidates, analogous to MatGetRowMinAbs_MPIAIJ().
*/
PetscErrorCode MatGetRowMin_MPIAIJ(Mat A,Vec v,PetscInt idx[])
{
  Mat_MPIAIJ        *mat = (Mat_MPIAIJ*) A->data;
  PetscInt          m = A->rmap->n,n = A->cmap->n;
  PetscInt          cstart = A->cmap->rstart,cend = A->cmap->rend;
  PetscInt          *cmap  = mat->garray;
  PetscInt          *diagIdx, *offdiagIdx;
  Vec               diagV, offdiagV;
  PetscScalar       *a, *diagA, *offdiagA;
  const PetscScalar *ba,*bav;
  PetscInt          r,j,col,ncols,*bi,*bj;
  Mat               B = mat->B;
  Mat_SeqAIJ        *b = (Mat_SeqAIJ*)B->data;

  PetscFunctionBegin;
  /* When a process holds entire A and other processes have no entry */
  if (A->cmap->N == n) {
    PetscCall(VecGetArrayWrite(v,&diagA));
    PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF,1,m,diagA,&diagV));
    PetscCall(MatGetRowMin(mat->A,diagV,idx));
    PetscCall(VecDestroy(&diagV));
    PetscCall(VecRestoreArrayWrite(v,&diagA));
    PetscFunctionReturn(0);
  } else if (n == 0) {
    /* no diagonal columns on this rank: report +inf sentinel, index -1 */
    if (m) {
      PetscCall(VecGetArrayWrite(v,&a));
      for (r = 0; r < m; r++) {a[r] = PETSC_MAX_REAL; if (idx) idx[r] = -1;}
      PetscCall(VecRestoreArrayWrite(v,&a));
    }
    PetscFunctionReturn(0);
  }

  PetscCall(PetscCalloc2(m,&diagIdx,m,&offdiagIdx));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV));
  PetscCall(MatGetRowMin(mat->A, diagV, diagIdx));

  /* Get offdiagIdx[] for implicit 0.0 */
  PetscCall(MatSeqAIJGetArrayRead(B,&bav));
  ba = bav;
  bi = b->i;
  bj = b->j;
  PetscCall(VecGetArrayWrite(offdiagV, &offdiagA));
  for (r = 0; r < m; r++) {
    ncols = bi[r+1] - bi[r];
    if (ncols == A->cmap->N - n) { /* Brow is dense */
      offdiagA[r] = *ba; offdiagIdx[r] = cmap[0];
    } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */
      offdiagA[r] = 0.0;

      /* Find first hole in the cmap, i.e. the smallest global off-diagonal
         column with an implicit zero in this row */
      for (j=0; j<ncols; j++) {
        col = cmap[bj[j]]; /* global column number = cmap[B column number] */
        if (col > j && j < cstart) {
          offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
          break;
        } else if (col > j + n && j >= cstart) {
          offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
          break;
        }
      }
      if (j == ncols && ncols < A->cmap->N - n) {
        /* a hole is outside compressed Bcols */
        if (ncols == 0) {
          if (cstart) {
            offdiagIdx[r] = 0;
          } else offdiagIdx[r] = cend;
        } else { /* ncols > 0 */
          offdiagIdx[r] = cmap[ncols-1] + 1;
          if (offdiagIdx[r] == cstart) offdiagIdx[r] += n; /* skip over the diagonal column range */
        }
      }
    }

    /* compare the implicit zero against the explicit off-diagonal entries
       (minimum by real part) */
    for (j=0; j<ncols; j++) {
      if (PetscRealPart(offdiagA[r]) > PetscRealPart(*ba)) {offdiagA[r] = *ba; offdiagIdx[r] = cmap[*bj];}
      ba++; bj++;
    }
  }

  /* merge diagonal and off-diagonal winners; ties prefer the smaller global column */
  PetscCall(VecGetArrayWrite(v, &a));
  PetscCall(VecGetArrayRead(diagV, (const PetscScalar**)&diagA));
  for (r = 0; r < m; ++r) {
    if (PetscRealPart(diagA[r]) < PetscRealPart(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) idx[r] = cstart + diagIdx[r];
    } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) {
        if (cstart + diagIdx[r] <= offdiagIdx[r]) {
          idx[r] = cstart + diagIdx[r];
        } else idx[r] = offdiagIdx[r];
      }
    } else {
      a[r] = offdiagA[r];
      if (idx) idx[r] = offdiagIdx[r];
    }
  }
  PetscCall(MatSeqAIJRestoreArrayRead(B,&bav));
  PetscCall(VecRestoreArrayWrite(v, &a));
  PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar**)&diagA));
  PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA));
  PetscCall(VecDestroy(&diagV));
  PetscCall(VecDestroy(&offdiagV));
  PetscCall(PetscFree2(diagIdx, offdiagIdx));
  PetscFunctionReturn(0);
}

/*
   MatGetRowMax_MPIAIJ - For each local row, returns the entry of maximum real
   part in v, and (optionally) its global column index in idx.

   Mirror image of MatGetRowMin_MPIAIJ(); implicit zeros in the compressed
   off-diagonal block are candidates for the maximum as well.
*/
PetscErrorCode MatGetRowMax_MPIAIJ(Mat A,Vec v,PetscInt idx[])
{
  Mat_MPIAIJ        *mat = (Mat_MPIAIJ*)A->data;
  PetscInt          m = A->rmap->n,n = A->cmap->n;
  PetscInt          cstart = A->cmap->rstart,cend = A->cmap->rend;
  PetscInt          *cmap  = mat->garray;
  PetscInt          *diagIdx, *offdiagIdx;
  Vec               diagV, offdiagV;
  PetscScalar       *a, *diagA, *offdiagA;
  const PetscScalar *ba,*bav;
  PetscInt          r,j,col,ncols,*bi,*bj;
  Mat               B = mat->B;
  Mat_SeqAIJ        *b = (Mat_SeqAIJ*)B->data;

  PetscFunctionBegin;
  /* When a process holds entire A and other processes have no entry */
  if (A->cmap->N == n) {
    PetscCall(VecGetArrayWrite(v,&diagA));
    PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF,1,m,diagA,&diagV));
    PetscCall(MatGetRowMax(mat->A,diagV,idx));
    PetscCall(VecDestroy(&diagV));
    PetscCall(VecRestoreArrayWrite(v,&diagA));
    PetscFunctionReturn(0);
  } else if (n == 0) {
    /* no diagonal columns on this rank: report -inf sentinel, index -1 */
    if (m) {
      PetscCall(VecGetArrayWrite(v,&a));
      for (r = 0; r < m; r++) {a[r] = PETSC_MIN_REAL; if (idx) idx[r] = -1;}
      PetscCall(VecRestoreArrayWrite(v,&a));
    }
    PetscFunctionReturn(0);
  }

  PetscCall(PetscMalloc2(m,&diagIdx,m,&offdiagIdx));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV));
  PetscCall(MatGetRowMax(mat->A, diagV, diagIdx));

  /* Get offdiagIdx[] for implicit 0.0 */
  PetscCall(MatSeqAIJGetArrayRead(B,&bav));
  ba = bav;
  bi = b->i;
  bj = b->j;
  PetscCall(VecGetArrayWrite(offdiagV, &offdiagA));
  for (r = 0; r < m; r++) {
    ncols = bi[r+1] - bi[r];
    if (ncols == A->cmap->N - n) { /* Brow is dense */
      offdiagA[r] = *ba; offdiagIdx[r] = cmap[0];
    } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */
      offdiagA[r] = 0.0;

      /* Find first hole in the cmap, i.e. the smallest global off-diagonal
         column with an implicit zero in this row */
      for (j=0; j<ncols; j++) {
        col = cmap[bj[j]]; /* global column number = cmap[B column number] */
        if (col > j && j < cstart) {
          offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
          break;
        } else if (col > j + n && j >= cstart) {
          offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
          break;
        }
      }
      if (j == ncols && ncols < A->cmap->N - n) {
        /* a hole is outside compressed Bcols */
        if (ncols == 0) {
          if (cstart) {
            offdiagIdx[r] = 0;
          } else offdiagIdx[r] = cend;
        } else { /* ncols > 0 */
          offdiagIdx[r] = cmap[ncols-1] + 1;
          if (offdiagIdx[r] == cstart) offdiagIdx[r] += n; /* skip over the diagonal column range */
        }
      }
    }

    /* compare the implicit zero against the explicit off-diagonal entries
       (maximum by real part) */
    for (j=0; j<ncols; j++) {
      if (PetscRealPart(offdiagA[r]) < PetscRealPart(*ba)) {offdiagA[r] = *ba; offdiagIdx[r] = cmap[*bj];}
      ba++; bj++;
    }
  }

  /* merge diagonal and off-diagonal winners; ties prefer the smaller global column */
  PetscCall(VecGetArrayWrite(v, &a));
  PetscCall(VecGetArrayRead(diagV,(const PetscScalar**)&diagA));
  for (r = 0; r < m; ++r) {
    if (PetscRealPart(diagA[r]) > PetscRealPart(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) idx[r] = cstart + diagIdx[r];
    } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) {
        if (cstart + diagIdx[r] <= offdiagIdx[r]) {
          idx[r] = cstart + diagIdx[r];
        } else idx[r] = offdiagIdx[r];
      }
    } else {
      a[r] = offdiagA[r];
      if (idx) idx[r] = offdiagIdx[r];
    }
  }
  PetscCall(MatSeqAIJRestoreArrayRead(B,&bav));
  PetscCall(VecRestoreArrayWrite(v, &a));
  PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar**)&diagA));
  PetscCall(VecRestoreArrayWrite(offdiagV,&offdiagA));
  PetscCall(VecDestroy(&diagV));
  PetscCall(VecDestroy(&offdiagV));
  PetscCall(PetscFree2(diagIdx, offdiagIdx));
  PetscFunctionReturn(0);
}

/*
   MatGetSeqNonzeroStructure_MPIAIJ - Returns a sequential matrix with the
   nonzero structure of the whole parallel matrix (values not gathered).
*/
PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
{
  Mat *dummy;

  PetscFunctionBegin;
  PetscCall(MatCreateSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy));
  *newmat = *dummy;
  /* only the array wrapper is freed; the matrix itself is handed to the caller */
  PetscCall(PetscFree(dummy));
  PetscFunctionReturn(0);
}

/*
   MatInvertBlockDiagonal_MPIAIJ - Inverts the point-block diagonal; the blocks
   live entirely in the diagonal part, so delegate to a->A.
*/
PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data;

  PetscFunctionBegin;
  PetscCall(MatInvertBlockDiagonal(a->A,values));
  /* propagate any factorization error detected while inverting the blocks */
  A->factorerrortype = a->A->factorerrortype;
  PetscFunctionReturn(0);
}

/*
   MatSetRandom_MPIAIJ - Fills the matrix with random values. For an assembled
   matrix, existing nonzero locations are overwritten; an unassembled (but
   preallocated) matrix gets random off-diagonal entries excluding this rank's
   own column range.
*/
static PetscErrorCode MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)x->data;

  PetscFunctionBegin;
  PetscCheckFalse(!x->assembled && !x->preallocated,PetscObjectComm((PetscObject)x), PETSC_ERR_ARG_WRONGSTATE, "MatSetRandom on an unassembled and unpreallocated MATMPIAIJ is not allowed");
  PetscCall(MatSetRandom(aij->A,rctx));
  if (x->assembled) {
    PetscCall(MatSetRandom(aij->B,rctx));
  } else {
    /* off-diagonal block must not receive entries in the local column range */
    PetscCall(MatSetRandomSkipColumnRange_SeqAIJ_Private(aij->B,x->cmap->rstart,x->cmap->rend,rctx));
  }
  PetscCall(MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY));
  PetscFunctionReturn(0);
}

/*
   MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ - Swaps the increaseoverlap
   implementation between the scalable and the default algorithm.
*/
PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A,PetscBool sc)
{
  PetscFunctionBegin;
  if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable;
  else A->ops->increaseoverlap    = MatIncreaseOverlap_MPIAIJ;
  PetscFunctionReturn(0);
}

/*@
    MatMPIAIJSetUseScalableIncreaseOverlap - Determine if the matrix uses a scalable algorithm to compute the overlap

    Collective on Mat

    Input Parameters:
+    A - the matrix
-    sc - PETSC_TRUE indicates use the scalable algorithm (default is not to use the scalable algorithm)

    Level: advanced

@*/
PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A,PetscBool sc)
{
  PetscFunctionBegin;
  PetscTryMethod(A,"MatMPIAIJSetUseScalableIncreaseOverlap_C",(Mat,PetscBool),(A,sc));
  PetscFunctionReturn(0);
}

/*
   MatSetFromOptions_MPIAIJ - Processes options-database settings specific to
   MPIAIJ (currently only -mat_increase_overlap_scalable).
*/
PetscErrorCode MatSetFromOptions_MPIAIJ(PetscOptionItems *PetscOptionsObject,Mat A)
{
  PetscBool sc = PETSC_FALSE,flg;

  PetscFunctionBegin;
  PetscCall(PetscOptionsHead(PetscOptionsObject,"MPIAIJ options"));
  if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE;
  PetscCall(PetscOptionsBool("-mat_increase_overlap_scalable","Use a scalable algorithm to compute the overlap","MatIncreaseOverlap",sc,&sc,&flg));
  if (flg) {
    PetscCall(MatMPIAIJSetUseScalableIncreaseOverlap(A,sc));
  }
  PetscCall(PetscOptionsTail());
  PetscFunctionReturn(0);
}

/*
   MatShift_MPIAIJ - Computes Y = Y + a*I. Ensures the diagonal block has at
   least one entry per row preallocated before delegating to MatShift_Basic().
*/
PetscErrorCode MatShift_MPIAIJ(Mat Y,PetscScalar a)
{
  Mat_MPIAIJ *maij = (Mat_MPIAIJ*)Y->data;
  Mat_SeqAIJ *aij = (Mat_SeqAIJ*)maij->A->data;

  PetscFunctionBegin;
  if (!Y->preallocated) {
    PetscCall(MatMPIAIJSetPreallocation(Y,1,NULL,0,NULL));
  } else if (!aij->nz) {
    /* preallocating resets nonew; preserve the caller's setting */
    PetscInt nonew = aij->nonew;
    PetscCall(MatSeqAIJSetPreallocation(maij->A,1,NULL));
    aij->nonew = nonew;
  }
  PetscCall(MatShift_Basic(Y,a));
  PetscFunctionReturn(0);
}

/*
   MatMissingDiagonal_MPIAIJ - Reports whether a diagonal entry is missing and,
   if so, the global row of the first one. Square matrices only; the diagonal
   lives entirely in a->A, so delegate and shift the row index to global.
*/
PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A,PetscBool *missing,PetscInt *d)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  PetscCheckFalse(A->rmap->n != A->cmap->n,PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices");
  PetscCall(MatMissingDiagonal(a->A,missing,d));
  if (d) {
    PetscInt rstart;
    PetscCall(MatGetOwnershipRange(A,&rstart,NULL));
    /* translate the local row index reported by the diagonal block into a global row index */
    *d += rstart;

  }
  PetscFunctionReturn(0);
}

/* Invert the variable-size block diagonal; it lives entirely in the diagonal (sequential) part. */
PetscErrorCode MatInvertVariableBlockDiagonal_MPIAIJ(Mat A,PetscInt nblocks,const PetscInt *bsizes,PetscScalar *diag)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  PetscCall(MatInvertVariableBlockDiagonal(a->A,nblocks,bsizes,diag));
  PetscFunctionReturn(0);
}

/* -------------------------------------------------------------------*/
/* Function table for MATMPIAIJ. Each slot's position is fixed by struct _MatOps
   (see petsc/private/matimpl.h); entries must never be reordered. */
static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
                                       MatGetRow_MPIAIJ,
                                       MatRestoreRow_MPIAIJ,
                                       MatMult_MPIAIJ,
                                /* 4*/ MatMultAdd_MPIAIJ,
                                       MatMultTranspose_MPIAIJ,
                                       MatMultTransposeAdd_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                /*10*/ NULL,
                                       NULL,
                                       NULL,
                                       MatSOR_MPIAIJ,
                                       MatTranspose_MPIAIJ,
                                /*15*/ MatGetInfo_MPIAIJ,
                                       MatEqual_MPIAIJ,
                                       MatGetDiagonal_MPIAIJ,
                                       MatDiagonalScale_MPIAIJ,
                                       MatNorm_MPIAIJ,
                                /*20*/ MatAssemblyBegin_MPIAIJ,
                                       MatAssemblyEnd_MPIAIJ,
                                       MatSetOption_MPIAIJ,
                                       MatZeroEntries_MPIAIJ,
                                /*24*/ MatZeroRows_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                /*29*/ MatSetUp_MPIAIJ,
                                       NULL,
                                       NULL,
                                       MatGetDiagonalBlock_MPIAIJ,
                                       NULL,
                                /*34*/ MatDuplicate_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                /*39*/ MatAXPY_MPIAIJ,
                                       MatCreateSubMatrices_MPIAIJ,
                                       MatIncreaseOverlap_MPIAIJ,
                                       MatGetValues_MPIAIJ,
                                       MatCopy_MPIAIJ,
                                /*44*/ MatGetRowMax_MPIAIJ,
                                       MatScale_MPIAIJ,
                                       MatShift_MPIAIJ,
                                       MatDiagonalSet_MPIAIJ,
                                       MatZeroRowsColumns_MPIAIJ,
                                /*49*/ MatSetRandom_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                /*54*/ MatFDColoringCreate_MPIXAIJ,
                                       NULL,
                                       MatSetUnfactored_MPIAIJ,
                                       MatPermute_MPIAIJ,
                                       NULL,
                                /*59*/ MatCreateSubMatrix_MPIAIJ,
                                       MatDestroy_MPIAIJ,
                                       MatView_MPIAIJ,
                                       NULL,
                                       NULL,
                                /*64*/ NULL,
                                       MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                /*69*/ MatGetRowMaxAbs_MPIAIJ,
                                       MatGetRowMinAbs_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                /*75*/ MatFDColoringApply_AIJ,
                                       MatSetFromOptions_MPIAIJ,
                                       NULL,
                                       NULL,
                                       MatFindZeroDiagonals_MPIAIJ,
                                /*80*/ NULL,
                                       NULL,
                                       NULL,
                                /*83*/ MatLoad_MPIAIJ,
                                       MatIsSymmetric_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                /*89*/ NULL,
                                       NULL,
                                       MatMatMultNumeric_MPIAIJ_MPIAIJ,
                                       NULL,
                                       NULL,
                                /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatBindToCPU_MPIAIJ,
                                /*99*/ MatProductSetFromOptions_MPIAIJ,
                                       NULL,
                                       NULL,
                                       MatConjugate_MPIAIJ,
                                       NULL,
                                /*104*/MatSetValuesRow_MPIAIJ,
                                       MatRealPart_MPIAIJ,
                                       MatImaginaryPart_MPIAIJ,
                                       NULL,
                                       NULL,
                                /*109*/NULL,
                                       NULL,
                                       MatGetRowMin_MPIAIJ,
                                       NULL,
                                       MatMissingDiagonal_MPIAIJ,
                                /*114*/MatGetSeqNonzeroStructure_MPIAIJ,
                                       NULL,
                                       MatGetGhosts_MPIAIJ,
                                       NULL,
                                       NULL,
                                /*119*/MatMultDiagonalBlock_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatGetMultiProcBlock_MPIAIJ,
                                /*124*/MatFindNonzeroRows_MPIAIJ,
                                       MatGetColumnReductions_MPIAIJ,
                                       MatInvertBlockDiagonal_MPIAIJ,
                                       MatInvertVariableBlockDiagonal_MPIAIJ,
                                       MatCreateSubMatricesMPI_MPIAIJ,
                                /*129*/NULL,
                                       NULL,
                                       NULL,
                                       MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
                                       NULL,
                                /*134*/NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                /*139*/MatSetBlockSizes_MPIAIJ,
                                       NULL,
                                       NULL,
                                       MatFDColoringSetUp_MPIXAIJ,
                                       MatFindOffBlockDiagonalEntries_MPIAIJ,
                                       MatCreateMPIMatConcatenateSeqMat_MPIAIJ,
                                /*145*/NULL,
                                       NULL,
                                       NULL
};

/* ----------------------------------------------------------------------------------------*/

/* Stash a copy of the numerical values of both local blocks so they can later
   be restored with MatRetrieveValues(); the nonzero structure is not saved. */
PetscErrorCode MatStoreValues_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;

  PetscFunctionBegin;
PetscCall(MatStoreValues(aij->A)); 2795 PetscCall(MatStoreValues(aij->B)); 2796 PetscFunctionReturn(0); 2797 } 2798 2799 PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat) 2800 { 2801 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2802 2803 PetscFunctionBegin; 2804 PetscCall(MatRetrieveValues(aij->A)); 2805 PetscCall(MatRetrieveValues(aij->B)); 2806 PetscFunctionReturn(0); 2807 } 2808 2809 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 2810 { 2811 Mat_MPIAIJ *b; 2812 PetscMPIInt size; 2813 2814 PetscFunctionBegin; 2815 PetscCall(PetscLayoutSetUp(B->rmap)); 2816 PetscCall(PetscLayoutSetUp(B->cmap)); 2817 b = (Mat_MPIAIJ*)B->data; 2818 2819 #if defined(PETSC_USE_CTABLE) 2820 PetscCall(PetscTableDestroy(&b->colmap)); 2821 #else 2822 PetscCall(PetscFree(b->colmap)); 2823 #endif 2824 PetscCall(PetscFree(b->garray)); 2825 PetscCall(VecDestroy(&b->lvec)); 2826 PetscCall(VecScatterDestroy(&b->Mvctx)); 2827 2828 /* Because the B will have been resized we simply destroy it and create a new one each time */ 2829 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B),&size)); 2830 PetscCall(MatDestroy(&b->B)); 2831 PetscCall(MatCreate(PETSC_COMM_SELF,&b->B)); 2832 PetscCall(MatSetSizes(b->B,B->rmap->n,size > 1 ? B->cmap->N : 0,B->rmap->n,size > 1 ? 
B->cmap->N : 0));
  PetscCall(MatSetBlockSizesFromMats(b->B,B,B));
  PetscCall(MatSetType(b->B,MATSEQAIJ));
  PetscCall(PetscLogObjectParent((PetscObject)B,(PetscObject)b->B));

  /* the diagonal block is created only once; afterwards only its preallocation is redone */
  if (!B->preallocated) {
    PetscCall(MatCreate(PETSC_COMM_SELF,&b->A));
    PetscCall(MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n));
    PetscCall(MatSetBlockSizesFromMats(b->A,B,B));
    PetscCall(MatSetType(b->A,MATSEQAIJ));
    PetscCall(PetscLogObjectParent((PetscObject)B,(PetscObject)b->A));
  }

  PetscCall(MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz));
  PetscCall(MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz));
  B->preallocated  = PETSC_TRUE;
  B->was_assembled = PETSC_FALSE;
  B->assembled     = PETSC_FALSE;
  PetscFunctionReturn(0);
}

/* Reset the preallocation of both local blocks, discarding the column map, ghost list,
   local work vector, and scatter context so they are rebuilt at the next assembly. */
PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B)
{
  Mat_MPIAIJ *b;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(B,MAT_CLASSID,1);
  PetscCall(PetscLayoutSetUp(B->rmap));
  PetscCall(PetscLayoutSetUp(B->cmap));
  b = (Mat_MPIAIJ*)B->data;

#if defined(PETSC_USE_CTABLE)
  PetscCall(PetscTableDestroy(&b->colmap));
#else
  PetscCall(PetscFree(b->colmap));
#endif
  PetscCall(PetscFree(b->garray));
  PetscCall(VecDestroy(&b->lvec));
  PetscCall(VecScatterDestroy(&b->Mvctx));

  PetscCall(MatResetPreallocation(b->A));
  PetscCall(MatResetPreallocation(b->B));
  B->preallocated  = PETSC_TRUE;
  B->was_assembled = PETSC_FALSE;
  B->assembled     = PETSC_FALSE;
  PetscFunctionReturn(0);
}

/* Duplicate an MPIAIJ matrix; cpvalues controls whether numerical values are copied. */
PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat        mat;
  Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data;

  PetscFunctionBegin;
  *newmat = NULL;
  PetscCall(MatCreate(PetscObjectComm((PetscObject)matin),&mat));
  PetscCall(MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N));
PetscCall(MatSetBlockSizesFromMats(mat,matin,matin));
  PetscCall(MatSetType(mat,((PetscObject)matin)->type_name));
  a = (Mat_MPIAIJ*)mat->data;

  mat->factortype   = matin->factortype;
  mat->assembled    = matin->assembled;
  mat->insertmode   = NOT_SET_VALUES;
  mat->preallocated = matin->preallocated;

  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  a->donotstash   = oldmat->donotstash;
  a->roworiented  = oldmat->roworiented;
  a->rowindices   = NULL;  /* per-matrix MatGetRow() work space is not shared */
  a->rowvalues    = NULL;
  a->getrowactive = PETSC_FALSE;

  PetscCall(PetscLayoutReference(matin->rmap,&mat->rmap));
  PetscCall(PetscLayoutReference(matin->cmap,&mat->cmap));

  /* copy the global-column -> B-local-column map, if it has been built */
  if (oldmat->colmap) {
#if defined(PETSC_USE_CTABLE)
    PetscCall(PetscTableCreateCopy(oldmat->colmap,&a->colmap));
#else
    PetscCall(PetscMalloc1(mat->cmap->N,&a->colmap));
    PetscCall(PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt)));
    PetscCall(PetscArraycpy(a->colmap,oldmat->colmap,mat->cmap->N));
#endif
  } else a->colmap = NULL;
  /* copy the list of global column indices of B (the ghost columns) */
  if (oldmat->garray) {
    PetscInt len;
    len = oldmat->B->cmap->n;
    PetscCall(PetscMalloc1(len+1,&a->garray));
    PetscCall(PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt)));
    if (len) PetscCall(PetscArraycpy(a->garray,oldmat->garray,len));
  } else a->garray = NULL;

  /* It may happen MatDuplicate is called with a non-assembled matrix
     In fact, MatDuplicate only requires the matrix to be preallocated
     This may happen inside a DMCreateMatrix_Shell */
  if (oldmat->lvec) {
    PetscCall(VecDuplicate(oldmat->lvec,&a->lvec));
    PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec));
  }
  if (oldmat->Mvctx) {
    PetscCall(VecScatterCopy(oldmat->Mvctx,&a->Mvctx));
    PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx));
  }
  PetscCall(MatDuplicate(oldmat->A,cpvalues,&a->A));
PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A));
  PetscCall(MatDuplicate(oldmat->B,cpvalues,&a->B));
  PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B));
  PetscCall(PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist));
  *newmat = mat;
  PetscFunctionReturn(0);
}

/* Dispatch MatLoad() to the reader matching the viewer type (binary or HDF5). */
PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
{
  PetscBool isbinary, ishdf5;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(newMat,MAT_CLASSID,1);
  PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2);
  /* force binary viewer to load .info file if it has not yet done so */
  PetscCall(PetscViewerSetUp(viewer));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERHDF5, &ishdf5));
  if (isbinary) {
    PetscCall(MatLoad_MPIAIJ_Binary(newMat,viewer));
  } else if (ishdf5) {
#if defined(PETSC_HAVE_HDF5)
    PetscCall(MatLoad_AIJ_HDF5(newMat,viewer));
#else
    SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"HDF5 not supported in this build.\nPlease reconfigure using --download-hdf5");
#endif
  } else {
    SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newMat)->type_name);
  }
  PetscFunctionReturn(0);
}

/* Load an MPIAIJ matrix from a PETSc binary viewer: header, row lengths,
   column indices, then values, each read collectively. */
PetscErrorCode MatLoad_MPIAIJ_Binary(Mat mat, PetscViewer viewer)
{
  PetscInt    header[4],M,N,m,nz,rows,cols,sum,i;
  PetscInt    *rowidxs,*colidxs;
  PetscScalar *matvals;

  PetscFunctionBegin;
  PetscCall(PetscViewerSetUp(viewer));

  /* read in matrix header */
  PetscCall(PetscViewerBinaryRead(viewer,header,4,NULL,PETSC_INT));
  PetscCheckFalse(header[0] != MAT_FILE_CLASSID,PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Not a matrix object in file");
  M = header[1]; N = header[2]; nz = header[3];
  PetscCheckFalse(M < 0,PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix row size (%" PetscInt_FMT ") in file is negative",M);
  PetscCheckFalse(N < 0,PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix column size (%" PetscInt_FMT ") in file is negative",N);
  PetscCheckFalse(nz < 0,PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk, cannot load as MPIAIJ");

  /* set block sizes from the viewer's .info file */
  PetscCall(MatLoad_Binary_BlockSizes(mat,viewer));
  /* set global sizes if not set already */
  if (mat->rmap->N < 0) mat->rmap->N = M;
  if (mat->cmap->N < 0) mat->cmap->N = N;
  PetscCall(PetscLayoutSetUp(mat->rmap));
  PetscCall(PetscLayoutSetUp(mat->cmap));

  /* check if the matrix sizes are correct */
  PetscCall(MatGetSize(mat,&rows,&cols));
  PetscCheckFalse(M != rows || N != cols,PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%" PetscInt_FMT ", %" PetscInt_FMT ") than the input matrix (%" PetscInt_FMT ", %" PetscInt_FMT ")",M,N,rows,cols);

  /* read in row lengths and build row indices (prefix sum turns lengths into CSR offsets) */
  PetscCall(MatGetLocalSize(mat,&m,NULL));
  PetscCall(PetscMalloc1(m+1,&rowidxs));
  PetscCall(PetscViewerBinaryReadAll(viewer,rowidxs+1,m,PETSC_DECIDE,M,PETSC_INT));
  rowidxs[0] = 0; for (i=0; i<m; i++) rowidxs[i+1] += rowidxs[i];
  PetscCall(MPIU_Allreduce(&rowidxs[m],&sum,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)viewer)));
  PetscCheckFalse(sum != nz,PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Inconsistent matrix data in file: nonzeros = %" PetscInt_FMT ", sum-row-lengths = %" PetscInt_FMT,nz,sum);
  /* read in column indices and matrix values */
  PetscCall(PetscMalloc2(rowidxs[m],&colidxs,rowidxs[m],&matvals));
PetscCall(PetscViewerBinaryReadAll(viewer,colidxs,rowidxs[m],PETSC_DETERMINE,PETSC_DETERMINE,PETSC_INT));
  PetscCall(PetscViewerBinaryReadAll(viewer,matvals,rowidxs[m],PETSC_DETERMINE,PETSC_DETERMINE,PETSC_SCALAR));
  /* store matrix indices and values */
  PetscCall(MatMPIAIJSetPreallocationCSR(mat,rowidxs,colidxs,matvals));
  PetscCall(PetscFree(rowidxs));
  PetscCall(PetscFree2(colidxs,matvals));
  PetscFunctionReturn(0);
}

/* Gather a parallel column index set into a sequential one.
   Not scalable because of ISAllGather() unless getting all columns. */
PetscErrorCode ISGetSeqIS_Private(Mat mat,IS iscol,IS *isseq)
{
  IS          iscol_local;
  PetscBool   isstride;
  PetscMPIInt lisstride=0,gisstride;

  PetscFunctionBegin;
  /* check if we are grabbing all columns*/
  PetscCall(PetscObjectTypeCompare((PetscObject)iscol,ISSTRIDE,&isstride));

  if (isstride) {
    PetscInt start,len,mstart,mlen;
    PetscCall(ISStrideGetInfo(iscol,&start,NULL));
    PetscCall(ISGetLocalSize(iscol,&len));
    PetscCall(MatGetOwnershipRangeColumn(mat,&mstart,&mlen));
    if (mstart == start && mlen-mstart == len) lisstride = 1;
  }

  /* only when every process grabs exactly its own column range can the gather be skipped */
  PetscCall(MPIU_Allreduce(&lisstride,&gisstride,1,MPI_INT,MPI_MIN,PetscObjectComm((PetscObject)mat)));
  if (gisstride) {
    PetscInt N;
    PetscCall(MatGetSize(mat,NULL,&N));
    PetscCall(ISCreateStride(PETSC_COMM_SELF,N,0,1,&iscol_local));
    PetscCall(ISSetIdentity(iscol_local));
    PetscCall(PetscInfo(mat,"Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n"));
  } else {
    PetscInt cbs;
    PetscCall(ISGetBlockSize(iscol,&cbs));
    PetscCall(ISAllGather(iscol,&iscol_local));
    PetscCall(ISSetBlockSize(iscol_local,cbs));
  }

  *isseq = iscol_local;
  PetscFunctionReturn(0);
}

/*
  Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and global size of iscol_local
  (see MatCreateSubMatrix_MPIAIJ_nonscalable)

  Input
Parameters:
    mat - matrix
    isrow - parallel row index set; its local indices are a subset of local columns of mat,
            i.e., mat->rstart <= isrow[i] < mat->rend
    iscol - parallel column index set; its local indices are a subset of local columns of mat,
            i.e., mat->cstart <= iscol[i] < mat->cend
  Output Parameter:
    isrow_d,iscol_d - sequential row and column index sets for retrieving mat->A
    iscol_o - sequential column index set for retrieving mat->B
    garray - column map; garray[i] indicates global location of iscol_o[i] in iscol
 */
PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat,IS isrow,IS iscol,IS *isrow_d,IS *iscol_d,IS *iscol_o,const PetscInt *garray[])
{
  Vec            x,cmap;
  const PetscInt *is_idx;
  PetscScalar    *xarray,*cmaparray;
  PetscInt       ncols,isstart,*idx,m,rstart,*cmap1,count;
  Mat_MPIAIJ     *a=(Mat_MPIAIJ*)mat->data;
  Mat            B=a->B;
  Vec            lvec=a->lvec,lcmap;
  PetscInt       i,cstart,cend,Bn=B->cmap->N;
  MPI_Comm       comm;
  VecScatter     Mvctx=a->Mvctx;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)mat,&comm));
  PetscCall(ISGetLocalSize(iscol,&ncols));

  /* (1) iscol is a sub-column vector of mat, pad it with '-1.' to form a full vector x */
  PetscCall(MatCreateVecs(mat,&x,NULL));
  PetscCall(VecSet(x,-1.0));
  PetscCall(VecDuplicate(x,&cmap));
  PetscCall(VecSet(cmap,-1.0));

  /* Get start indices: exclusive prefix sum of the local iscol sizes over the ranks */
  PetscCallMPI(MPI_Scan(&ncols,&isstart,1,MPIU_INT,MPI_SUM,comm));
  isstart -= ncols;
  PetscCall(MatGetOwnershipRangeColumn(mat,&cstart,&cend));

  PetscCall(ISGetIndices(iscol,&is_idx));
  PetscCall(VecGetArray(x,&xarray));
  PetscCall(VecGetArray(cmap,&cmaparray));
  PetscCall(PetscMalloc1(ncols,&idx));
  for (i=0; i<ncols; i++) {
    xarray[is_idx[i]-cstart]    = (PetscScalar)is_idx[i];
    cmaparray[is_idx[i]-cstart] = i + isstart;      /* global index of iscol[i] */
    idx[i]                      = is_idx[i]-cstart; /* local index of iscol[i]  */
  }
  PetscCall(VecRestoreArray(x,&xarray));
  PetscCall(VecRestoreArray(cmap,&cmaparray));
  PetscCall(ISRestoreIndices(iscol,&is_idx));

  /* Get iscol_d */
  PetscCall(ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,iscol_d));
  PetscCall(ISGetBlockSize(iscol,&i));
  PetscCall(ISSetBlockSize(*iscol_d,i));

  /* Get isrow_d */
  PetscCall(ISGetLocalSize(isrow,&m));
  rstart = mat->rmap->rstart;
  PetscCall(PetscMalloc1(m,&idx));
  PetscCall(ISGetIndices(isrow,&is_idx));
  for (i=0; i<m; i++) idx[i] = is_idx[i]-rstart;
  PetscCall(ISRestoreIndices(isrow,&is_idx));

  PetscCall(ISCreateGeneral(PETSC_COMM_SELF,m,idx,PETSC_OWN_POINTER,isrow_d));
  PetscCall(ISGetBlockSize(isrow,&i));
  PetscCall(ISSetBlockSize(*isrow_d,i));

  /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */
  PetscCall(VecScatterBegin(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD));
  PetscCall(VecScatterEnd(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD));

  PetscCall(VecDuplicate(lvec,&lcmap));

  PetscCall(VecScatterBegin(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD));
PetscCall(VecScatterEnd(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD));

  /* (3) create sequential iscol_o (a subset of iscol) and isgarray */
  /* off-process column indices */
  count = 0;
  PetscCall(PetscMalloc1(Bn,&idx));
  PetscCall(PetscMalloc1(Bn,&cmap1));

  PetscCall(VecGetArray(lvec,&xarray));
  PetscCall(VecGetArray(lcmap,&cmaparray));
  for (i=0; i<Bn; i++) {
    if (PetscRealPart(xarray[i]) > -1.0) { /* ghost column i of B was selected by iscol */
      idx[count]   = i;                                     /* local column index in off-diagonal part B */
      cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]); /* column index in submat */
      count++;
    }
  }
  PetscCall(VecRestoreArray(lvec,&xarray));
  PetscCall(VecRestoreArray(lcmap,&cmaparray));

  PetscCall(ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_COPY_VALUES,iscol_o));
  /* cannot ensure iscol_o has same blocksize as iscol! */

  PetscCall(PetscFree(idx));
  *garray = cmap1; /* ownership of cmap1 passes to the caller */

  PetscCall(VecDestroy(&x));
  PetscCall(VecDestroy(&cmap));
  PetscCall(VecDestroy(&lcmap));
  PetscFunctionReturn(0);
}

/* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */
PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *submat)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)mat->data,*asub;
  Mat        M = NULL;
  MPI_Comm   comm;
  IS         iscol_d,isrow_d,iscol_o;
  Mat        Asub = NULL,Bsub = NULL;
  PetscInt   n;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)mat,&comm));

  if (call == MAT_REUSE_MATRIX) {
    /* Retrieve isrow_d, iscol_d and iscol_o from submat */
    PetscCall(PetscObjectQuery((PetscObject)*submat,"isrow_d",(PetscObject*)&isrow_d));
    PetscCheck(isrow_d,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"isrow_d passed in was not used before, cannot reuse");

PetscCall(PetscObjectQuery((PetscObject)*submat,"iscol_d",(PetscObject*)&iscol_d));
    PetscCheck(iscol_d,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_d passed in was not used before, cannot reuse");

    PetscCall(PetscObjectQuery((PetscObject)*submat,"iscol_o",(PetscObject*)&iscol_o));
    PetscCheck(iscol_o,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_o passed in was not used before, cannot reuse");

    /* Update diagonal and off-diagonal portions of submat */
    asub = (Mat_MPIAIJ*)(*submat)->data;
    PetscCall(MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->A));
    PetscCall(ISGetLocalSize(iscol_o,&n));
    if (n) {
      PetscCall(MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->B));
    }
    PetscCall(MatAssemblyBegin(*submat,MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(*submat,MAT_FINAL_ASSEMBLY));

  } else { /* call == MAT_INITIAL_MATRIX) */
    const PetscInt *garray;
    PetscInt       BsubN;

    /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) */
    PetscCall(ISGetSeqIS_SameColDist_Private(mat,isrow,iscol,&isrow_d,&iscol_d,&iscol_o,&garray));

    /* Create local submatrices Asub and Bsub */
    PetscCall(MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Asub));
    PetscCall(MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Bsub));

    /* Create submatrix M */
    PetscCall(MatCreateMPIAIJWithSeqAIJ(comm,Asub,Bsub,garray,&M));

    /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */
    asub = (Mat_MPIAIJ*)M->data;

    PetscCall(ISGetLocalSize(iscol_o,&BsubN));
    n = asub->B->cmap->N;
    if (BsubN > n) {
      /* This case can be tested using ~petsc/src/tao/bound/tutorials/runplate2_3 */
      const PetscInt *idx;
      PetscInt       i,j,*idx_new,*subgarray = asub->garray;
      PetscCall(PetscInfo(M,"submatrix Bn %" PetscInt_FMT " != BsubN %" PetscInt_FMT ", update iscol_o\n",n,BsubN));

      /* merge-walk both sorted ghost lists to keep only the retained columns of iscol_o */
      PetscCall(PetscMalloc1(n,&idx_new));
      j = 0;
      PetscCall(ISGetIndices(iscol_o,&idx));
      for (i=0; i<n; i++) {
        if (j >= BsubN) break;
        while (subgarray[i] > garray[j]) j++;

        if (subgarray[i] == garray[j]) {
          idx_new[i] = idx[j++];
        } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"subgarray[%" PetscInt_FMT "]=%" PetscInt_FMT " cannot < garray[%" PetscInt_FMT "]=%" PetscInt_FMT,i,subgarray[i],j,garray[j]);
      }
      PetscCall(ISRestoreIndices(iscol_o,&idx));

      PetscCall(ISDestroy(&iscol_o));
      PetscCall(ISCreateGeneral(PETSC_COMM_SELF,n,idx_new,PETSC_OWN_POINTER,&iscol_o));

    } else if (BsubN < n) {
      SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Columns of Bsub (%" PetscInt_FMT ") cannot be smaller than B's (%" PetscInt_FMT ")",BsubN,asub->B->cmap->N);
    }

    PetscCall(PetscFree(garray));
    *submat = M;

    /* Save isrow_d, iscol_d and iscol_o used in processor for next request
*/
    PetscCall(PetscObjectCompose((PetscObject)M,"isrow_d",(PetscObject)isrow_d));
    PetscCall(ISDestroy(&isrow_d));

    PetscCall(PetscObjectCompose((PetscObject)M,"iscol_d",(PetscObject)iscol_d));
    PetscCall(ISDestroy(&iscol_d));

    PetscCall(PetscObjectCompose((PetscObject)M,"iscol_o",(PetscObject)iscol_o));
    PetscCall(ISDestroy(&iscol_o));
  }
  PetscFunctionReturn(0);
}

/* Top-level MatCreateSubMatrix() implementation for MPIAIJ; chooses the cheapest
   strategy depending on whether isrow/iscol match the matrix's row/column layout. */
PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
{
  IS        iscol_local=NULL,isrow_d;
  PetscInt  csize;
  PetscInt  n,i,j,start,end;
  PetscBool sameRowDist=PETSC_FALSE,sameDist[2],tsameDist[2];
  MPI_Comm  comm;

  PetscFunctionBegin;
  /* If isrow has same processor distribution as mat,
     call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */
  if (call == MAT_REUSE_MATRIX) {
    /* the ISs composed on the submatrix at creation encode which strategy was used */
    PetscCall(PetscObjectQuery((PetscObject)*newmat,"isrow_d",(PetscObject*)&isrow_d));
    if (isrow_d) {
      sameRowDist  = PETSC_TRUE;
      tsameDist[1] = PETSC_TRUE; /* sameColDist */
    } else {
      PetscCall(PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_local));
      if (iscol_local) {
        sameRowDist  = PETSC_TRUE;
        tsameDist[1] = PETSC_FALSE; /* !sameColDist */
      }
    }
  } else {
    /* Check if isrow has same processor distribution as mat */
    sameDist[0] = PETSC_FALSE;
    PetscCall(ISGetLocalSize(isrow,&n));
    if (!n) {
      sameDist[0] = PETSC_TRUE;
    } else {
      PetscCall(ISGetMinMax(isrow,&i,&j));
      PetscCall(MatGetOwnershipRange(mat,&start,&end));
      if (i >= start && j < end) {
        sameDist[0] = PETSC_TRUE;
      }
    }

    /* Check if iscol has same processor distribution as mat */
    sameDist[1] = PETSC_FALSE;
    PetscCall(ISGetLocalSize(iscol,&n));
    if (!n) {
      sameDist[1] = PETSC_TRUE;
    } else {
      PetscCall(ISGetMinMax(iscol,&i,&j));
      PetscCall(MatGetOwnershipRangeColumn(mat,&start,&end));
      if (i >= start && j < end) sameDist[1] = PETSC_TRUE;
    }

    /* the decision must be unanimous across all ranks */
    PetscCall(PetscObjectGetComm((PetscObject)mat,&comm));
    PetscCall(MPIU_Allreduce(&sameDist,&tsameDist,2,MPIU_BOOL,MPI_LAND,comm));
    sameRowDist = tsameDist[0];
  }

  if (sameRowDist) {
    if (tsameDist[1]) { /* sameRowDist & sameColDist */
      /* isrow and iscol have same processor distribution as mat */
      PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat,isrow,iscol,call,newmat));
      PetscFunctionReturn(0);
    } else { /* sameRowDist */
      /* isrow has same processor distribution as mat */
      if (call == MAT_INITIAL_MATRIX) {
        PetscBool sorted;
        PetscCall(ISGetSeqIS_Private(mat,iscol,&iscol_local));
        PetscCall(ISGetLocalSize(iscol_local,&n)); /* local size of iscol_local = global columns of newmat */
        PetscCall(ISGetSize(iscol,&i));
        PetscCheckFalse(n != i,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"n %" PetscInt_FMT " != size of iscol %" PetscInt_FMT,n,i);

        PetscCall(ISSorted(iscol_local,&sorted));
        if (sorted) {
          /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local be sorted; it can have duplicate indices */
          PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,iscol_local,MAT_INITIAL_MATRIX,newmat));
          PetscFunctionReturn(0);
        }
      } else { /* call == MAT_REUSE_MATRIX */
        IS iscol_sub;
        PetscCall(PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub));
        if (iscol_sub) {
          PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,NULL,call,newmat));
          PetscFunctionReturn(0);
        }
      }
    }
  }

  /* General case: iscol -> iscol_local which has global size of iscol */
  if (call == MAT_REUSE_MATRIX) {
    PetscCall(PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local));
PetscCheck(iscol_local,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
  } else {
    if (!iscol_local) {
      PetscCall(ISGetSeqIS_Private(mat,iscol,&iscol_local));
    }
  }

  PetscCall(ISGetLocalSize(iscol,&csize));
  PetscCall(MatCreateSubMatrix_MPIAIJ_nonscalable(mat,isrow,iscol_local,csize,call,newmat));

  if (call == MAT_INITIAL_MATRIX) {
    /* keep the gathered column IS attached so a later MAT_REUSE_MATRIX call can find it */
    PetscCall(PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local));
    PetscCall(ISDestroy(&iscol_local));
  }
  PetscFunctionReturn(0);
}

/*@C
     MatCreateMPIAIJWithSeqAIJ - creates a MPIAIJ matrix using SeqAIJ matrices that contain the "diagonal"
     and "off-diagonal" part of the matrix in CSR format.

   Collective

   Input Parameters:
+  comm - MPI communicator
.  A - "diagonal" portion of matrix
.  B - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine
-  garray - global index of B columns

   Output Parameter:
.   mat - the matrix, with input A as its local diagonal matrix
   Level: advanced

   Notes:
       See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix.
       A becomes part of output mat, B is destroyed by this routine. The user cannot use A and B anymore.
.seealso: MatCreateMPIAIJWithSplitArrays()
@*/
PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm,Mat A,Mat B,const PetscInt garray[],Mat *mat)
{
  Mat_MPIAIJ        *maij;
  Mat_SeqAIJ        *b=(Mat_SeqAIJ*)B->data,*bnew;
  PetscInt          *oi=b->i,*oj=b->j,i,nz,col;
  const PetscScalar *oa;
  Mat               Bnew;
  PetscInt          m,n,N;

  PetscFunctionBegin;
  PetscCall(MatCreate(comm,mat));
  PetscCall(MatGetSize(A,&m,&n));
  PetscCheckFalse(m != B->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Am %" PetscInt_FMT " != Bm %" PetscInt_FMT,m,B->rmap->N);
  PetscCheckFalse(A->rmap->bs != B->rmap->bs,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A row bs %" PetscInt_FMT " != B row bs %" PetscInt_FMT,A->rmap->bs,B->rmap->bs);
  /* remove check below; When B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be same as A */
  /* PetscCheckFalse(A->cmap->bs != B->cmap->bs,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %" PetscInt_FMT " != B column bs %" PetscInt_FMT,A->cmap->bs,B->cmap->bs); */

  /* Get global columns of mat */
  PetscCall(MPIU_Allreduce(&n,&N,1,MPIU_INT,MPI_SUM,comm));

  PetscCall(MatSetSizes(*mat,m,n,PETSC_DECIDE,N));
  PetscCall(MatSetType(*mat,MATMPIAIJ));
  PetscCall(MatSetBlockSizes(*mat,A->rmap->bs,A->cmap->bs));
  maij = (Mat_MPIAIJ*)(*mat)->data;

  (*mat)->preallocated = PETSC_TRUE;

  PetscCall(PetscLayoutSetUp((*mat)->rmap));
  PetscCall(PetscLayoutSetUp((*mat)->cmap));

  /* Set A as diagonal portion of *mat */
  maij->A = A;

  /* translate B's compressed local column indices to global indices, in place */
  nz = oi[m];
  for (i=0; i<nz; i++) {
    col   = oj[i];
    oj[i] = garray[col];
  }

  /* Set Bnew as off-diagonal portion of *mat */
  PetscCall(MatSeqAIJGetArrayRead(B,&oa));
  PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,N,oi,oj,(PetscScalar*)oa,&Bnew));
  PetscCall(MatSeqAIJRestoreArrayRead(B,&oa));
  bnew = (Mat_SeqAIJ*)Bnew->data;
  bnew->maxnz = b->maxnz; /* allocated nonzeros of B */
  maij->B     = Bnew;

  PetscCheckFalse(B->rmap->N != Bnew->rmap->N,PETSC_COMM_SELF,PETSC_ERR_PLIB,"BN %" PetscInt_FMT " != BnewN %" PetscInt_FMT,B->rmap->N,Bnew->rmap->N);

  b->singlemalloc = PETSC_FALSE; /* B arrays are shared by Bnew */
  b->free_a       = PETSC_FALSE;
  b->free_ij      = PETSC_FALSE;
  PetscCall(MatDestroy(&B));

  bnew->singlemalloc = PETSC_TRUE; /* arrays will be freed by MatDestroy(&Bnew) */
  bnew->free_a       = PETSC_TRUE;
  bnew->free_ij      = PETSC_TRUE;

  /* condense columns of maij->B */
  PetscCall(MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE));
  PetscCall(MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY));
  PetscCall(MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE));
  PetscCall(MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE));
  PetscFunctionReturn(0);
}

extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool,Mat*);

PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat,IS isrow,IS iscol,IS iscol_local,MatReuse call,Mat *newmat)
{
  PetscInt       i,m,n,rstart,row,rend,nz,j,bs,cbs;
  PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
  Mat_MPIAIJ     *a=(Mat_MPIAIJ*)mat->data;
  Mat            M,Msub,B=a->B;
  MatScalar      *aa;
  Mat_SeqAIJ     *aij;
  PetscInt       *garray = a->garray,*colsub,Ncols;
  PetscInt       count,Bn=B->cmap->N,cstart=mat->cmap->rstart,cend=mat->cmap->rend;
  IS             iscol_sub,iscmap;
  const PetscInt *is_idx,*cmap;
  PetscBool      allcolumns=PETSC_FALSE;
  MPI_Comm       comm;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)mat,&comm));
  if (call == MAT_REUSE_MATRIX) {
    PetscCall(PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub));
PetscCheck(iscol_sub,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"SubIScol passed in was not used before, cannot reuse"); 3477 PetscCall(ISGetLocalSize(iscol_sub,&count)); 3478 3479 PetscCall(PetscObjectQuery((PetscObject)*newmat,"Subcmap",(PetscObject*)&iscmap)); 3480 PetscCheck(iscmap,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Subcmap passed in was not used before, cannot reuse"); 3481 3482 PetscCall(PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Msub)); 3483 PetscCheck(Msub,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3484 3485 PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_REUSE_MATRIX,PETSC_FALSE,&Msub)); 3486 3487 } else { /* call == MAT_INITIAL_MATRIX) */ 3488 PetscBool flg; 3489 3490 PetscCall(ISGetLocalSize(iscol,&n)); 3491 PetscCall(ISGetSize(iscol,&Ncols)); 3492 3493 /* (1) iscol -> nonscalable iscol_local */ 3494 /* Check for special case: each processor gets entire matrix columns */ 3495 PetscCall(ISIdentity(iscol_local,&flg)); 3496 if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE; 3497 PetscCall(MPIU_Allreduce(MPI_IN_PLACE,&allcolumns,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)mat))); 3498 if (allcolumns) { 3499 iscol_sub = iscol_local; 3500 PetscCall(PetscObjectReference((PetscObject)iscol_local)); 3501 PetscCall(ISCreateStride(PETSC_COMM_SELF,n,0,1,&iscmap)); 3502 3503 } else { 3504 /* (2) iscol_local -> iscol_sub and iscmap. 
Implementation below requires iscol_local be sorted, it can have duplicate indices */ 3505 PetscInt *idx,*cmap1,k; 3506 PetscCall(PetscMalloc1(Ncols,&idx)); 3507 PetscCall(PetscMalloc1(Ncols,&cmap1)); 3508 PetscCall(ISGetIndices(iscol_local,&is_idx)); 3509 count = 0; 3510 k = 0; 3511 for (i=0; i<Ncols; i++) { 3512 j = is_idx[i]; 3513 if (j >= cstart && j < cend) { 3514 /* diagonal part of mat */ 3515 idx[count] = j; 3516 cmap1[count++] = i; /* column index in submat */ 3517 } else if (Bn) { 3518 /* off-diagonal part of mat */ 3519 if (j == garray[k]) { 3520 idx[count] = j; 3521 cmap1[count++] = i; /* column index in submat */ 3522 } else if (j > garray[k]) { 3523 while (j > garray[k] && k < Bn-1) k++; 3524 if (j == garray[k]) { 3525 idx[count] = j; 3526 cmap1[count++] = i; /* column index in submat */ 3527 } 3528 } 3529 } 3530 } 3531 PetscCall(ISRestoreIndices(iscol_local,&is_idx)); 3532 3533 PetscCall(ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_OWN_POINTER,&iscol_sub)); 3534 PetscCall(ISGetBlockSize(iscol,&cbs)); 3535 PetscCall(ISSetBlockSize(iscol_sub,cbs)); 3536 3537 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local),count,cmap1,PETSC_OWN_POINTER,&iscmap)); 3538 } 3539 3540 /* (3) Create sequential Msub */ 3541 PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_INITIAL_MATRIX,allcolumns,&Msub)); 3542 } 3543 3544 PetscCall(ISGetLocalSize(iscol_sub,&count)); 3545 aij = (Mat_SeqAIJ*)(Msub)->data; 3546 ii = aij->i; 3547 PetscCall(ISGetIndices(iscmap,&cmap)); 3548 3549 /* 3550 m - number of local rows 3551 Ncols - number of columns (same on all processors) 3552 rstart - first row in new global matrix generated 3553 */ 3554 PetscCall(MatGetSize(Msub,&m,NULL)); 3555 3556 if (call == MAT_INITIAL_MATRIX) { 3557 /* (4) Create parallel newmat */ 3558 PetscMPIInt rank,size; 3559 PetscInt csize; 3560 3561 PetscCallMPI(MPI_Comm_size(comm,&size)); 3562 PetscCallMPI(MPI_Comm_rank(comm,&rank)); 3563 3564 /* 3565 Determine the 
number of non-zeros in the diagonal and off-diagonal 3566 portions of the matrix in order to do correct preallocation 3567 */ 3568 3569 /* first get start and end of "diagonal" columns */ 3570 PetscCall(ISGetLocalSize(iscol,&csize)); 3571 if (csize == PETSC_DECIDE) { 3572 PetscCall(ISGetSize(isrow,&mglobal)); 3573 if (mglobal == Ncols) { /* square matrix */ 3574 nlocal = m; 3575 } else { 3576 nlocal = Ncols/size + ((Ncols % size) > rank); 3577 } 3578 } else { 3579 nlocal = csize; 3580 } 3581 PetscCallMPI(MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm)); 3582 rstart = rend - nlocal; 3583 PetscCheckFalse(rank == size - 1 && rend != Ncols,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT,rend,Ncols); 3584 3585 /* next, compute all the lengths */ 3586 jj = aij->j; 3587 PetscCall(PetscMalloc1(2*m+1,&dlens)); 3588 olens = dlens + m; 3589 for (i=0; i<m; i++) { 3590 jend = ii[i+1] - ii[i]; 3591 olen = 0; 3592 dlen = 0; 3593 for (j=0; j<jend; j++) { 3594 if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++; 3595 else dlen++; 3596 jj++; 3597 } 3598 olens[i] = olen; 3599 dlens[i] = dlen; 3600 } 3601 3602 PetscCall(ISGetBlockSize(isrow,&bs)); 3603 PetscCall(ISGetBlockSize(iscol,&cbs)); 3604 3605 PetscCall(MatCreate(comm,&M)); 3606 PetscCall(MatSetSizes(M,m,nlocal,PETSC_DECIDE,Ncols)); 3607 PetscCall(MatSetBlockSizes(M,bs,cbs)); 3608 PetscCall(MatSetType(M,((PetscObject)mat)->type_name)); 3609 PetscCall(MatMPIAIJSetPreallocation(M,0,dlens,0,olens)); 3610 PetscCall(PetscFree(dlens)); 3611 3612 } else { /* call == MAT_REUSE_MATRIX */ 3613 M = *newmat; 3614 PetscCall(MatGetLocalSize(M,&i,NULL)); 3615 PetscCheckFalse(i != m,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3616 PetscCall(MatZeroEntries(M)); 3617 /* 3618 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3619 rather than the slower MatSetValues(). 
3620 */ 3621 M->was_assembled = PETSC_TRUE; 3622 M->assembled = PETSC_FALSE; 3623 } 3624 3625 /* (5) Set values of Msub to *newmat */ 3626 PetscCall(PetscMalloc1(count,&colsub)); 3627 PetscCall(MatGetOwnershipRange(M,&rstart,NULL)); 3628 3629 jj = aij->j; 3630 PetscCall(MatSeqAIJGetArrayRead(Msub,(const PetscScalar**)&aa)); 3631 for (i=0; i<m; i++) { 3632 row = rstart + i; 3633 nz = ii[i+1] - ii[i]; 3634 for (j=0; j<nz; j++) colsub[j] = cmap[jj[j]]; 3635 PetscCall(MatSetValues_MPIAIJ(M,1,&row,nz,colsub,aa,INSERT_VALUES)); 3636 jj += nz; aa += nz; 3637 } 3638 PetscCall(MatSeqAIJRestoreArrayRead(Msub,(const PetscScalar**)&aa)); 3639 PetscCall(ISRestoreIndices(iscmap,&cmap)); 3640 3641 PetscCall(MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY)); 3642 PetscCall(MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY)); 3643 3644 PetscCall(PetscFree(colsub)); 3645 3646 /* save Msub, iscol_sub and iscmap used in processor for next request */ 3647 if (call == MAT_INITIAL_MATRIX) { 3648 *newmat = M; 3649 PetscCall(PetscObjectCompose((PetscObject)(*newmat),"SubMatrix",(PetscObject)Msub)); 3650 PetscCall(MatDestroy(&Msub)); 3651 3652 PetscCall(PetscObjectCompose((PetscObject)(*newmat),"SubIScol",(PetscObject)iscol_sub)); 3653 PetscCall(ISDestroy(&iscol_sub)); 3654 3655 PetscCall(PetscObjectCompose((PetscObject)(*newmat),"Subcmap",(PetscObject)iscmap)); 3656 PetscCall(ISDestroy(&iscmap)); 3657 3658 if (iscol_local) { 3659 PetscCall(PetscObjectCompose((PetscObject)(*newmat),"ISAllGather",(PetscObject)iscol_local)); 3660 PetscCall(ISDestroy(&iscol_local)); 3661 } 3662 } 3663 PetscFunctionReturn(0); 3664 } 3665 3666 /* 3667 Not great since it makes two copies of the submatrix, first an SeqAIJ 3668 in local and then by concatenating the local matrices the end result. 3669 Writing it directly would be much like MatCreateSubMatrices_MPIAIJ() 3670 3671 Note: This requires a sequential iscol with all indices. 
3672 */ 3673 PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 3674 { 3675 PetscMPIInt rank,size; 3676 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs; 3677 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 3678 Mat M,Mreuse; 3679 MatScalar *aa,*vwork; 3680 MPI_Comm comm; 3681 Mat_SeqAIJ *aij; 3682 PetscBool colflag,allcolumns=PETSC_FALSE; 3683 3684 PetscFunctionBegin; 3685 PetscCall(PetscObjectGetComm((PetscObject)mat,&comm)); 3686 PetscCallMPI(MPI_Comm_rank(comm,&rank)); 3687 PetscCallMPI(MPI_Comm_size(comm,&size)); 3688 3689 /* Check for special case: each processor gets entire matrix columns */ 3690 PetscCall(ISIdentity(iscol,&colflag)); 3691 PetscCall(ISGetLocalSize(iscol,&n)); 3692 if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE; 3693 PetscCall(MPIU_Allreduce(MPI_IN_PLACE,&allcolumns,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)mat))); 3694 3695 if (call == MAT_REUSE_MATRIX) { 3696 PetscCall(PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse)); 3697 PetscCheck(Mreuse,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3698 PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,allcolumns,&Mreuse)); 3699 } else { 3700 PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,allcolumns,&Mreuse)); 3701 } 3702 3703 /* 3704 m - number of local rows 3705 n - number of columns (same on all processors) 3706 rstart - first row in new global matrix generated 3707 */ 3708 PetscCall(MatGetSize(Mreuse,&m,&n)); 3709 PetscCall(MatGetBlockSizes(Mreuse,&bs,&cbs)); 3710 if (call == MAT_INITIAL_MATRIX) { 3711 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3712 ii = aij->i; 3713 jj = aij->j; 3714 3715 /* 3716 Determine the number of non-zeros in the diagonal and off-diagonal 3717 portions of the matrix in order to do correct preallocation 3718 */ 3719 3720 
/* first get start and end of "diagonal" columns */ 3721 if (csize == PETSC_DECIDE) { 3722 PetscCall(ISGetSize(isrow,&mglobal)); 3723 if (mglobal == n) { /* square matrix */ 3724 nlocal = m; 3725 } else { 3726 nlocal = n/size + ((n % size) > rank); 3727 } 3728 } else { 3729 nlocal = csize; 3730 } 3731 PetscCallMPI(MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm)); 3732 rstart = rend - nlocal; 3733 PetscCheckFalse(rank == size - 1 && rend != n,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT,rend,n); 3734 3735 /* next, compute all the lengths */ 3736 PetscCall(PetscMalloc1(2*m+1,&dlens)); 3737 olens = dlens + m; 3738 for (i=0; i<m; i++) { 3739 jend = ii[i+1] - ii[i]; 3740 olen = 0; 3741 dlen = 0; 3742 for (j=0; j<jend; j++) { 3743 if (*jj < rstart || *jj >= rend) olen++; 3744 else dlen++; 3745 jj++; 3746 } 3747 olens[i] = olen; 3748 dlens[i] = dlen; 3749 } 3750 PetscCall(MatCreate(comm,&M)); 3751 PetscCall(MatSetSizes(M,m,nlocal,PETSC_DECIDE,n)); 3752 PetscCall(MatSetBlockSizes(M,bs,cbs)); 3753 PetscCall(MatSetType(M,((PetscObject)mat)->type_name)); 3754 PetscCall(MatMPIAIJSetPreallocation(M,0,dlens,0,olens)); 3755 PetscCall(PetscFree(dlens)); 3756 } else { 3757 PetscInt ml,nl; 3758 3759 M = *newmat; 3760 PetscCall(MatGetLocalSize(M,&ml,&nl)); 3761 PetscCheckFalse(ml != m,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3762 PetscCall(MatZeroEntries(M)); 3763 /* 3764 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3765 rather than the slower MatSetValues(). 
3766 */ 3767 M->was_assembled = PETSC_TRUE; 3768 M->assembled = PETSC_FALSE; 3769 } 3770 PetscCall(MatGetOwnershipRange(M,&rstart,&rend)); 3771 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3772 ii = aij->i; 3773 jj = aij->j; 3774 3775 /* trigger copy to CPU if needed */ 3776 PetscCall(MatSeqAIJGetArrayRead(Mreuse,(const PetscScalar**)&aa)); 3777 for (i=0; i<m; i++) { 3778 row = rstart + i; 3779 nz = ii[i+1] - ii[i]; 3780 cwork = jj; jj += nz; 3781 vwork = aa; aa += nz; 3782 PetscCall(MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES)); 3783 } 3784 PetscCall(MatSeqAIJRestoreArrayRead(Mreuse,(const PetscScalar**)&aa)); 3785 3786 PetscCall(MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY)); 3787 PetscCall(MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY)); 3788 *newmat = M; 3789 3790 /* save submatrix used in processor for next request */ 3791 if (call == MAT_INITIAL_MATRIX) { 3792 PetscCall(PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse)); 3793 PetscCall(MatDestroy(&Mreuse)); 3794 } 3795 PetscFunctionReturn(0); 3796 } 3797 3798 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[]) 3799 { 3800 PetscInt m,cstart, cend,j,nnz,i,d; 3801 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart,ii; 3802 const PetscInt *JJ; 3803 PetscBool nooffprocentries; 3804 3805 PetscFunctionBegin; 3806 PetscCheckFalse(Ii[0],PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %" PetscInt_FMT,Ii[0]); 3807 3808 PetscCall(PetscLayoutSetUp(B->rmap)); 3809 PetscCall(PetscLayoutSetUp(B->cmap)); 3810 m = B->rmap->n; 3811 cstart = B->cmap->rstart; 3812 cend = B->cmap->rend; 3813 rstart = B->rmap->rstart; 3814 3815 PetscCall(PetscCalloc2(m,&d_nnz,m,&o_nnz)); 3816 3817 if (PetscDefined(USE_DEBUG)) { 3818 for (i=0; i<m; i++) { 3819 nnz = Ii[i+1]- Ii[i]; 3820 JJ = J + Ii[i]; 3821 PetscCheckFalse(nnz < 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %" PetscInt_FMT " has a negative %" PetscInt_FMT " number of columns",i,nnz); 3822 
      PetscCheckFalse(nnz && (JJ[0] < 0),PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %" PetscInt_FMT " starts with negative column index %" PetscInt_FMT,i,JJ[0]);
      PetscCheckFalse(nnz && (JJ[nnz-1] >= B->cmap->N),PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %" PetscInt_FMT " ends with too large a column index %" PetscInt_FMT " (max allowed %" PetscInt_FMT ")",i,JJ[nnz-1],B->cmap->N);
    }
  }

  /* Split each row's nonzeros into diagonal-block (global column in [cstart,cend)) and
     off-diagonal counts so MatMPIAIJSetPreallocation() can size both SeqAIJ parts exactly. */
  for (i=0; i<m; i++) {
    nnz     = Ii[i+1]- Ii[i];
    JJ      = J + Ii[i];
    nnz_max = PetscMax(nnz_max,nnz);
    d       = 0;
    for (j=0; j<nnz; j++) {
      if (cstart <= JJ[j] && JJ[j] < cend) d++;
    }
    d_nnz[i] = d;
    o_nnz[i] = nnz - d;
  }
  PetscCall(MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz));
  PetscCall(PetscFree2(d_nnz,o_nnz));

  /* insert the caller's values row by row; column indices are global, v may be NULL (structure only) */
  for (i=0; i<m; i++) {
    ii = i + rstart;
    PetscCall(MatSetValues_MPIAIJ(B,1,&ii,Ii[i+1] - Ii[i],J+Ii[i], v ? v + Ii[i] : NULL,INSERT_VALUES));
  }
  /* every inserted entry is locally owned, so assembly can skip the off-process stash exchange */
  nooffprocentries    = B->nooffprocentries;
  B->nooffprocentries = PETSC_TRUE;
  PetscCall(MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY));
  B->nooffprocentries = nooffprocentries;

  PetscCall(MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE));
  PetscFunctionReturn(0);
}

/*@
   MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
   (the default parallel PETSc format).

   Collective

   Input Parameters:
+  B - the matrix
.  i - the indices into j for the start of each local row (starts with zero)
.  j - the column indices for each local row (starts with zero)
-  v - optional values in the matrix

   Level: developer

   Notes:
       The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
     thus you CANNOT change the matrix entries by changing the values of v[] after you have
     called this routine.
Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3873 3874 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 3875 3876 The format which is used for the sparse matrix input, is equivalent to a 3877 row-major ordering.. i.e for the following matrix, the input data expected is 3878 as shown 3879 3880 $ 1 0 0 3881 $ 2 0 3 P0 3882 $ ------- 3883 $ 4 5 6 P1 3884 $ 3885 $ Process0 [P0]: rows_owned=[0,1] 3886 $ i = {0,1,3} [size = nrow+1 = 2+1] 3887 $ j = {0,0,2} [size = 3] 3888 $ v = {1,2,3} [size = 3] 3889 $ 3890 $ Process1 [P1]: rows_owned=[2] 3891 $ i = {0,3} [size = nrow+1 = 1+1] 3892 $ j = {0,1,2} [size = 3] 3893 $ v = {4,5,6} [size = 3] 3894 3895 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MATMPIAIJ, 3896 MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays() 3897 @*/ 3898 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3899 { 3900 PetscFunctionBegin; 3901 PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v)); 3902 PetscFunctionReturn(0); 3903 } 3904 3905 /*@C 3906 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format 3907 (the default parallel PETSc format). For good matrix assembly performance 3908 the user should preallocate the matrix storage by setting the parameters 3909 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3910 performance can be increased by more than a factor of 50. 3911 3912 Collective 3913 3914 Input Parameters: 3915 + B - the matrix 3916 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 3917 (same value is used for all local rows) 3918 . 
d_nnz - array containing the number of nonzeros in the various rows of the 3919 DIAGONAL portion of the local submatrix (possibly different for each row) 3920 or NULL (PETSC_NULL_INTEGER in Fortran), if d_nz is used to specify the nonzero structure. 3921 The size of this array is equal to the number of local rows, i.e 'm'. 3922 For matrices that will be factored, you must leave room for (and set) 3923 the diagonal entry even if it is zero. 3924 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 3925 submatrix (same value is used for all local rows). 3926 - o_nnz - array containing the number of nonzeros in the various rows of the 3927 OFF-DIAGONAL portion of the local submatrix (possibly different for 3928 each row) or NULL (PETSC_NULL_INTEGER in Fortran), if o_nz is used to specify the nonzero 3929 structure. The size of this array is equal to the number 3930 of local rows, i.e 'm'. 3931 3932 If the *_nnz parameter is given then the *_nz parameter is ignored 3933 3934 The AIJ format (also called the Yale sparse matrix format or 3935 compressed row storage (CSR)), is fully compatible with standard Fortran 77 3936 storage. The stored row and column indices begin with zero. 3937 See Users-Manual: ch_mat for details. 3938 3939 The parallel matrix is partitioned such that the first m0 rows belong to 3940 process 0, the next m1 rows belong to process 1, the next m2 rows belong 3941 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 3942 3943 The DIAGONAL portion of the local submatrix of a processor can be defined 3944 as the submatrix which is obtained by extraction the part corresponding to 3945 the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the 3946 first row that belongs to the processor, r2 is the last row belonging to 3947 the this processor, and c1-c2 is range of indices of the local part of a 3948 vector suitable for applying the matrix to. This is an mxn matrix. 
In the 3949 common case of a square matrix, the row and column ranges are the same and 3950 the DIAGONAL part is also square. The remaining portion of the local 3951 submatrix (mxN) constitute the OFF-DIAGONAL portion. 3952 3953 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 3954 3955 You can call MatGetInfo() to get information on how effective the preallocation was; 3956 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3957 You can also run with the option -info and look for messages with the string 3958 malloc in them to see if additional memory allocation was needed. 3959 3960 Example usage: 3961 3962 Consider the following 8x8 matrix with 34 non-zero values, that is 3963 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 3964 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 3965 as follows: 3966 3967 .vb 3968 1 2 0 | 0 3 0 | 0 4 3969 Proc0 0 5 6 | 7 0 0 | 8 0 3970 9 0 10 | 11 0 0 | 12 0 3971 ------------------------------------- 3972 13 0 14 | 15 16 17 | 0 0 3973 Proc1 0 18 0 | 19 20 21 | 0 0 3974 0 0 0 | 22 23 0 | 24 0 3975 ------------------------------------- 3976 Proc2 25 26 27 | 0 0 28 | 29 0 3977 30 0 0 | 31 32 33 | 0 34 3978 .ve 3979 3980 This can be represented as a collection of submatrices as: 3981 3982 .vb 3983 A B C 3984 D E F 3985 G H I 3986 .ve 3987 3988 Where the submatrices A,B,C are owned by proc0, D,E,F are 3989 owned by proc1, G,H,I are owned by proc2. 3990 3991 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3992 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3993 The 'M','N' parameters are 8,8, and have the same values on all procs. 3994 3995 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 3996 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 3997 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 
   Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
   part as SeqAIJ matrices. E.g., proc1 will store [E] as a SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.

   When d_nz, o_nz parameters are specified, d_nz storage elements are
   allocated for every row of the local diagonal submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submat.
   One way to choose d_nz and o_nz is to use the maximum number of nonzeros per local
   row for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
   In this case, the values of d_nz,o_nz are:
.vb
     proc0 : dnz = 2, o_nz = 2
     proc1 : dnz = 3, o_nz = 2
     proc2 : dnz = 1, o_nz = 4
.ve
   We are allocating m*(d_nz+o_nz) storage locations for every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc2, i.e. we are using 12+15+10=37 storage locations to store
   34 values.

   When d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
   In the above case the values for d_nnz,o_nnz are:
.vb
     proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2: d_nnz = [1,1] and o_nnz = [4,4]
.ve
   Here the space allocated is the sum of all the above values, i.e. 34, and
   hence pre-allocation is perfect.
4028 4029 Level: intermediate 4030 4031 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(), 4032 MATMPIAIJ, MatGetInfo(), PetscSplitOwnership() 4033 @*/ 4034 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 4035 { 4036 PetscFunctionBegin; 4037 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 4038 PetscValidType(B,1); 4039 PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz)); 4040 PetscFunctionReturn(0); 4041 } 4042 4043 /*@ 4044 MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard 4045 CSR format for the local rows. 4046 4047 Collective 4048 4049 Input Parameters: 4050 + comm - MPI communicator 4051 . m - number of local rows (Cannot be PETSC_DECIDE) 4052 . n - This value should be the same as the local size used in creating the 4053 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4054 calculated if N is given) For square matrices n is almost always m. 4055 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4056 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4057 . i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix 4058 . j - column indices 4059 - a - matrix values 4060 4061 Output Parameter: 4062 . mat - the matrix 4063 4064 Level: intermediate 4065 4066 Notes: 4067 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 4068 thus you CANNOT change the matrix entries by changing the values of a[] after you have 4069 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 4070 4071 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 
   The format which is used for the sparse matrix input, is equivalent to a
   row-major ordering.. i.e for the following matrix, the input data expected is
   as shown

   Once you have created the matrix you can update it with new numerical values using MatUpdateMPIAIJWithArrays

$        1 0 0
$        2 0 3     P0
$       -------
$        4 5 6     P1
$
$     Process0 [P0]: rows_owned=[0,1]
$        i =  {0,1,3}  [size = nrow+1  = 2+1]
$        j =  {0,0,2}  [size = 3]
$        v =  {1,2,3}  [size = 3]
$
$     Process1 [P1]: rows_owned=[2]
$        i =  {0,3}    [size = nrow+1  = 1+1]
$        j =  {0,1,2}  [size = 3]
$        v =  {4,5,6}  [size = 3]

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
          MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays(), MatUpdateMPIAIJWithArrays()
@*/
PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
{
  PetscFunctionBegin;
  /* a valid CSR row-offset array starts at 0; i may be NULL (no local rows) */
  PetscCheckFalse(i && i[0],PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
  /* m determines how many rows of the caller's CSR arrays this rank owns, so it cannot be left for PETSc to decide */
  PetscCheckFalse(m < 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
  PetscCall(MatCreate(comm,mat));
  PetscCall(MatSetSizes(*mat,m,n,M,N));
  /* PetscCall(MatSetBlockSizes(M,bs,cbs)); */
  PetscCall(MatSetType(*mat,MATMPIAIJ));
  /* copies i/j/a into the matrix and assembles it; the caller's arrays are not retained */
  PetscCall(MatMPIAIJSetPreallocationCSR(*mat,i,j,a));
  PetscFunctionReturn(0);
}

/*@
   MatUpdateMPIAIJWithArrays - updates a MPI AIJ matrix using arrays that contain in standard
   CSR format for the local rows. Only the numerical values are updated the other arrays must be identical

   Collective

   Input Parameters:
+  mat - the matrix
.  m - number of local rows (Cannot be PETSC_DECIDE)
.
n - This value should be the same as the local size used in creating the 4120 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4121 calculated if N is given) For square matrices n is almost always m. 4122 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4123 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4124 . Ii - row indices; that is Ii[0] = 0, Ii[row] = Ii[row-1] + number of elements in that row of the matrix 4125 . J - column indices 4126 - v - matrix values 4127 4128 Level: intermediate 4129 4130 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4131 MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays(), MatUpdateMPIAIJWithArrays() 4132 @*/ 4133 PetscErrorCode MatUpdateMPIAIJWithArrays(Mat mat,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[]) 4134 { 4135 PetscInt cstart,nnz,i,j; 4136 PetscInt *ld; 4137 PetscBool nooffprocentries; 4138 Mat_MPIAIJ *Aij = (Mat_MPIAIJ*)mat->data; 4139 Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)Aij->A->data; 4140 PetscScalar *ad,*ao; 4141 const PetscInt *Adi = Ad->i; 4142 PetscInt ldi,Iii,md; 4143 4144 PetscFunctionBegin; 4145 PetscCheckFalse(Ii[0],PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 4146 PetscCheckFalse(m < 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4147 PetscCheckFalse(m != mat->rmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Local number of rows cannot change from call to MatUpdateMPIAIJWithArrays()"); 4148 PetscCheckFalse(n != mat->cmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Local number of columns cannot change from call to MatUpdateMPIAIJWithArrays()"); 4149 4150 PetscCall(MatSeqAIJGetArrayWrite(Aij->A,&ad)); 4151 PetscCall(MatSeqAIJGetArrayWrite(Aij->B,&ao)); 4152 cstart = mat->cmap->rstart; 4153 if (!Aij->ld) { 
4154 /* count number of entries below block diagonal */ 4155 PetscCall(PetscCalloc1(m,&ld)); 4156 Aij->ld = ld; 4157 for (i=0; i<m; i++) { 4158 nnz = Ii[i+1]- Ii[i]; 4159 j = 0; 4160 while (J[j] < cstart && j < nnz) {j++;} 4161 J += nnz; 4162 ld[i] = j; 4163 } 4164 } else { 4165 ld = Aij->ld; 4166 } 4167 4168 for (i=0; i<m; i++) { 4169 nnz = Ii[i+1]- Ii[i]; 4170 Iii = Ii[i]; 4171 ldi = ld[i]; 4172 md = Adi[i+1]-Adi[i]; 4173 PetscCall(PetscArraycpy(ao,v + Iii,ldi)); 4174 PetscCall(PetscArraycpy(ad,v + Iii + ldi,md)); 4175 PetscCall(PetscArraycpy(ao + ldi,v + Iii + ldi + md,nnz - ldi - md)); 4176 ad += md; 4177 ao += nnz - md; 4178 } 4179 nooffprocentries = mat->nooffprocentries; 4180 mat->nooffprocentries = PETSC_TRUE; 4181 PetscCall(MatSeqAIJRestoreArrayWrite(Aij->A,&ad)); 4182 PetscCall(MatSeqAIJRestoreArrayWrite(Aij->B,&ao)); 4183 PetscCall(PetscObjectStateIncrease((PetscObject)Aij->A)); 4184 PetscCall(PetscObjectStateIncrease((PetscObject)Aij->B)); 4185 PetscCall(PetscObjectStateIncrease((PetscObject)mat)); 4186 PetscCall(MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY)); 4187 PetscCall(MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY)); 4188 mat->nooffprocentries = nooffprocentries; 4189 PetscFunctionReturn(0); 4190 } 4191 4192 /*@C 4193 MatCreateAIJ - Creates a sparse parallel matrix in AIJ format 4194 (the default parallel PETSc format). For good matrix assembly performance 4195 the user should preallocate the matrix storage by setting the parameters 4196 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 4197 performance can be increased by more than a factor of 50. 4198 4199 Collective 4200 4201 Input Parameters: 4202 + comm - MPI communicator 4203 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 4204 This value should be the same as the local size used in creating the 4205 y vector for the matrix-vector product y = Ax. 4206 . 
n - This value should be the same as the local size used in creating the 4207 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4208 calculated if N is given) For square matrices n is almost always m. 4209 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4210 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4211 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 4212 (same value is used for all local rows) 4213 . d_nnz - array containing the number of nonzeros in the various rows of the 4214 DIAGONAL portion of the local submatrix (possibly different for each row) 4215 or NULL, if d_nz is used to specify the nonzero structure. 4216 The size of this array is equal to the number of local rows, i.e 'm'. 4217 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 4218 submatrix (same value is used for all local rows). 4219 - o_nnz - array containing the number of nonzeros in the various rows of the 4220 OFF-DIAGONAL portion of the local submatrix (possibly different for 4221 each row) or NULL, if o_nz is used to specify the nonzero 4222 structure. The size of this array is equal to the number 4223 of local rows, i.e 'm'. 4224 4225 Output Parameter: 4226 . A - the matrix 4227 4228 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 4229 MatXXXXSetPreallocation() paradigm instead of this routine directly. 4230 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 4231 4232 Notes: 4233 If the *_nnz parameter is given then the *_nz parameter is ignored 4234 4235 m,n,M,N parameters specify the size of the matrix, and its partitioning across 4236 processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 4237 storage requirements for this matrix. 
4238 4239 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 4240 processor than it must be used on all processors that share the object for 4241 that argument. 4242 4243 The user MUST specify either the local or global matrix dimensions 4244 (possibly both). 4245 4246 The parallel matrix is partitioned across processors such that the 4247 first m0 rows belong to process 0, the next m1 rows belong to 4248 process 1, the next m2 rows belong to process 2 etc.. where 4249 m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores 4250 values corresponding to [m x N] submatrix. 4251 4252 The columns are logically partitioned with the n0 columns belonging 4253 to 0th partition, the next n1 columns belonging to the next 4254 partition etc.. where n0,n1,n2... are the input parameter 'n'. 4255 4256 The DIAGONAL portion of the local submatrix on any given processor 4257 is the submatrix corresponding to the rows and columns m,n 4258 corresponding to the given processor. i.e diagonal matrix on 4259 process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1] 4260 etc. The remaining portion of the local submatrix [m x (N-n)] 4261 constitute the OFF-DIAGONAL portion. The example below better 4262 illustrates this concept. 4263 4264 For a square global matrix we define each processor's diagonal portion 4265 to be its local rows and the corresponding columns (a square submatrix); 4266 each processor's off-diagonal portion encompasses the remainder of the 4267 local matrix (a rectangular submatrix). 4268 4269 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 4270 4271 When calling this routine with a single process communicator, a matrix of 4272 type SEQAIJ is returned. 
If a matrix of type MPIAIJ is desired for this 4273 type of communicator, use the construction mechanism 4274 .vb 4275 MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...); 4276 .ve 4277 4278 $ MatCreate(...,&A); 4279 $ MatSetType(A,MATMPIAIJ); 4280 $ MatSetSizes(A, m,n,M,N); 4281 $ MatMPIAIJSetPreallocation(A,...); 4282 4283 By default, this format uses inodes (identical nodes) when possible. 4284 We search for consecutive rows with the same nonzero structure, thereby 4285 reusing matrix information to achieve increased efficiency. 4286 4287 Options Database Keys: 4288 + -mat_no_inode - Do not use inodes 4289 . -mat_inode_limit <limit> - Sets inode limit (max limit=5) 4290 - -matmult_vecscatter_view <viewer> - View the vecscatter (i.e., communication pattern) used in MatMult() of sparse parallel matrices. 4291 See viewer types in manual of MatView(). Of them, ascii_matlab, draw or binary cause the vecscatter be viewed as a matrix. 4292 Entry (i,j) is the size of message (in bytes) rank i sends to rank j in one MatMult() call. 4293 4294 Example usage: 4295 4296 Consider the following 8x8 matrix with 34 non-zero values, that is 4297 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 4298 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 4299 as follows 4300 4301 .vb 4302 1 2 0 | 0 3 0 | 0 4 4303 Proc0 0 5 6 | 7 0 0 | 8 0 4304 9 0 10 | 11 0 0 | 12 0 4305 ------------------------------------- 4306 13 0 14 | 15 16 17 | 0 0 4307 Proc1 0 18 0 | 19 20 21 | 0 0 4308 0 0 0 | 22 23 0 | 24 0 4309 ------------------------------------- 4310 Proc2 25 26 27 | 0 0 28 | 29 0 4311 30 0 0 | 31 32 33 | 0 34 4312 .ve 4313 4314 This can be represented as a collection of submatrices as 4315 4316 .vb 4317 A B C 4318 D E F 4319 G H I 4320 .ve 4321 4322 Where the submatrices A,B,C are owned by proc0, D,E,F are 4323 owned by proc1, G,H,I are owned by proc2. 

   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'M','N' parameters are 8,8, and have the same values on all procs.

   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
   Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
   part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.

   When d_nz, o_nz parameters are specified, d_nz storage elements are
   allocated for every row of the local diagonal submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submat.
   One way to choose d_nz and o_nz is to use the max nonzeros per local
   rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
   In this case, the values of d_nz,o_nz are
.vb
     proc0 : dnz = 2, o_nz = 2
     proc1 : dnz = 3, o_nz = 2
     proc2 : dnz = 1, o_nz = 4
.ve
   We are allocating m*(d_nz+o_nz) storage locations for every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc2. i.e we are using 12+15+10=37 storage locations to store
   34 values.

   When d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
   In the above case the values for d_nnz,o_nnz are
.vb
     proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2: d_nnz = [1,1]   and o_nnz = [4,4]
.ve
   Here the space allocated is sum of all the above values i.e 34, and
   hence pre-allocation is perfect.
4362 4363 Level: intermediate 4364 4365 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4366 MATMPIAIJ, MatCreateMPIAIJWithArrays() 4367 @*/ 4368 PetscErrorCode MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 4369 { 4370 PetscMPIInt size; 4371 4372 PetscFunctionBegin; 4373 PetscCall(MatCreate(comm,A)); 4374 PetscCall(MatSetSizes(*A,m,n,M,N)); 4375 PetscCallMPI(MPI_Comm_size(comm,&size)); 4376 if (size > 1) { 4377 PetscCall(MatSetType(*A,MATMPIAIJ)); 4378 PetscCall(MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz)); 4379 } else { 4380 PetscCall(MatSetType(*A,MATSEQAIJ)); 4381 PetscCall(MatSeqAIJSetPreallocation(*A,d_nz,d_nnz)); 4382 } 4383 PetscFunctionReturn(0); 4384 } 4385 4386 /*@C 4387 MatMPIAIJGetSeqAIJ - Returns the local piece of this distributed matrix 4388 4389 Not collective 4390 4391 Input Parameter: 4392 . A - The MPIAIJ matrix 4393 4394 Output Parameters: 4395 + Ad - The local diagonal block as a SeqAIJ matrix 4396 . Ao - The local off-diagonal block as a SeqAIJ matrix 4397 - colmap - An array mapping local column numbers of Ao to global column numbers of the parallel matrix 4398 4399 Note: The rows in Ad and Ao are in [0, Nr), where Nr is the number of local rows on this process. The columns 4400 in Ad are in [0, Nc) where Nc is the number of local columns. The columns are Ao are in [0, Nco), where Nco is 4401 the number of nonzero columns in the local off-diagonal piece of the matrix A. The array colmap maps these 4402 local column numbers to global column numbers in the original matrix. 

     Level: intermediate

.seealso: MatMPIAIJGetLocalMat(), MatMPIAIJGetLocalMatCondensed(), MatCreateAIJ(), MATMPIAIJ, MATSEQAIJ
@*/
PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
  PetscBool  flg;

  PetscFunctionBegin;
  /* accept any type whose name begins with "mpiaij" (covers subclasses such as mpiaijcusparse) */
  PetscCall(PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&flg));
  PetscCheck(flg,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MATMPIAIJ matrix as input");
  /* each output is optional; NULL means the caller does not want it */
  if (Ad) *Ad = a->A;
  if (Ao) *Ao = a->B;
  if (colmap) *colmap = a->garray;
  PetscFunctionReturn(0);
}

/* Stacks the sequential matrices inmat from the ranks of comm, by rows, into a single
   parallel matrix *outmat. On MAT_INITIAL_MATRIX the symbolic phase computes each rank's
   row offset and preallocates; the numeric phase then copies the rows with MatSetValues(). */
PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
{
  PetscErrorCode ierr;
  PetscInt       m,N,i,rstart,nnz,Ii;
  PetscInt       *indx;
  PetscScalar    *values;
  MatType        rootType;

  PetscFunctionBegin;
  PetscCall(MatGetSize(inmat,&m,&N));
  if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
    PetscInt *dnz,*onz,sum,bs,cbs;

    if (n == PETSC_DECIDE) {
      PetscCall(PetscSplitOwnership(comm,&n,&N));
    }
    /* Check sum(n) = N */
    PetscCall(MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm));
    PetscCheckFalse(sum != N,PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %" PetscInt_FMT " != global columns %" PetscInt_FMT,sum,N);

    /* exclusive prefix sum of the local row counts gives this rank's first global row */
    PetscCallMPI(MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm));
    rstart -= m;

    ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);PetscCall(ierr);
    for (i=0; i<m; i++) {
      PetscCall(MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL));
      PetscCall(MatPreallocateSet(i+rstart,nnz,indx,dnz,onz));
      PetscCall(MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL));
    }

    PetscCall(MatCreate(comm,outmat));
    PetscCall(MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE));
    PetscCall(MatGetBlockSizes(inmat,&bs,&cbs));
    PetscCall(MatSetBlockSizes(*outmat,bs,cbs));
    PetscCall(MatGetRootType_Private(inmat,&rootType));
    PetscCall(MatSetType(*outmat,rootType));
    /* both preallocation calls so the same code works for a 1-rank (SeqAIJ) and multi-rank (MPIAIJ) comm */
    PetscCall(MatSeqAIJSetPreallocation(*outmat,0,dnz));
    PetscCall(MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz));
    ierr = MatPreallocateFinalize(dnz,onz);PetscCall(ierr);
    /* every rank inserts only its own rows, so off-process stashing can be disabled */
    PetscCall(MatSetOption(*outmat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE));
  }

  /* numeric phase */
  PetscCall(MatGetOwnershipRange(*outmat,&rstart,NULL));
  for (i=0; i<m; i++) {
    PetscCall(MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values));
    Ii   = i + rstart;
    PetscCall(MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES));
    PetscCall(MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values));
  }
  PetscCall(MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY));
  PetscFunctionReturn(0);
}

/* Writes each rank's local rows of the parallel matrix A, as a sequential matrix with the
   full global column space, appended to the per-rank binary file "<outfile>.<rank>". */
PetscErrorCode MatFileSplit(Mat A,char *outfile)
{
  PetscMPIInt       rank;
  PetscInt          m,N,i,rstart,nnz;
  size_t            len;
  const PetscInt    *indx;
  PetscViewer       out;
  char              *name;
  Mat               B;
  const PetscScalar *values;

  PetscFunctionBegin;
  PetscCall(MatGetLocalSize(A,&m,NULL));
  PetscCall(MatGetSize(A,NULL,&N));
  /* Should this be the type of the diagonal block of A?
  */
  PetscCall(MatCreate(PETSC_COMM_SELF,&B));
  PetscCall(MatSetSizes(B,m,N,m,N));
  PetscCall(MatSetBlockSizesFromMats(B,A,A));
  PetscCall(MatSetType(B,MATSEQAIJ));
  PetscCall(MatSeqAIJSetPreallocation(B,0,NULL));
  PetscCall(MatGetOwnershipRange(A,&rstart,NULL));
  /* copy this rank's rows of A into the local sequential matrix B (local row i = global row i+rstart) */
  for (i=0; i<m; i++) {
    PetscCall(MatGetRow(A,i+rstart,&nnz,&indx,&values));
    PetscCall(MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES));
    PetscCall(MatRestoreRow(A,i+rstart,&nnz,&indx,&values));
  }
  PetscCall(MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY));

  PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank));
  PetscCall(PetscStrlen(outfile,&len));
  /* NOTE(review): len+6 leaves room for '.' plus at most 4 rank digits and the NUL;
     PetscSNPrintf bounds the write, so larger ranks truncate the filename rather than overflow */
  PetscCall(PetscMalloc1(len+6,&name));
  PetscCall(PetscSNPrintf(name,len+6,"%s.%d",outfile,rank));
  PetscCall(PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out));
  PetscCall(PetscFree(name));
  PetscCall(MatView(B,out));
  PetscCall(PetscViewerDestroy(&out));
  PetscCall(MatDestroy(&B));
  PetscFunctionReturn(0);
}

/* Container destructor for the Mat_Merge_SeqsToMPI support structure attached to matrices
   built by MatCreateMPIAIJSumSeqAIJSymbolic(); frees every buffer the merge allocated. */
static PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(void *data)
{
  Mat_Merge_SeqsToMPI *merge = (Mat_Merge_SeqsToMPI *)data;

  PetscFunctionBegin;
  if (!merge) PetscFunctionReturn(0);
  PetscCall(PetscFree(merge->id_r));
  PetscCall(PetscFree(merge->len_s));
  PetscCall(PetscFree(merge->len_r));
  PetscCall(PetscFree(merge->bi));
  PetscCall(PetscFree(merge->bj));
  /* buf_ri/buf_rj are arrays of pointers whose payload was allocated in one chunk at [0] */
  PetscCall(PetscFree(merge->buf_ri[0]));
  PetscCall(PetscFree(merge->buf_ri));
  PetscCall(PetscFree(merge->buf_rj[0]));
  PetscCall(PetscFree(merge->buf_rj));
  PetscCall(PetscFree(merge->coi));
  PetscCall(PetscFree(merge->coj));
  PetscCall(PetscFree(merge->owners_co));
  PetscCall(PetscLayoutDestroy(&merge->rowmap));
  PetscCall(PetscFree(merge));
  PetscFunctionReturn(0);
}

#include <../src/mat/utils/freespace.h>
#include <petscbt.h>
/* Numeric phase of MatCreateMPIAIJSumSeqAIJ(): fills the parallel matrix created by
   MatCreateMPIAIJSumSeqAIJSymbolic() with the sum, across all ranks, of the sequential
   matrices seqmat. Each rank sends the values of the rows owned by other ranks and merges
   what it receives with its own rows, using the i/j structure cached in the
   "MatMergeSeqsToMPI" container. */
PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat)
{
  MPI_Comm            comm;
  Mat_SeqAIJ          *a =(Mat_SeqAIJ*)seqmat->data;
  PetscMPIInt         size,rank,taga,*len_s;
  PetscInt            N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj;
  PetscInt            proc,m;
  PetscInt            **buf_ri,**buf_rj;
  PetscInt            k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
  PetscInt            nrows,**buf_ri_k,**nextrow,**nextai;
  MPI_Request         *s_waits,*r_waits;
  MPI_Status          *status;
  const MatScalar     *aa,*a_a;
  MatScalar           **abuf_r,*ba_i;
  Mat_Merge_SeqsToMPI *merge;
  PetscContainer      container;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)mpimat,&comm));
  PetscCall(PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0));

  PetscCallMPI(MPI_Comm_size(comm,&size));
  PetscCallMPI(MPI_Comm_rank(comm,&rank));

  /* the symbolic phase must have attached the merge structure; fail hard otherwise */
  PetscCall(PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container));
  PetscCheck(container,PetscObjectComm((PetscObject)mpimat),PETSC_ERR_PLIB,"Mat not created from MatCreateMPIAIJSumSeqAIJSymbolic");
  PetscCall(PetscContainerGetPointer(container,(void**)&merge));
  PetscCall(MatSeqAIJGetArrayRead(seqmat,&a_a));
  aa = a_a;

  bi     = merge->bi;
  bj     = merge->bj;
  buf_ri = merge->buf_ri;
  buf_rj = merge->buf_rj;

  PetscCall(PetscMalloc1(size,&status));
  owners = merge->rowmap->range;
  len_s  = merge->len_s;

  /* send and recv matrix values */
  /*-----------------------------*/
  PetscCall(PetscObjectGetNewTag((PetscObject)mpimat,&taga));
  PetscCall(PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits));

  PetscCall(PetscMalloc1(merge->nsend+1,&s_waits));
  for (proc=0,k=0; proc<size; proc++) {
    if (!len_s[proc]) continue;
    /* rows owned by [proc] are contiguous in seqmat starting at owners[proc] */
    i = owners[proc];
    PetscCallMPI(MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k));
    k++;
  }

  if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv,r_waits,status));
  if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend,s_waits,status));
  PetscCall(PetscFree(status));

  PetscCall(PetscFree(s_waits));
  PetscCall(PetscFree(r_waits));

  /* insert mat values of mpimat */
  /*----------------------------*/
  PetscCall(PetscMalloc1(N,&ba_i));
  PetscCall(PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai));

  for (k=0; k<merge->nrecv; k++) {
    buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
    nrows       = *(buf_ri_k[k]);
    nextrow[k]  = buf_ri_k[k]+1;           /* next row number of k-th recved i-structure */
    nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure */
  }

  /* set values of ba */
  m = merge->rowmap->n;
  for (i=0; i<m; i++) {
    arow = owners[rank] + i;
    bj_i = bj+bi[i];          /* col indices of the i-th row of mpimat */
    bnzi = bi[i+1] - bi[i];
    PetscCall(PetscArrayzero(ba_i,bnzi));

    /* add local non-zero vals of this proc's seqmat into ba */
    anzi   = ai[arow+1] - ai[arow];
    aj     = a->j + ai[arow];
    aa     = a_a + ai[arow];
    nextaj = 0;
    /* bj_i is a superset of aj, both sorted: advance j, consuming aj entries on match */
    for (j=0; nextaj<anzi; j++) {
      if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
        ba_i[j] += aa[nextaj++];
      }
    }

    /* add received vals into ba */
    for (k=0; k<merge->nrecv; k++) { /* k-th received message */
      /* i-th row */
      if (i == *nextrow[k]) {
        anzi   = *(nextai[k]+1) - *nextai[k];
        aj     = buf_rj[k] + *(nextai[k]);
        aa     = abuf_r[k] + *(nextai[k]);
        nextaj = 0;
        for (j=0; nextaj<anzi; j++) {
          if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
            ba_i[j] += aa[nextaj++];
          }
        }
        nextrow[k]++; nextai[k]++;
      }
    }
    PetscCall(MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES));
  }
  PetscCall(MatSeqAIJRestoreArrayRead(seqmat,&a_a));
  PetscCall(MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY));

  /* received value buffers were allocated in one chunk at abuf_r[0] */
  PetscCall(PetscFree(abuf_r[0]));
  PetscCall(PetscFree(abuf_r));
  PetscCall(PetscFree(ba_i));
  PetscCall(PetscFree3(buf_ri_k,nextrow,nextai));
  PetscCall(PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0));
  PetscFunctionReturn(0);
}

/* Symbolic phase of MatCreateMPIAIJSumSeqAIJ(): determines the nonzero structure of the
   parallel sum of the per-rank sequential matrices, preallocates the MPIAIJ matrix, and
   caches the communication/merge data in a "MatMergeSeqsToMPI" container for the numeric phase. */
PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
{
  PetscErrorCode      ierr;
  Mat                 B_mpi;
  Mat_SeqAIJ          *a=(Mat_SeqAIJ*)seqmat->data;
  PetscMPIInt         size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
  PetscInt            **buf_rj,**buf_ri,**buf_ri_k;
  PetscInt            M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
  PetscInt            len,proc,*dnz,*onz,bs,cbs;
  PetscInt            k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
  PetscInt            nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
  MPI_Request         *si_waits,*sj_waits,*ri_waits,*rj_waits;
  MPI_Status          *status;
  PetscFreeSpaceList  free_space=NULL,current_space=NULL;
  PetscBT             lnkbt;
  Mat_Merge_SeqsToMPI *merge;
  PetscContainer      container;

  PetscFunctionBegin;
  PetscCall(PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0));

  /* make sure it is a PETSc comm */
  PetscCall(PetscCommDuplicate(comm,&comm,NULL));
  PetscCallMPI(MPI_Comm_size(comm,&size));
  PetscCallMPI(MPI_Comm_rank(comm,&rank));

  PetscCall(PetscNew(&merge));
  PetscCall(PetscMalloc1(size,&status));

  /* determine row ownership */
  /*---------------------------------------------------------*/
  PetscCall(PetscLayoutCreate(comm,&merge->rowmap));
  PetscCall(PetscLayoutSetLocalSize(merge->rowmap,m));
  PetscCall(PetscLayoutSetSize(merge->rowmap,M));
  PetscCall(PetscLayoutSetBlockSize(merge->rowmap,1));
  PetscCall(PetscLayoutSetUp(merge->rowmap));
  PetscCall(PetscMalloc1(size,&len_si));
  PetscCall(PetscMalloc1(size,&merge->len_s));

  m      = merge->rowmap->n;
  owners = merge->rowmap->range;

  /* determine the number of messages to send, their lengths */
  /*---------------------------------------------------------*/
  len_s = merge->len_s;

  len          = 0; /* length of buf_si[] */
  merge->nsend = 0;
  for (proc=0; proc<size; proc++) {
    len_si[proc] = 0;
    if (proc == rank) {
      len_s[proc] = 0;
    } else {
      len_si[proc] = owners[proc+1] - owners[proc] + 1;
      len_s[proc]  = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
    }
    if (len_s[proc]) {
      merge->nsend++;
      /* only rows with at least one nonzero are described in the i-structure message */
      nrows = 0;
      for (i=owners[proc]; i<owners[proc+1]; i++) {
        if (ai[i+1] > ai[i]) nrows++;
      }
      len_si[proc] = 2*(nrows+1);
      len         += len_si[proc];
    }
  }

  /* determine the number and length of messages to receive for ij-structure */
  /*-------------------------------------------------------------------------*/
  PetscCall(PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv));
  PetscCall(PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri));

  /* post the Irecv of j-structure */
  /*-------------------------------*/
  PetscCall(PetscCommGetNewTag(comm,&tagj));
  PetscCall(PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits));

  /* post the Isend of j-structure */
  /*--------------------------------*/
  PetscCall(PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits));

  for (proc=0, k=0; proc<size; proc++) {
    if (!len_s[proc]) continue;
    i = owners[proc];
    PetscCallMPI(MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k));
    k++;
  }

  /* receives and sends of j-structure are complete */
  /*------------------------------------------------*/
  if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv,rj_waits,status));
  if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend,sj_waits,status));

  /* send and recv i-structure */
  /*---------------------------*/
  PetscCall(PetscCommGetNewTag(comm,&tagi));
  PetscCall(PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits));

  PetscCall(PetscMalloc1(len+1,&buf_s));
  buf_si = buf_s; /* points to the beginning of k-th msg to be sent */
  for (proc=0,k=0; proc<size; proc++) {
    if (!len_s[proc]) continue;
    /* form outgoing message for i-structure:
         buf_si[0]:                 nrows to be sent
               [1:nrows]:           row index (global)
               [nrows+1:2*nrows+1]: i-structure index
    */
    /*-------------------------------------------*/
    nrows       = len_si[proc]/2 - 1;
    buf_si_i    = buf_si + nrows+1;
    buf_si[0]   = nrows;
    buf_si_i[0] = 0;
    nrows       = 0;
    for (i=owners[proc]; i<owners[proc+1]; i++) {
      anzi = ai[i+1] - ai[i];
      if (anzi) {
        buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
        buf_si[nrows+1]   = i-owners[proc]; /* local row index */
        nrows++;
      }
    }
    PetscCallMPI(MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k));
    k++;
    buf_si += len_si[proc];
  }

  if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv,ri_waits,status));
  if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend,si_waits,status));

  PetscCall(PetscInfo(seqmat,"nsend: %d, nrecv: %d\n",merge->nsend,merge->nrecv));
  for (i=0; i<merge->nrecv; i++) {
    PetscCall(PetscInfo(seqmat,"recv len_ri=%d, len_rj=%d from [%d]\n",len_ri[i],merge->len_r[i],merge->id_r[i]));
  }

  PetscCall(PetscFree(len_si));
  PetscCall(PetscFree(len_ri));
  PetscCall(PetscFree(rj_waits));
  PetscCall(PetscFree2(si_waits,sj_waits));
  PetscCall(PetscFree(ri_waits));
  PetscCall(PetscFree(buf_s));
  PetscCall(PetscFree(status));

  /* compute a local seq matrix in each processor */
  /*----------------------------------------------*/
  /* allocate bi array and free space for accumulating nonzero column info */
  PetscCall(PetscMalloc1(m+1,&bi));
  bi[0] = 0;

  /* create and initialize a linked list */
  nlnk = N+1;
  PetscCall(PetscLLCreate(N,N,nlnk,lnk,lnkbt));

  /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
  len = ai[owners[rank+1]] - ai[owners[rank]];
  PetscCall(PetscFreeSpaceGet(PetscIntMultTruncate(2,len)+1,&free_space));

  current_space = free_space;

  /* determine symbolic info for each local row */
  PetscCall(PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai));

  for (k=0; k<merge->nrecv; k++) {
    buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
    nrows       = *buf_ri_k[k];
    nextrow[k]  = buf_ri_k[k] + 1;           /* next row number of k-th recved i-structure */
    nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure */
  }

  ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);PetscCall(ierr);
  len  = 0;
  for (i=0; i<m; i++) {
    bnzi = 0;
    /* add local non-zero cols of this proc's seqmat into lnk */
    arow = owners[rank] + i;
    anzi = ai[arow+1] - ai[arow];
    aj   = a->j + ai[arow];
    PetscCall(PetscLLAddSorted(anzi,aj,N,&nlnk,lnk,lnkbt));
    bnzi += nlnk;
    /* add received col data into lnk */
    for (k=0; k<merge->nrecv; k++) { /* k-th received message */
      if (i == *nextrow[k]) { /* i-th row */
        anzi = *(nextai[k]+1) - *nextai[k];
        aj   = buf_rj[k] + *nextai[k];
        PetscCall(PetscLLAddSorted(anzi,aj,N,&nlnk,lnk,lnkbt));
        bnzi += nlnk;
        nextrow[k]++; nextai[k]++;
      }
    }
    if (len < bnzi) len = bnzi; /* =max(bnzi) */

    /* if free space is not available, make more free space */
    if (current_space->local_remaining<bnzi) {
      PetscCall(PetscFreeSpaceGet(PetscIntSumTruncate(bnzi,current_space->total_array_size),&current_space));
      nspacedouble++;
    }
    /* copy data into free space, then initialize lnk */
    PetscCall(PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt));
    PetscCall(MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz));

    current_space->array           += bnzi;
    current_space->local_used      += bnzi;
    current_space->local_remaining -= bnzi;

    bi[i+1] = bi[i] + bnzi;
  }

  PetscCall(PetscFree3(buf_ri_k,nextrow,nextai));

  PetscCall(PetscMalloc1(bi[m]+1,&bj));
  PetscCall(PetscFreeSpaceContiguous(&free_space,bj));
  PetscCall(PetscLLDestroy(lnk,lnkbt));

  /* create symbolic parallel matrix B_mpi */
  /*---------------------------------------*/
  PetscCall(MatGetBlockSizes(seqmat,&bs,&cbs));
  PetscCall(MatCreate(comm,&B_mpi));
  if (n==PETSC_DECIDE) {
    PetscCall(MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N));
  } else {
    PetscCall(MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE));
  }
  PetscCall(MatSetBlockSizes(B_mpi,bs,cbs));
  PetscCall(MatSetType(B_mpi,MATMPIAIJ));
  PetscCall(MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz));
  ierr = MatPreallocateFinalize(dnz,onz);PetscCall(ierr);
  PetscCall(MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE));

  /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
  B_mpi->assembled = PETSC_FALSE;
  merge->bi        = bi;
  merge->bj        = bj;
  merge->buf_ri    = buf_ri;
  merge->buf_rj    = buf_rj;
  merge->coi       = NULL;
  merge->coj       = NULL;
  merge->owners_co = NULL;

  PetscCall(PetscCommDestroy(&comm));

  /* attach the supporting struct to B_mpi for reuse */
  PetscCall(PetscContainerCreate(PETSC_COMM_SELF,&container));
  PetscCall(PetscContainerSetPointer(container,merge));
  PetscCall(PetscContainerSetUserDestroy(container,MatDestroy_MPIAIJ_SeqsToMPI));
  PetscCall(PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container));
  PetscCall(PetscContainerDestroy(&container));
  *mpimat = B_mpi;

  PetscCall(PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0));
  PetscFunctionReturn(0);
}

/*@C
      MatCreateMPIAIJSumSeqAIJ - Creates a MATMPIAIJ matrix by adding sequential
      matrices from each processor

  Collective

   Input Parameters:
+    comm - the communicators the parallel matrix will live on
.    seqmat - the input sequential matrices
.    m - number of local rows (or PETSC_DECIDE)
.    n - number of local columns (or PETSC_DECIDE)
-    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX

   Output Parameter:
.    mpimat - the parallel matrix generated

    Level: advanced

   Notes:
     The dimensions of the sequential matrix in each processor MUST be the same.
     The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be
     destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
4937 @*/ 4938 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat) 4939 { 4940 PetscMPIInt size; 4941 4942 PetscFunctionBegin; 4943 PetscCallMPI(MPI_Comm_size(comm,&size)); 4944 if (size == 1) { 4945 PetscCall(PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0)); 4946 if (scall == MAT_INITIAL_MATRIX) { 4947 PetscCall(MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat)); 4948 } else { 4949 PetscCall(MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN)); 4950 } 4951 PetscCall(PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0)); 4952 PetscFunctionReturn(0); 4953 } 4954 PetscCall(PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0)); 4955 if (scall == MAT_INITIAL_MATRIX) { 4956 PetscCall(MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat)); 4957 } 4958 PetscCall(MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat)); 4959 PetscCall(PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0)); 4960 PetscFunctionReturn(0); 4961 } 4962 4963 /*@ 4964 MatMPIAIJGetLocalMat - Creates a SeqAIJ from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with 4965 mlocal rows and n columns. Where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained 4966 with MatGetSize() 4967 4968 Not Collective 4969 4970 Input Parameters: 4971 + A - the matrix 4972 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4973 4974 Output Parameter: 4975 . A_loc - the local sequential matrix generated 4976 4977 Level: developer 4978 4979 Notes: 4980 When the communicator associated with A has size 1 and MAT_INITIAL_MATRIX is requested, the matrix returned is the diagonal part of A. 4981 If MAT_REUSE_MATRIX is requested with comm size 1, MatCopy(Adiag,*A_loc,SAME_NONZERO_PATTERN) is called. 4982 This means that one can preallocate the proper sequential matrix first and then call this routine with MAT_REUSE_MATRIX to safely 4983 modify the values of the returned A_loc. 

.seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed(), MatMPIAIJGetLocalMatMerge()
@*/
PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
{
  Mat_MPIAIJ        *mpimat=(Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ        *mat,*a,*b;
  PetscInt          *ai,*aj,*bi,*bj,*cmap=mpimat->garray;
  const PetscScalar *aa,*ba,*aav,*bav;
  PetscScalar       *ca,*cam;
  PetscMPIInt       size;
  PetscInt          am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
  PetscInt          *ci,*cj,col,ncols_d,ncols_o,jo;
  PetscBool         match;

  PetscFunctionBegin;
  /* accept any type whose name begins with "mpiaij" */
  PetscCall(PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&match));
  PetscCheck(match,PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input");
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A),&size));
  if (size == 1) {
    /* one rank: the diagonal block already IS the local matrix */
    if (scall == MAT_INITIAL_MATRIX) {
      PetscCall(PetscObjectReference((PetscObject)mpimat->A));
      *A_loc = mpimat->A;
    } else if (scall == MAT_REUSE_MATRIX) {
      PetscCall(MatCopy(mpimat->A,*A_loc,SAME_NONZERO_PATTERN));
    }
    PetscFunctionReturn(0);
  }

  PetscCall(PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0));
  a = (Mat_SeqAIJ*)(mpimat->A)->data;
  b = (Mat_SeqAIJ*)(mpimat->B)->data;
  /* aj/bj and aa/ba are walking pointers advanced row by row below; the pristine
     array pointers are kept in aav/bav for the final restore calls */
  ai = a->i; aj = a->j; bi = b->i; bj = b->j;
  PetscCall(MatSeqAIJGetArrayRead(mpimat->A,&aav));
  PetscCall(MatSeqAIJGetArrayRead(mpimat->B,&bav));
  aa = aav;
  ba = bav;
  if (scall == MAT_INITIAL_MATRIX) {
    PetscCall(PetscMalloc1(1+am,&ci));
    ci[0] = 0;
    /* row i of the merged matrix holds the diagonal-block and off-diagonal-block entries of row i */
    for (i=0; i<am; i++) {
      ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
    }
    PetscCall(PetscMalloc1(1+ci[am],&cj));
    PetscCall(PetscMalloc1(1+ci[am],&ca));
    k    = 0;
    /* merge each row in ascending global column order:
       off-diagonal columns < cstart, then the diagonal block, then the remaining off-diagonal columns */
    for (i=0; i<am; i++) {
      ncols_o = bi[i+1] - bi[i];
      ncols_d = ai[i+1] - ai[i];
      /* off-diagonal portion of A */
      for (jo=0; jo<ncols_o; jo++) {
        col = cmap[*bj];
        if (col >= cstart) break;
        cj[k]   = col; bj++;
        ca[k++] = *ba++;
      }
      /* diagonal portion of A */
      for (j=0; j<ncols_d; j++) {
        cj[k]   = cstart + *aj++;
        ca[k++] = *aa++;
      }
      /* off-diagonal portion of A */
      for (j=jo; j<ncols_o; j++) {
        cj[k]   = cmap[*bj++];
        ca[k++] = *ba++;
      }
    }
    /* put together the new matrix */
    PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc));
    /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
    /* Since these are PETSc arrays, change flags to free them as necessary. */
    mat          = (Mat_SeqAIJ*)(*A_loc)->data;
    mat->free_a  = PETSC_TRUE;
    mat->free_ij = PETSC_TRUE;
    mat->nonew   = 0;
  } else if (scall == MAT_REUSE_MATRIX) {
    /* structure already exists: refill the values in the same merged order */
    mat =(Mat_SeqAIJ*)(*A_loc)->data;
    ci  = mat->i;
    cj  = mat->j;
    PetscCall(MatSeqAIJGetArrayWrite(*A_loc,&cam));
    for (i=0; i<am; i++) {
      /* off-diagonal portion of A */
      ncols_o = bi[i+1] - bi[i];
      for (jo=0; jo<ncols_o; jo++) {
        col = cmap[*bj];
        if (col >= cstart) break;
        *cam++ = *ba++; bj++;
      }
      /* diagonal portion of A */
      ncols_d = ai[i+1] - ai[i];
      for (j=0; j<ncols_d; j++) *cam++ = *aa++;
      /* off-diagonal portion of A */
      for (j=jo; j<ncols_o; j++) {
        *cam++ = *ba++; bj++;
      }
    }
    PetscCall(MatSeqAIJRestoreArrayWrite(*A_loc,&cam));
  } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
  PetscCall(MatSeqAIJRestoreArrayRead(mpimat->A,&aav));
  PetscCall(MatSeqAIJRestoreArrayRead(mpimat->B,&bav));
  PetscCall(PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0));
  PetscFunctionReturn(0);
}

/*@
     MatMPIAIJGetLocalMatMerge - Creates a SeqAIJ from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
       mlocal rows and n columns.
Where n is the sum of the number of columns of the diagonal and offdiagonal part 5091 5092 Not Collective 5093 5094 Input Parameters: 5095 + A - the matrix 5096 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5097 5098 Output Parameters: 5099 + glob - sequential IS with global indices associated with the columns of the local sequential matrix generated (can be NULL) 5100 - A_loc - the local sequential matrix generated 5101 5102 Level: developer 5103 5104 Notes: 5105 This is different from MatMPIAIJGetLocalMat() since the first columns in the returning matrix are those associated with the diagonal part, then those associated with the offdiagonal part (in its local ordering) 5106 5107 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat(), MatMPIAIJGetLocalMatCondensed() 5108 5109 @*/ 5110 PetscErrorCode MatMPIAIJGetLocalMatMerge(Mat A,MatReuse scall,IS *glob,Mat *A_loc) 5111 { 5112 Mat Ao,Ad; 5113 const PetscInt *cmap; 5114 PetscMPIInt size; 5115 PetscErrorCode (*f)(Mat,MatReuse,IS*,Mat*); 5116 5117 PetscFunctionBegin; 5118 PetscCall(MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&cmap)); 5119 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A),&size)); 5120 if (size == 1) { 5121 if (scall == MAT_INITIAL_MATRIX) { 5122 PetscCall(PetscObjectReference((PetscObject)Ad)); 5123 *A_loc = Ad; 5124 } else if (scall == MAT_REUSE_MATRIX) { 5125 PetscCall(MatCopy(Ad,*A_loc,SAME_NONZERO_PATTERN)); 5126 } 5127 if (glob) PetscCall(ISCreateStride(PetscObjectComm((PetscObject)Ad),Ad->cmap->n,Ad->cmap->rstart,1,glob)); 5128 PetscFunctionReturn(0); 5129 } 5130 PetscCall(PetscObjectQueryFunction((PetscObject)A,"MatMPIAIJGetLocalMatMerge_C",&f)); 5131 PetscCall(PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0)); 5132 if (f) { 5133 PetscCall((*f)(A,scall,glob,A_loc)); 5134 } else { 5135 Mat_SeqAIJ *a = (Mat_SeqAIJ*)Ad->data; 5136 Mat_SeqAIJ *b = (Mat_SeqAIJ*)Ao->data; 5137 Mat_SeqAIJ *c; 5138 PetscInt *ai = a->i, *aj = a->j; 5139 PetscInt *bi = b->i, *bj = b->j; 5140 PetscInt *ci,*cj; 5141 
const PetscScalar *aa,*ba; 5142 PetscScalar *ca; 5143 PetscInt i,j,am,dn,on; 5144 5145 PetscCall(MatGetLocalSize(Ad,&am,&dn)); 5146 PetscCall(MatGetLocalSize(Ao,NULL,&on)); 5147 PetscCall(MatSeqAIJGetArrayRead(Ad,&aa)); 5148 PetscCall(MatSeqAIJGetArrayRead(Ao,&ba)); 5149 if (scall == MAT_INITIAL_MATRIX) { 5150 PetscInt k; 5151 PetscCall(PetscMalloc1(1+am,&ci)); 5152 PetscCall(PetscMalloc1(ai[am]+bi[am],&cj)); 5153 PetscCall(PetscMalloc1(ai[am]+bi[am],&ca)); 5154 ci[0] = 0; 5155 for (i=0,k=0; i<am; i++) { 5156 const PetscInt ncols_o = bi[i+1] - bi[i]; 5157 const PetscInt ncols_d = ai[i+1] - ai[i]; 5158 ci[i+1] = ci[i] + ncols_o + ncols_d; 5159 /* diagonal portion of A */ 5160 for (j=0; j<ncols_d; j++,k++) { 5161 cj[k] = *aj++; 5162 ca[k] = *aa++; 5163 } 5164 /* off-diagonal portion of A */ 5165 for (j=0; j<ncols_o; j++,k++) { 5166 cj[k] = dn + *bj++; 5167 ca[k] = *ba++; 5168 } 5169 } 5170 /* put together the new matrix */ 5171 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,dn+on,ci,cj,ca,A_loc)); 5172 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5173 /* Since these are PETSc arrays, change flags to free them as necessary. 
*/ 5174 c = (Mat_SeqAIJ*)(*A_loc)->data; 5175 c->free_a = PETSC_TRUE; 5176 c->free_ij = PETSC_TRUE; 5177 c->nonew = 0; 5178 PetscCall(MatSetType(*A_loc,((PetscObject)Ad)->type_name)); 5179 } else if (scall == MAT_REUSE_MATRIX) { 5180 PetscCall(MatSeqAIJGetArrayWrite(*A_loc,&ca)); 5181 for (i=0; i<am; i++) { 5182 const PetscInt ncols_d = ai[i+1] - ai[i]; 5183 const PetscInt ncols_o = bi[i+1] - bi[i]; 5184 /* diagonal portion of A */ 5185 for (j=0; j<ncols_d; j++) *ca++ = *aa++; 5186 /* off-diagonal portion of A */ 5187 for (j=0; j<ncols_o; j++) *ca++ = *ba++; 5188 } 5189 PetscCall(MatSeqAIJRestoreArrayWrite(*A_loc,&ca)); 5190 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 5191 PetscCall(MatSeqAIJRestoreArrayRead(Ad,&aa)); 5192 PetscCall(MatSeqAIJRestoreArrayRead(Ao,&aa)); 5193 if (glob) { 5194 PetscInt cst, *gidx; 5195 5196 PetscCall(MatGetOwnershipRangeColumn(A,&cst,NULL)); 5197 PetscCall(PetscMalloc1(dn+on,&gidx)); 5198 for (i=0; i<dn; i++) gidx[i] = cst + i; 5199 for (i=0; i<on; i++) gidx[i+dn] = cmap[i]; 5200 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)Ad),dn+on,gidx,PETSC_OWN_POINTER,glob)); 5201 } 5202 } 5203 PetscCall(PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0)); 5204 PetscFunctionReturn(0); 5205 } 5206 5207 /*@C 5208 MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MATMPIAIJ matrix by taking all its local rows and NON-ZERO columns 5209 5210 Not Collective 5211 5212 Input Parameters: 5213 + A - the matrix 5214 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5215 - row, col - index sets of rows and columns to extract (or NULL) 5216 5217 Output Parameter: 5218 . 
  A_loc - the local sequential matrix generated

   Level: developer

.seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()

@*/
PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
{
  Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data;
  PetscInt   i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
  IS         isrowa,iscola;
  Mat        *aloc;
  PetscBool  match;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match));
  PetscCheck(match,PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input");
  PetscCall(PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0));
  if (!row) {
    /* Default row set: all locally owned rows */
    start = A->rmap->rstart; end = A->rmap->rend;
    PetscCall(ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa));
  } else {
    isrowa = *row;
  }
  if (!col) {
    /* Default column set: all columns with a nonzero in the local part, in increasing global order.
       garray is sorted, so ghost columns below the owned range come first, then the owned columns,
       then the ghost columns above the owned range. */
    start = A->cmap->rstart;
    cmap = a->garray;
    nzA = a->A->cmap->n;
    nzB = a->B->cmap->n;
    PetscCall(PetscMalloc1(nzA+nzB, &idx));
    ncols = 0;
    for (i=0; i<nzB; i++) {
      if (cmap[i] < start) idx[ncols++] = cmap[i];
      else break;
    }
    imark = i;
    for (i=0; i<nzA; i++) idx[ncols++] = start + i;
    for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
    PetscCall(ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola));
  } else {
    iscola = *col;
  }
  if (scall != MAT_INITIAL_MATRIX) {
    /* MatCreateSubMatrices() expects an array of matrices in the reuse case */
    PetscCall(PetscMalloc1(1,&aloc));
    aloc[0] = *A_loc;
  }
  PetscCall(MatCreateSubMatrices(A,1,&isrowa,&iscola,scall,&aloc));
  if (!col) { /* attach global id of condensed columns */
    PetscCall(PetscObjectCompose((PetscObject)aloc[0],"_petsc_GetLocalMatCondensed_iscol",(PetscObject)iscola));
  }
  *A_loc = aloc[0];
  PetscCall(PetscFree(aloc));
  if (!row) {
    PetscCall(ISDestroy(&isrowa));
  }
  if (!col) {
    PetscCall(ISDestroy(&iscola));
  }
  PetscCall(PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0));
  PetscFunctionReturn(0);
}

/*
 * Create a sequential AIJ matrix based on row indices. A whole column is extracted once a row is matched.
 * Row could be local or remote. The routine is designed to be scalable in memory so that nothing is based
 * on a global size.
 * */
PetscErrorCode MatCreateSeqSubMatrixWithRows_Private(Mat P,IS rows,Mat *P_oth)
{
  Mat_MPIAIJ             *p=(Mat_MPIAIJ*)P->data;
  Mat_SeqAIJ             *pd=(Mat_SeqAIJ*)(p->A)->data,*po=(Mat_SeqAIJ*)(p->B)->data,*p_oth;
  PetscInt               plocalsize,nrows,*ilocal,*oilocal,i,lidx,*nrcols,*nlcols,ncol;
  PetscMPIInt            owner;
  PetscSFNode            *iremote,*oiremote;
  const PetscInt         *lrowindices;
  PetscSF                sf,osf;
  PetscInt               pcstart,*roffsets,*loffsets,*pnnz,j;
  PetscInt               ontotalcols,dntotalcols,ntotalcols,nout;
  MPI_Comm               comm;
  ISLocalToGlobalMapping mapping;
  const PetscScalar      *pd_a,*po_a;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)P,&comm));
  /* plocalsize is the number of roots
   * nrows is the number of leaves
   * */
  PetscCall(MatGetLocalSize(P,&plocalsize,NULL));
  PetscCall(ISGetLocalSize(rows,&nrows));
  PetscCall(PetscCalloc1(nrows,&iremote));
  PetscCall(ISGetIndices(rows,&lrowindices));
  for (i=0;i<nrows;i++) {
    /* Find a remote index and an owner for a row
     * The row could be local or remote
     * */
    owner = 0;
    lidx = 0;
    PetscCall(PetscLayoutFindOwnerIndex(P->rmap,lrowindices[i],&owner,&lidx));
    iremote[i].index = lidx;
    iremote[i].rank = owner;
  }
  /* Create SF to communicate how many nonzero columns for each row */
  PetscCall(PetscSFCreate(comm,&sf));
  /* SF will figure out the number of nonzero columns for each row, and their
   * offsets
   * */
  PetscCall(PetscSFSetGraph(sf,plocalsize,nrows,NULL,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER));
  PetscCall(PetscSFSetFromOptions(sf));
  PetscCall(PetscSFSetUp(sf));

  PetscCall(PetscCalloc1(2*(plocalsize+1),&roffsets));
  PetscCall(PetscCalloc1(2*plocalsize,&nrcols));
  PetscCall(PetscCalloc1(nrows,&pnnz));
  roffsets[0] = 0;
  roffsets[1] = 0;
  for (i=0;i<plocalsize;i++) {
    /* diag */
    nrcols[i*2+0] = pd->i[i+1] - pd->i[i];
    /* off diag */
    nrcols[i*2+1] = po->i[i+1] - po->i[i];
    /* compute offsets so that we know the relative location for each row */
    roffsets[(i+1)*2+0] = roffsets[i*2+0] + nrcols[i*2+0];
    roffsets[(i+1)*2+1] = roffsets[i*2+1] + nrcols[i*2+1];
  }
  PetscCall(PetscCalloc1(2*nrows,&nlcols));
  PetscCall(PetscCalloc1(2*nrows,&loffsets));
  /* 'r' means root, and 'l' means leaf */
  PetscCall(PetscSFBcastBegin(sf,MPIU_2INT,nrcols,nlcols,MPI_REPLACE));
  PetscCall(PetscSFBcastBegin(sf,MPIU_2INT,roffsets,loffsets,MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(sf,MPIU_2INT,nrcols,nlcols,MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(sf,MPIU_2INT,roffsets,loffsets,MPI_REPLACE));
  PetscCall(PetscSFDestroy(&sf));
  PetscCall(PetscFree(roffsets));
  PetscCall(PetscFree(nrcols));
  dntotalcols = 0;
  ontotalcols = 0;
  ncol = 0;
  for (i=0;i<nrows;i++) {
    pnnz[i] = nlcols[i*2+0] + nlcols[i*2+1];
    ncol = PetscMax(pnnz[i],ncol);
    /* diag */
    dntotalcols += nlcols[i*2+0];
    /* off diag */
    ontotalcols += nlcols[i*2+1];
  }
  /* We do not need to figure the right number of columns
   * since all the calculations will be done by going through the raw data
   * */
  PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF,nrows,ncol,0,pnnz,P_oth));
  PetscCall(MatSetUp(*P_oth));
  PetscCall(PetscFree(pnnz));
  p_oth = (Mat_SeqAIJ*) (*P_oth)->data;
  /* diag */
  PetscCall(PetscCalloc1(dntotalcols,&iremote));
  /* off diag */
  PetscCall(PetscCalloc1(ontotalcols,&oiremote));
  /* diag */
  PetscCall(PetscCalloc1(dntotalcols,&ilocal));
  /* off diag */
  PetscCall(PetscCalloc1(ontotalcols,&oilocal));
  dntotalcols = 0;
  ontotalcols = 0;
  ntotalcols = 0;
  for (i=0;i<nrows;i++) {
    owner = 0;
    PetscCall(PetscLayoutFindOwnerIndex(P->rmap,lrowindices[i],&owner,NULL));
    /* Set iremote for diag matrix */
    for (j=0;j<nlcols[i*2+0];j++) {
      iremote[dntotalcols].index = loffsets[i*2+0] + j;
      iremote[dntotalcols].rank = owner;
      /* P_oth is seqAIJ so that ilocal need to point to the first part of memory */
      ilocal[dntotalcols++] = ntotalcols++;
    }
    /* off diag */
    for (j=0;j<nlcols[i*2+1];j++) {
      oiremote[ontotalcols].index = loffsets[i*2+1] + j;
      oiremote[ontotalcols].rank = owner;
      oilocal[ontotalcols++] = ntotalcols++;
    }
  }
  PetscCall(ISRestoreIndices(rows,&lrowindices));
  PetscCall(PetscFree(loffsets));
  PetscCall(PetscFree(nlcols));
  PetscCall(PetscSFCreate(comm,&sf));
  /* P serves as roots and P_oth is leaves
   * Diag matrix
   * */
  PetscCall(PetscSFSetGraph(sf,pd->i[plocalsize],dntotalcols,ilocal,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER));
  PetscCall(PetscSFSetFromOptions(sf));
  PetscCall(PetscSFSetUp(sf));

  PetscCall(PetscSFCreate(comm,&osf));
  /* Off diag */
  PetscCall(PetscSFSetGraph(osf,po->i[plocalsize],ontotalcols,oilocal,PETSC_OWN_POINTER,oiremote,PETSC_OWN_POINTER));
  PetscCall(PetscSFSetFromOptions(osf));
  PetscCall(PetscSFSetUp(osf));
  PetscCall(MatSeqAIJGetArrayRead(p->A,&pd_a));
  PetscCall(MatSeqAIJGetArrayRead(p->B,&po_a));
  /* We operate on the matrix internal data for saving memory */
  PetscCall(PetscSFBcastBegin(sf,MPIU_SCALAR,pd_a,p_oth->a,MPI_REPLACE));
  PetscCall(PetscSFBcastBegin(osf,MPIU_SCALAR,po_a,p_oth->a,MPI_REPLACE));
  PetscCall(MatGetOwnershipRangeColumn(P,&pcstart,NULL));
  /* Convert to global indices for diag matrix (undone below after the broadcast is started) */
  for (i=0;i<pd->i[plocalsize];i++) pd->j[i] += pcstart;
  PetscCall(PetscSFBcastBegin(sf,MPIU_INT,pd->j,p_oth->j,MPI_REPLACE));
  /* We want P_oth store global indices */
  PetscCall(ISLocalToGlobalMappingCreate(comm,1,p->B->cmap->n,p->garray,PETSC_COPY_VALUES,&mapping));
  /* Use memory scalable approach */
  PetscCall(ISLocalToGlobalMappingSetType(mapping,ISLOCALTOGLOBALMAPPINGHASH));
  PetscCall(ISLocalToGlobalMappingApply(mapping,po->i[plocalsize],po->j,po->j));
  PetscCall(PetscSFBcastBegin(osf,MPIU_INT,po->j,p_oth->j,MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(sf,MPIU_INT,pd->j,p_oth->j,MPI_REPLACE));
  /* Convert back to local indices */
  for (i=0;i<pd->i[plocalsize];i++) pd->j[i] -= pcstart;
  PetscCall(PetscSFBcastEnd(osf,MPIU_INT,po->j,p_oth->j,MPI_REPLACE));
  nout = 0;
  /* Restore po->j to local indices; every index must map back or the matrix is inconsistent */
  PetscCall(ISGlobalToLocalMappingApply(mapping,IS_GTOLM_DROP,po->i[plocalsize],po->j,&nout,po->j));
  PetscCheckFalse(nout != po->i[plocalsize],comm,PETSC_ERR_ARG_INCOMP,"n %" PetscInt_FMT " does not equal to nout %" PetscInt_FMT " ",po->i[plocalsize],nout);
  PetscCall(ISLocalToGlobalMappingDestroy(&mapping));
  /* Exchange values */
  PetscCall(PetscSFBcastEnd(sf,MPIU_SCALAR,pd_a,p_oth->a,MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(osf,MPIU_SCALAR,po_a,p_oth->a,MPI_REPLACE));
  PetscCall(MatSeqAIJRestoreArrayRead(p->A,&pd_a));
  PetscCall(MatSeqAIJRestoreArrayRead(p->B,&po_a));
  /* Stop PETSc from shrinking memory */
  for (i=0;i<nrows;i++) p_oth->ilen[i] = p_oth->imax[i];
  PetscCall(MatAssemblyBegin(*P_oth,MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(*P_oth,MAT_FINAL_ASSEMBLY));
  /* Attach PetscSF objects to P_oth so that we can reuse it later */
  PetscCall(PetscObjectCompose((PetscObject)*P_oth,"diagsf",(PetscObject)sf));
  PetscCall(PetscObjectCompose((PetscObject)*P_oth,"offdiagsf",(PetscObject)osf));
  PetscCall(PetscSFDestroy(&sf));
  PetscCall(PetscSFDestroy(&osf));
  PetscFunctionReturn(0);
}

/*
 * Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A
 * This supports MPIAIJ and MAIJ
 * */
PetscErrorCode MatGetBrowsOfAcols_MPIXAIJ(Mat A,Mat P,PetscInt dof,MatReuse reuse,Mat *P_oth)
{
  Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data,*p=(Mat_MPIAIJ*)P->data;
  Mat_SeqAIJ *p_oth;
  IS         rows,map;
  PetscHMapI hamp;
  PetscInt   i,htsize,*rowindices,off,*mapping,key,count;
  MPI_Comm   comm;
  PetscSF    sf,osf;
  PetscBool  has;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)A,&comm));
  PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAocols,A,P,0,0));
  /* If it is the first time, create an index set of off-diag nonzero columns of A,
   * and then create a submatrix (that often is an overlapping matrix)
   * */
  if (reuse == MAT_INITIAL_MATRIX) {
    /* Use a hash table to figure out unique keys */
    PetscCall(PetscHMapICreate(&hamp));
    PetscCall(PetscHMapIResize(hamp,a->B->cmap->n));
    PetscCall(PetscCalloc1(a->B->cmap->n,&mapping));
    count = 0;
    /* Assume that a->g is sorted, otherwise the following does not make sense */
    for (i=0;i<a->B->cmap->n;i++) {
      key = a->garray[i]/dof;
      PetscCall(PetscHMapIHas(hamp,key,&has));
      if (!has) {
        mapping[i] = count;
        PetscCall(PetscHMapISet(hamp,key,count++));
      } else {
        /* Current 'i' has the same value as the previous step (duplicates are adjacent because garray is sorted) */
        mapping[i] = count-1;
      }
    }
    PetscCall(ISCreateGeneral(comm,a->B->cmap->n,mapping,PETSC_OWN_POINTER,&map));
    PetscCall(PetscHMapIGetSize(hamp,&htsize));
    PetscCheckFalse(htsize!=count,comm,PETSC_ERR_ARG_INCOMP," Size of hash map %" PetscInt_FMT " is inconsistent with count %" PetscInt_FMT " ",htsize,count);
    PetscCall(PetscCalloc1(htsize,&rowindices));
    off = 0;
    PetscCall(PetscHMapIGetKeys(hamp,&off,rowindices));
    PetscCall(PetscHMapIDestroy(&hamp));
    PetscCall(PetscSortInt(htsize,rowindices));
    PetscCall(ISCreateGeneral(comm,htsize,rowindices,PETSC_OWN_POINTER,&rows));
    /* In case, the matrix was already created but users want to recreate the matrix */
    PetscCall(MatDestroy(P_oth));
    PetscCall(MatCreateSeqSubMatrixWithRows_Private(P,rows,P_oth));
    PetscCall(PetscObjectCompose((PetscObject)*P_oth,"aoffdiagtopothmapping",(PetscObject)map));
    PetscCall(ISDestroy(&map));
    PetscCall(ISDestroy(&rows));
  } else if (reuse == MAT_REUSE_MATRIX) {
    /* If matrix was already created, we simply update values using SF objects
     * that were attached to the matrix earlier.
     */
    const PetscScalar *pd_a,*po_a;

    PetscCall(PetscObjectQuery((PetscObject)*P_oth,"diagsf",(PetscObject*)&sf));
    PetscCall(PetscObjectQuery((PetscObject)*P_oth,"offdiagsf",(PetscObject*)&osf));
    PetscCheckFalse(!sf || !osf,comm,PETSC_ERR_ARG_NULL,"Matrix is not initialized yet");
    p_oth = (Mat_SeqAIJ*) (*P_oth)->data;
    /* Update values in place */
    PetscCall(MatSeqAIJGetArrayRead(p->A,&pd_a));
    PetscCall(MatSeqAIJGetArrayRead(p->B,&po_a));
    PetscCall(PetscSFBcastBegin(sf,MPIU_SCALAR,pd_a,p_oth->a,MPI_REPLACE));
    PetscCall(PetscSFBcastBegin(osf,MPIU_SCALAR,po_a,p_oth->a,MPI_REPLACE));
    PetscCall(PetscSFBcastEnd(sf,MPIU_SCALAR,pd_a,p_oth->a,MPI_REPLACE));
    PetscCall(PetscSFBcastEnd(osf,MPIU_SCALAR,po_a,p_oth->a,MPI_REPLACE));
    PetscCall(MatSeqAIJRestoreArrayRead(p->A,&pd_a));
    PetscCall(MatSeqAIJRestoreArrayRead(p->B,&po_a));
  } else SETERRQ(comm,PETSC_ERR_ARG_UNKNOWN_TYPE,"Unknown reuse type");
  PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAocols,A,P,0,0));
  PetscFunctionReturn(0);
}

/*@C
   MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A

   Collective on Mat

   Input Parameters:
+  A - the first matrix in mpiaij format
.
  B - the second matrix in mpiaij format
-  scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX

   Output Parameters:
+  rowb - On input index sets of rows of B to extract (or NULL), modified on output
.  colb - On input index sets of columns of B to extract (or NULL), modified on output
-  B_seq - the sequential matrix generated

   Level: developer

@*/
PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
{
  Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data;
  PetscInt   *idx,i,start,ncols,nzA,nzB,*cmap,imark;
  IS         isrowb,iscolb;
  Mat        *bseq=NULL;

  PetscFunctionBegin;
  /* A's column layout must match B's row layout for the product-like extraction to make sense */
  if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
    SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
  }
  PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0));

  if (scall == MAT_INITIAL_MATRIX) {
    /* Build the set of rows of B to extract: all columns of A with a local nonzero, in
       increasing global order (garray is sorted, so ghosts below the owned range come first) */
    start = A->cmap->rstart;
    cmap = a->garray;
    nzA = a->A->cmap->n;
    nzB = a->B->cmap->n;
    PetscCall(PetscMalloc1(nzA+nzB, &idx));
    ncols = 0;
    for (i=0; i<nzB; i++) { /* row < local row index */
      if (cmap[i] < start) idx[ncols++] = cmap[i];
      else break;
    }
    imark = i;
    for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */
    for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
    PetscCall(ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb));
    PetscCall(ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb));
  } else {
    PetscCheckFalse(!rowb || !colb,PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
    isrowb = *rowb; iscolb = *colb;
    /* MatCreateSubMatrices() expects an array of matrices in the reuse case */
    PetscCall(PetscMalloc1(1,&bseq));
    bseq[0] = *B_seq;
  }
  PetscCall(MatCreateSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq));
  *B_seq = bseq[0];
  PetscCall(PetscFree(bseq));
  /* Hand the index sets back to the caller (for later reuse) or destroy the ones we created */
  if (!rowb) {
    PetscCall(ISDestroy(&isrowb));
  } else {
    *rowb = isrowb;
  }
  if (!colb) {
    PetscCall(ISDestroy(&iscolb));
  } else {
    *colb = iscolb;
  }
  PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0));
  PetscFunctionReturn(0);
}

/*
  MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns
  of the OFF-DIAGONAL portion of local A

  Collective on Mat

   Input Parameters:
+  A,B - the matrices in mpiaij format
-  scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX

   Output Parameter:
+  startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
.  startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
.  bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
-  B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N

  Developer Notes: This directly accesses information inside the VecScatter associated with the matrix-vector product
   for this matrix. This is not desirable.
5621 5622 Level: developer 5623 5624 */ 5625 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth) 5626 { 5627 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5628 Mat_SeqAIJ *b_oth; 5629 VecScatter ctx; 5630 MPI_Comm comm; 5631 const PetscMPIInt *rprocs,*sprocs; 5632 const PetscInt *srow,*rstarts,*sstarts; 5633 PetscInt *rowlen,*bufj,*bufJ,ncols = 0,aBn=a->B->cmap->n,row,*b_othi,*b_othj,*rvalues=NULL,*svalues=NULL,*cols,sbs,rbs; 5634 PetscInt i,j,k=0,l,ll,nrecvs,nsends,nrows,*rstartsj = NULL,*sstartsj,len; 5635 PetscScalar *b_otha,*bufa,*bufA,*vals = NULL; 5636 MPI_Request *reqs = NULL,*rwaits = NULL,*swaits = NULL; 5637 PetscMPIInt size,tag,rank,nreqs; 5638 5639 PetscFunctionBegin; 5640 PetscCall(PetscObjectGetComm((PetscObject)A,&comm)); 5641 PetscCallMPI(MPI_Comm_size(comm,&size)); 5642 5643 if (PetscUnlikely(A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend)) { 5644 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5645 } 5646 PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0)); 5647 PetscCallMPI(MPI_Comm_rank(comm,&rank)); 5648 5649 if (size == 1) { 5650 startsj_s = NULL; 5651 bufa_ptr = NULL; 5652 *B_oth = NULL; 5653 PetscFunctionReturn(0); 5654 } 5655 5656 ctx = a->Mvctx; 5657 tag = ((PetscObject)ctx)->tag; 5658 5659 PetscCall(VecScatterGetRemote_Private(ctx,PETSC_TRUE/*send*/,&nsends,&sstarts,&srow,&sprocs,&sbs)); 5660 /* rprocs[] must be ordered so that indices received from them are ordered in rvalues[], which is key to algorithms used in this subroutine */ 5661 PetscCall(VecScatterGetRemoteOrdered_Private(ctx,PETSC_FALSE/*recv*/,&nrecvs,&rstarts,NULL/*indices not needed*/,&rprocs,&rbs)); 5662 PetscCall(PetscMPIIntCast(nsends+nrecvs,&nreqs)); 5663 
PetscCall(PetscMalloc1(nreqs,&reqs)); 5664 rwaits = reqs; 5665 swaits = reqs + nrecvs; 5666 5667 if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 5668 if (scall == MAT_INITIAL_MATRIX) { 5669 /* i-array */ 5670 /*---------*/ 5671 /* post receives */ 5672 if (nrecvs) PetscCall(PetscMalloc1(rbs*(rstarts[nrecvs] - rstarts[0]),&rvalues)); /* rstarts can be NULL when nrecvs=0 */ 5673 for (i=0; i<nrecvs; i++) { 5674 rowlen = rvalues + rstarts[i]*rbs; 5675 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */ 5676 PetscCallMPI(MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i)); 5677 } 5678 5679 /* pack the outgoing message */ 5680 PetscCall(PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj)); 5681 5682 sstartsj[0] = 0; 5683 rstartsj[0] = 0; 5684 len = 0; /* total length of j or a array to be sent */ 5685 if (nsends) { 5686 k = sstarts[0]; /* ATTENTION: sstarts[0] and rstarts[0] are not necessarily zero */ 5687 PetscCall(PetscMalloc1(sbs*(sstarts[nsends]-sstarts[0]),&svalues)); 5688 } 5689 for (i=0; i<nsends; i++) { 5690 rowlen = svalues + (sstarts[i]-sstarts[0])*sbs; 5691 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5692 for (j=0; j<nrows; j++) { 5693 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 5694 for (l=0; l<sbs; l++) { 5695 PetscCall(MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL)); /* rowlength */ 5696 5697 rowlen[j*sbs+l] = ncols; 5698 5699 len += ncols; 5700 PetscCall(MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL)); 5701 } 5702 k++; 5703 } 5704 PetscCallMPI(MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i)); 5705 5706 sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */ 5707 } 5708 /* recvs and sends of i-array are completed */ 5709 if (nreqs) PetscCallMPI(MPI_Waitall(nreqs,reqs,MPI_STATUSES_IGNORE)); 5710 PetscCall(PetscFree(svalues)); 5711 5712 /* allocate buffers for sending j and a arrays */ 5713 PetscCall(PetscMalloc1(len+1,&bufj)); 5714 
PetscCall(PetscMalloc1(len+1,&bufa)); 5715 5716 /* create i-array of B_oth */ 5717 PetscCall(PetscMalloc1(aBn+2,&b_othi)); 5718 5719 b_othi[0] = 0; 5720 len = 0; /* total length of j or a array to be received */ 5721 k = 0; 5722 for (i=0; i<nrecvs; i++) { 5723 rowlen = rvalues + (rstarts[i]-rstarts[0])*rbs; 5724 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of rows to be received */ 5725 for (j=0; j<nrows; j++) { 5726 b_othi[k+1] = b_othi[k] + rowlen[j]; 5727 PetscCall(PetscIntSumError(rowlen[j],len,&len)); 5728 k++; 5729 } 5730 rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 5731 } 5732 PetscCall(PetscFree(rvalues)); 5733 5734 /* allocate space for j and a arrrays of B_oth */ 5735 PetscCall(PetscMalloc1(b_othi[aBn]+1,&b_othj)); 5736 PetscCall(PetscMalloc1(b_othi[aBn]+1,&b_otha)); 5737 5738 /* j-array */ 5739 /*---------*/ 5740 /* post receives of j-array */ 5741 for (i=0; i<nrecvs; i++) { 5742 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5743 PetscCallMPI(MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i)); 5744 } 5745 5746 /* pack the outgoing message j-array */ 5747 if (nsends) k = sstarts[0]; 5748 for (i=0; i<nsends; i++) { 5749 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5750 bufJ = bufj+sstartsj[i]; 5751 for (j=0; j<nrows; j++) { 5752 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5753 for (ll=0; ll<sbs; ll++) { 5754 PetscCall(MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL)); 5755 for (l=0; l<ncols; l++) { 5756 *bufJ++ = cols[l]; 5757 } 5758 PetscCall(MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL)); 5759 } 5760 } 5761 PetscCallMPI(MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i)); 5762 } 5763 5764 /* recvs and sends of j-array are completed */ 5765 if (nreqs) PetscCallMPI(MPI_Waitall(nreqs,reqs,MPI_STATUSES_IGNORE)); 5766 } else if (scall == MAT_REUSE_MATRIX) { 5767 sstartsj = *startsj_s; 5768 rstartsj = 
*startsj_r; 5769 bufa = *bufa_ptr; 5770 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5771 PetscCall(MatSeqAIJGetArrayWrite(*B_oth,&b_otha)); 5772 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not possess an object container"); 5773 5774 /* a-array */ 5775 /*---------*/ 5776 /* post receives of a-array */ 5777 for (i=0; i<nrecvs; i++) { 5778 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5779 PetscCallMPI(MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i)); 5780 } 5781 5782 /* pack the outgoing message a-array */ 5783 if (nsends) k = sstarts[0]; 5784 for (i=0; i<nsends; i++) { 5785 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5786 bufA = bufa+sstartsj[i]; 5787 for (j=0; j<nrows; j++) { 5788 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5789 for (ll=0; ll<sbs; ll++) { 5790 PetscCall(MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals)); 5791 for (l=0; l<ncols; l++) { 5792 *bufA++ = vals[l]; 5793 } 5794 PetscCall(MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals)); 5795 } 5796 } 5797 PetscCallMPI(MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i)); 5798 } 5799 /* recvs and sends of a-array are completed */ 5800 if (nreqs) PetscCallMPI(MPI_Waitall(nreqs,reqs,MPI_STATUSES_IGNORE)); 5801 PetscCall(PetscFree(reqs)); 5802 5803 if (scall == MAT_INITIAL_MATRIX) { 5804 /* put together the new matrix */ 5805 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth)); 5806 5807 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5808 /* Since these are PETSc arrays, change flags to free them as necessary. 
*/ 5809 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5810 b_oth->free_a = PETSC_TRUE; 5811 b_oth->free_ij = PETSC_TRUE; 5812 b_oth->nonew = 0; 5813 5814 PetscCall(PetscFree(bufj)); 5815 if (!startsj_s || !bufa_ptr) { 5816 PetscCall(PetscFree2(sstartsj,rstartsj)); 5817 PetscCall(PetscFree(bufa_ptr)); 5818 } else { 5819 *startsj_s = sstartsj; 5820 *startsj_r = rstartsj; 5821 *bufa_ptr = bufa; 5822 } 5823 } else if (scall == MAT_REUSE_MATRIX) { 5824 PetscCall(MatSeqAIJRestoreArrayWrite(*B_oth,&b_otha)); 5825 } 5826 5827 PetscCall(VecScatterRestoreRemote_Private(ctx,PETSC_TRUE,&nsends,&sstarts,&srow,&sprocs,&sbs)); 5828 PetscCall(VecScatterRestoreRemoteOrdered_Private(ctx,PETSC_FALSE,&nrecvs,&rstarts,NULL,&rprocs,&rbs)); 5829 PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0)); 5830 PetscFunctionReturn(0); 5831 } 5832 5833 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*); 5834 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*); 5835 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJSELL(Mat,MatType,MatReuse,Mat*); 5836 #if defined(PETSC_HAVE_MKL_SPARSE) 5837 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat,MatType,MatReuse,Mat*); 5838 #endif 5839 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIBAIJ(Mat,MatType,MatReuse,Mat*); 5840 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*); 5841 #if defined(PETSC_HAVE_ELEMENTAL) 5842 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat,MatType,MatReuse,Mat*); 5843 #endif 5844 #if defined(PETSC_HAVE_SCALAPACK) 5845 PETSC_INTERN PetscErrorCode MatConvert_AIJ_ScaLAPACK(Mat,MatType,MatReuse,Mat*); 5846 #endif 5847 #if defined(PETSC_HAVE_HYPRE) 5848 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*); 5849 #endif 5850 #if defined(PETSC_HAVE_CUDA) 5851 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCUSPARSE(Mat,MatType,MatReuse,Mat*); 5852 #endif 5853 #if defined(PETSC_HAVE_KOKKOS_KERNELS) 
PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJKokkos(Mat,MatType,MatReuse,Mat*);
#endif
PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat,MatType,MatReuse,Mat*);
PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat,MatType,MatReuse,Mat*);
PETSC_INTERN PetscErrorCode MatProductSetFromOptions_IS_XAIJ(Mat);

/*
    Computes (B'*A')' since computing B*A directly is untenable

           n                p              p
        [       ]       [       ]      [       ]
      m [   A   ]  *  n [   B   ]  = m [   C   ]
        [       ]       [       ]      [       ]

*/
static PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
{
  Mat At,Bt,Ct;

  PetscFunctionBegin;
  /* Form the two transposes, multiply them, then transpose the result back into C.
     C was preallocated by the symbolic phase, hence MAT_REUSE_MATRIX on the final transpose. */
  PetscCall(MatTranspose(A,MAT_INITIAL_MATRIX,&At));
  PetscCall(MatTranspose(B,MAT_INITIAL_MATRIX,&Bt));
  PetscCall(MatMatMult(Bt,At,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&Ct));
  PetscCall(MatDestroy(&At));
  PetscCall(MatDestroy(&Bt));
  PetscCall(MatTranspose(Ct,MAT_REUSE_MATRIX,&C));
  PetscCall(MatDestroy(&Ct));
  PetscFunctionReturn(0);
}

/* Symbolic phase for C = A*B with A MPIDENSE and B MPIAIJ: sizes C, fixes its type,
   and installs the numeric routine above. The fill argument is unused here. */
static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat C)
{
  PetscBool cisdense;

  PetscFunctionBegin;
  PetscCheckFalse(A->cmap->n != B->rmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %" PetscInt_FMT " != B->rmap->n %" PetscInt_FMT,A->cmap->n,B->rmap->n);
  PetscCall(MatSetSizes(C,A->rmap->n,B->cmap->n,A->rmap->N,B->cmap->N));
  PetscCall(MatSetBlockSizesFromMats(C,A,B));
  /* If C is not already a (possibly device) dense matrix, give it A's type */
  PetscCall(PetscObjectTypeCompareAny((PetscObject)C,&cisdense,MATMPIDENSE,MATMPIDENSECUDA,""));
  if (!cisdense) {
    PetscCall(MatSetType(C,((PetscObject)A)->type_name));
  }
  PetscCall(MatSetUp(C));

  C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
  PetscFunctionReturn(0);
}

/* ----------------------------------------------------------------*/
/* Wires up the AB product for the MPIDense*MPIAIJ combination after a layout-compatibility check */
static PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ_AB(Mat C)
{
  Mat_Product *product = C->product;
  Mat         A = product->A,B=product->B;

  PetscFunctionBegin;
  if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend)
    SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);

  C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIAIJ;
  C->ops->productsymbolic = MatProductSymbolic_AB;
  PetscFunctionReturn(0);
}

/* Dispatch on the requested product type; only MATPRODUCT_AB is supported for this pairing */
PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ(Mat C)
{
  Mat_Product *product = C->product;

  PetscFunctionBegin;
  if (product->type == MATPRODUCT_AB) {
    PetscCall(MatProductSetFromOptions_MPIDense_MPIAIJ_AB(C));
  }
  PetscFunctionReturn(0);
}

/* std::upper_bound(): Given a sorted array, return index of the first element in range [first,last) whose value
   is greater than value, or last if there is no such element.
5930 */ 5931 static inline PetscErrorCode PetscSortedIntUpperBound(PetscInt *array,PetscCount first,PetscCount last,PetscInt value,PetscCount *upper) 5932 { 5933 PetscCount it,step,count = last - first; 5934 5935 PetscFunctionBegin; 5936 while (count > 0) { 5937 it = first; 5938 step = count / 2; 5939 it += step; 5940 if (!(value < array[it])) { 5941 first = ++it; 5942 count -= step + 1; 5943 } else count = step; 5944 } 5945 *upper = first; 5946 PetscFunctionReturn(0); 5947 } 5948 5949 /* Merge two sets of sorted nonzero entries and return a CSR for the merged (sequential) matrix 5950 5951 Input Parameters: 5952 5953 j1,rowBegin1,rowEnd1,perm1,jmap1: describe the first set of nonzeros (Set1) 5954 j2,rowBegin2,rowEnd2,perm2,jmap2: describe the second set of nonzeros (Set2) 5955 5956 mat: both sets' entries are on m rows, where m is the number of local rows of the matrix mat 5957 5958 For Set1, j1[] contains column indices of the nonzeros. 5959 For the k-th row (0<=k<m), [rowBegin1[k],rowEnd1[k]) index into j1[] and point to the begin/end nonzero in row k 5960 respectively (note rowEnd1[k] is not necessarily equal to rwoBegin1[k+1]). Indices in this range of j1[] are sorted, 5961 but might have repeats. jmap1[t+1] - jmap1[t] is the number of repeats for the t-th unique nonzero in Set1. 5962 5963 Similar for Set2. 5964 5965 This routine merges the two sets of nonzeros row by row and removes repeats. 5966 5967 Output Parameters: (memories are allocated by the caller) 5968 5969 i[],j[]: the CSR of the merged matrix, which has m rows. 5970 imap1[]: the k-th unique nonzero in Set1 (k=0,1,...) corresponds to imap1[k]-th unique nonzero in the merged matrix. 5971 imap2[]: similar to imap1[], but for Set2. 5972 Note we order nonzeros row-by-row and from left to right. 
5973 */ 5974 static PetscErrorCode MatMergeEntries_Internal(Mat mat,const PetscInt j1[],const PetscInt j2[],const PetscCount rowBegin1[],const PetscCount rowEnd1[], 5975 const PetscCount rowBegin2[],const PetscCount rowEnd2[],const PetscCount jmap1[],const PetscCount jmap2[], 5976 PetscCount imap1[],PetscCount imap2[],PetscInt i[],PetscInt j[]) 5977 { 5978 PetscInt r,m; /* Row index of mat */ 5979 PetscCount t,t1,t2,b1,e1,b2,e2; 5980 5981 PetscFunctionBegin; 5982 PetscCall(MatGetLocalSize(mat,&m,NULL)); 5983 t1 = t2 = t = 0; /* Count unique nonzeros of in Set1, Set1 and the merged respectively */ 5984 i[0] = 0; 5985 for (r=0; r<m; r++) { /* Do row by row merging */ 5986 b1 = rowBegin1[r]; 5987 e1 = rowEnd1[r]; 5988 b2 = rowBegin2[r]; 5989 e2 = rowEnd2[r]; 5990 while (b1 < e1 && b2 < e2) { 5991 if (j1[b1] == j2[b2]) { /* Same column index and hence same nonzero */ 5992 j[t] = j1[b1]; 5993 imap1[t1] = t; 5994 imap2[t2] = t; 5995 b1 += jmap1[t1+1] - jmap1[t1]; /* Jump to next unique local nonzero */ 5996 b2 += jmap2[t2+1] - jmap2[t2]; /* Jump to next unique remote nonzero */ 5997 t1++; t2++; t++; 5998 } else if (j1[b1] < j2[b2]) { 5999 j[t] = j1[b1]; 6000 imap1[t1] = t; 6001 b1 += jmap1[t1+1] - jmap1[t1]; 6002 t1++; t++; 6003 } else { 6004 j[t] = j2[b2]; 6005 imap2[t2] = t; 6006 b2 += jmap2[t2+1] - jmap2[t2]; 6007 t2++; t++; 6008 } 6009 } 6010 /* Merge the remaining in either j1[] or j2[] */ 6011 while (b1 < e1) { 6012 j[t] = j1[b1]; 6013 imap1[t1] = t; 6014 b1 += jmap1[t1+1] - jmap1[t1]; 6015 t1++; t++; 6016 } 6017 while (b2 < e2) { 6018 j[t] = j2[b2]; 6019 imap2[t2] = t; 6020 b2 += jmap2[t2+1] - jmap2[t2]; 6021 t2++; t++; 6022 } 6023 i[r+1] = t; 6024 } 6025 PetscFunctionReturn(0); 6026 } 6027 6028 /* Split a set/group of local entries into two subsets: those in the diagonal block and those in the off-diagonal block 6029 6030 Input Parameters: 6031 mat: an MPI matrix that provides row and column layout information for splitting. 
Let's say its number of local rows is m. 6032 n,i[],j[],perm[]: there are n input entries, belonging to m rows. Row/col indices of the entries are stored in i[] and j[] 6033 respectively, along with a permutation array perm[]. Length of the i[],j[],perm[] arrays is n. 6034 6035 i[] is already sorted, but within a row, j[] is not sorted and might have repeats. 6036 i[] might contain negative indices at the beginning, which means the corresponding entries should be ignored in the splitting. 6037 6038 Output Parameters: 6039 j[],perm[]: the routine needs to sort j[] within each row along with perm[]. 6040 rowBegin[],rowMid[],rowEnd[]: of length m, and the memory is preallocated and zeroed by the caller. 6041 They contain indices pointing to j[]. For 0<=r<m, [rowBegin[r],rowMid[r]) point to begin/end entries of row r of the diagonal block, 6042 and [rowMid[r],rowEnd[r]) point to begin/end entries of row r of the off-diagonal block. 6043 6044 Aperm[],Ajmap[],Atot,Annz: Arrays are allocated by this routine. 6045 Aperm[Atot] stores values from perm[] for entries belonging to the diagonal block. Length of Aperm[] is Atot, though it may also count 6046 repeats (i.e., same 'i,j' pair). 6047 Ajmap[Annz+1] stores the number of repeats of each unique entry belonging to the diagonal block. More precisely, Ajmap[t+1] - Ajmap[t] 6048 is the number of repeats for the t-th unique entry in the diagonal block. Ajmap[0] is always 0. 6049 6050 Atot: number of entries belonging to the diagonal block 6051 Annz: number of unique nonzeros belonging to the diagonal block. 6052 6053 Bperm[], Bjmap[], Btot, Bnnz are similar but for the off-diagonal block. 6054 6055 Aperm[],Bperm[],Ajmap[],Bjmap[] are allocated by this routine with PetscMalloc4(). One has to free them with PetscFree4() in the exact order. 
6056 */ 6057 static PetscErrorCode MatSplitEntries_Internal(Mat mat,PetscCount n,const PetscInt i[],PetscInt j[], 6058 PetscCount perm[],PetscCount rowBegin[],PetscCount rowMid[],PetscCount rowEnd[], 6059 PetscCount *Atot_,PetscCount **Aperm_,PetscCount *Annz_,PetscCount **Ajmap_, 6060 PetscCount *Btot_,PetscCount **Bperm_,PetscCount *Bnnz_,PetscCount **Bjmap_) 6061 { 6062 PetscInt cstart,cend,rstart,rend,row,col; 6063 PetscCount Atot=0,Btot=0; /* Total number of nonzeros in the diagonal and off-diagonal blocks */ 6064 PetscCount Annz=0,Bnnz=0; /* Number of unique nonzeros in the diagonal and off-diagonal blocks */ 6065 PetscCount k,m,p,q,r,s,mid; 6066 PetscCount *Aperm,*Bperm,*Ajmap,*Bjmap; 6067 6068 PetscFunctionBegin; 6069 PetscCall(PetscLayoutGetRange(mat->rmap,&rstart,&rend)); 6070 PetscCall(PetscLayoutGetRange(mat->cmap,&cstart,&cend)); 6071 m = rend - rstart; 6072 6073 for (k=0; k<n; k++) {if (i[k]>=0) break;} /* Skip negative rows */ 6074 6075 /* Process [k,n): sort and partition each local row into diag and offdiag portions, 6076 fill rowBegin[], rowMid[], rowEnd[], and count Atot, Btot, Annz, Bnnz. 6077 */ 6078 while (k<n) { 6079 row = i[k]; 6080 /* Entries in [k,s) are in one row. 
Shift diagonal block col indices so that diag is ahead of offdiag after sorting the row */ 6081 for (s=k; s<n; s++) if (i[s] != row) break; 6082 for (p=k; p<s; p++) { 6083 if (j[p] >= cstart && j[p] < cend) j[p] -= PETSC_MAX_INT; /* Shift diag columns to range of [-PETSC_MAX_INT, -1] */ 6084 else PetscAssert((j[p] >= 0) && (j[p] <= mat->cmap->N),PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column index %" PetscInt_FMT " is out of range",j[p]); 6085 } 6086 PetscCall(PetscSortIntWithCountArray(s-k,j+k,perm+k)); 6087 PetscCall(PetscSortedIntUpperBound(j,k,s,-1,&mid)); /* Seperate [k,s) into [k,mid) for diag and [mid,s) for offdiag */ 6088 rowBegin[row-rstart] = k; 6089 rowMid[row-rstart] = mid; 6090 rowEnd[row-rstart] = s; 6091 6092 /* Count nonzeros of this diag/offdiag row, which might have repeats */ 6093 Atot += mid - k; 6094 Btot += s - mid; 6095 6096 /* Count unique nonzeros of this diag/offdiag row */ 6097 for (p=k; p<mid;) { 6098 col = j[p]; 6099 do {j[p] += PETSC_MAX_INT; p++;} while (p<mid && j[p] == col); /* Revert the modified diagonal indices */ 6100 Annz++; 6101 } 6102 6103 for (p=mid; p<s;) { 6104 col = j[p]; 6105 do {p++;} while (p<s && j[p] == col); 6106 Bnnz++; 6107 } 6108 k = s; 6109 } 6110 6111 /* Allocation according to Atot, Btot, Annz, Bnnz */ 6112 PetscCall(PetscMalloc4(Atot,&Aperm,Btot,&Bperm,Annz+1,&Ajmap,Bnnz+1,&Bjmap)); 6113 6114 /* Re-scan indices and copy diag/offdiag permuation indices to Aperm, Bperm and also fill Ajmap and Bjmap */ 6115 Ajmap[0] = Bjmap[0] = Atot = Btot = Annz = Bnnz = 0; 6116 for (r=0; r<m; r++) { 6117 k = rowBegin[r]; 6118 mid = rowMid[r]; 6119 s = rowEnd[r]; 6120 PetscCall(PetscArraycpy(Aperm+Atot,perm+k, mid-k)); 6121 PetscCall(PetscArraycpy(Bperm+Btot,perm+mid,s-mid)); 6122 Atot += mid - k; 6123 Btot += s - mid; 6124 6125 /* Scan column indices in this row and find out how many repeats each unique nonzero has */ 6126 for (p=k; p<mid;) { 6127 col = j[p]; 6128 q = p; 6129 do {p++;} while (p<mid && j[p] == col); 6130 
Ajmap[Annz+1] = Ajmap[Annz] + (p - q); 6131 Annz++; 6132 } 6133 6134 for (p=mid; p<s;) { 6135 col = j[p]; 6136 q = p; 6137 do {p++;} while (p<s && j[p] == col); 6138 Bjmap[Bnnz+1] = Bjmap[Bnnz] + (p - q); 6139 Bnnz++; 6140 } 6141 } 6142 /* Output */ 6143 *Aperm_ = Aperm; 6144 *Annz_ = Annz; 6145 *Atot_ = Atot; 6146 *Ajmap_ = Ajmap; 6147 *Bperm_ = Bperm; 6148 *Bnnz_ = Bnnz; 6149 *Btot_ = Btot; 6150 *Bjmap_ = Bjmap; 6151 PetscFunctionReturn(0); 6152 } 6153 6154 PetscErrorCode MatSetPreallocationCOO_MPIAIJ(Mat mat, PetscCount coo_n, const PetscInt coo_i[], const PetscInt coo_j[]) 6155 { 6156 MPI_Comm comm; 6157 PetscMPIInt rank,size; 6158 PetscInt m,n,M,N,rstart,rend,cstart,cend; /* Sizes, indices of row/col, therefore with type PetscInt */ 6159 PetscCount k,p,q,rem; /* Loop variables over coo arrays */ 6160 Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ*)mat->data; 6161 6162 PetscFunctionBegin; 6163 PetscCall(PetscFree(mpiaij->garray)); 6164 PetscCall(VecDestroy(&mpiaij->lvec)); 6165 #if defined(PETSC_USE_CTABLE) 6166 PetscCall(PetscTableDestroy(&mpiaij->colmap)); 6167 #else 6168 PetscCall(PetscFree(mpiaij->colmap)); 6169 #endif 6170 PetscCall(VecScatterDestroy(&mpiaij->Mvctx)); 6171 mat->assembled = PETSC_FALSE; 6172 mat->was_assembled = PETSC_FALSE; 6173 PetscCall(MatResetPreallocationCOO_MPIAIJ(mat)); 6174 6175 PetscCall(PetscObjectGetComm((PetscObject)mat,&comm)); 6176 PetscCallMPI(MPI_Comm_size(comm,&size)); 6177 PetscCallMPI(MPI_Comm_rank(comm,&rank)); 6178 PetscCall(PetscLayoutSetUp(mat->rmap)); 6179 PetscCall(PetscLayoutSetUp(mat->cmap)); 6180 PetscCall(PetscLayoutGetRange(mat->rmap,&rstart,&rend)); 6181 PetscCall(PetscLayoutGetRange(mat->cmap,&cstart,&cend)); 6182 PetscCall(MatGetLocalSize(mat,&m,&n)); 6183 PetscCall(MatGetSize(mat,&M,&N)); 6184 6185 /* ---------------------------------------------------------------------------*/ 6186 /* Sort (i,j) by row along with a permuation array, so that the to-be-ignored */ 6187 /* entries come first, then local rows, then remote 
rows. */ 6188 /* ---------------------------------------------------------------------------*/ 6189 PetscCount n1 = coo_n,*perm1; 6190 PetscInt *i1,*j1; /* Copies of input COOs along with a permutation array */ 6191 PetscCall(PetscMalloc3(n1,&i1,n1,&j1,n1,&perm1)); 6192 PetscCall(PetscArraycpy(i1,coo_i,n1)); /* Make a copy since we'll modify it */ 6193 PetscCall(PetscArraycpy(j1,coo_j,n1)); 6194 for (k=0; k<n1; k++) perm1[k] = k; 6195 6196 /* Manipulate indices so that entries with negative row or col indices will have smallest 6197 row indices, local entries will have greater but negative row indices, and remote entries 6198 will have positive row indices. 6199 */ 6200 for (k=0; k<n1; k++) { 6201 if (i1[k] < 0 || j1[k] < 0) i1[k] = PETSC_MIN_INT; /* e.g., -2^31, minimal to move them ahead */ 6202 else if (i1[k] >= rstart && i1[k] < rend) i1[k] -= PETSC_MAX_INT; /* e.g., minus 2^31-1 to shift local rows to range of [-PETSC_MAX_INT, -1] */ 6203 else PetscCheck(!mat->nooffprocentries,PETSC_COMM_SELF,PETSC_ERR_USER_INPUT,"MAT_NO_OFF_PROC_ENTRIES is set but insert to remote rows"); 6204 else if (mpiaij->donotstash) i1[k] = PETSC_MIN_INT; /* Ignore offproc entries as if they had negative indices */ 6205 } 6206 6207 /* Sort by row; after that, [0,k) have ignored entires, [k,rem) have local rows and [rem,n1) have remote rows */ 6208 PetscCall(PetscSortIntWithIntCountArrayPair(n1,i1,j1,perm1)); 6209 for (k=0; k<n1; k++) {if (i1[k] > PETSC_MIN_INT) break;} /* Advance k to the first entry we need to take care of */ 6210 PetscCall(PetscSortedIntUpperBound(i1,k,n1,rend-1-PETSC_MAX_INT,&rem)); /* rem is upper bound of the last local row */ 6211 for (; k<rem; k++) i1[k] += PETSC_MAX_INT; /* Revert row indices of local rows*/ 6212 6213 /* ---------------------------------------------------------------------------*/ 6214 /* Split local rows into diag/offdiag portions */ 6215 /* ---------------------------------------------------------------------------*/ 6216 PetscCount 
*rowBegin1,*rowMid1,*rowEnd1; 6217 PetscCount *Ajmap1,*Aperm1,*Bjmap1,*Bperm1,*Cperm1; 6218 PetscCount Annz1,Bnnz1,Atot1,Btot1; 6219 6220 PetscCall(PetscCalloc3(m,&rowBegin1,m,&rowMid1,m,&rowEnd1)); 6221 PetscCall(PetscMalloc1(n1-rem,&Cperm1)); 6222 PetscCall(MatSplitEntries_Internal(mat,rem,i1,j1,perm1,rowBegin1,rowMid1,rowEnd1,&Atot1,&Aperm1,&Annz1,&Ajmap1,&Btot1,&Bperm1,&Bnnz1,&Bjmap1)); 6223 6224 /* ---------------------------------------------------------------------------*/ 6225 /* Send remote rows to their owner */ 6226 /* ---------------------------------------------------------------------------*/ 6227 /* Find which rows should be sent to which remote ranks*/ 6228 PetscInt nsend = 0; /* Number of MPI ranks to send data to */ 6229 PetscMPIInt *sendto; /* [nsend], storing remote ranks */ 6230 PetscInt *nentries; /* [nsend], storing number of entries sent to remote ranks; Assume PetscInt is big enough for this count, and error if not */ 6231 const PetscInt *ranges; 6232 PetscInt maxNsend = size >= 128? 128 : size; /* Assume max 128 neighbors; realloc when needed */ 6233 6234 PetscCall(PetscLayoutGetRanges(mat->rmap,&ranges)); 6235 PetscCall(PetscMalloc2(maxNsend,&sendto,maxNsend,&nentries)); 6236 for (k=rem; k<n1;) { 6237 PetscMPIInt owner; 6238 PetscInt firstRow,lastRow; 6239 6240 /* Locate a row range */ 6241 firstRow = i1[k]; /* first row of this owner */ 6242 PetscCall(PetscLayoutFindOwner(mat->rmap,firstRow,&owner)); 6243 lastRow = ranges[owner+1]-1; /* last row of this owner */ 6244 6245 /* Find the first index 'p' in [k,n) with i[p] belonging to next owner */ 6246 PetscCall(PetscSortedIntUpperBound(i1,k,n1,lastRow,&p)); 6247 6248 /* All entries in [k,p) belong to this remote owner */ 6249 if (nsend >= maxNsend) { /* Double the remote ranks arrays if not long enough */ 6250 PetscMPIInt *sendto2; 6251 PetscInt *nentries2; 6252 PetscInt maxNsend2 = (maxNsend <= size/2) ? 
maxNsend*2 : size; 6253 6254 PetscCall(PetscMalloc2(maxNsend2,&sendto2,maxNsend2,&nentries2)); 6255 PetscCall(PetscArraycpy(sendto2,sendto,maxNsend)); 6256 PetscCall(PetscArraycpy(nentries2,nentries2,maxNsend+1)); 6257 PetscCall(PetscFree2(sendto,nentries2)); 6258 sendto = sendto2; 6259 nentries = nentries2; 6260 maxNsend = maxNsend2; 6261 } 6262 sendto[nsend] = owner; 6263 nentries[nsend] = p - k; 6264 PetscCall(PetscCountCast(p-k,&nentries[nsend])); 6265 nsend++; 6266 k = p; 6267 } 6268 6269 /* Build 1st SF to know offsets on remote to send data */ 6270 PetscSF sf1; 6271 PetscInt nroots = 1,nroots2 = 0; 6272 PetscInt nleaves = nsend,nleaves2 = 0; 6273 PetscInt *offsets; 6274 PetscSFNode *iremote; 6275 6276 PetscCall(PetscSFCreate(comm,&sf1)); 6277 PetscCall(PetscMalloc1(nsend,&iremote)); 6278 PetscCall(PetscMalloc1(nsend,&offsets)); 6279 for (k=0; k<nsend; k++) { 6280 iremote[k].rank = sendto[k]; 6281 iremote[k].index = 0; 6282 nleaves2 += nentries[k]; 6283 PetscCheck(nleaves2 >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Number of SF leaves is too large for PetscInt"); 6284 } 6285 PetscCall(PetscSFSetGraph(sf1,nroots,nleaves,NULL,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER)); 6286 PetscCall(PetscSFFetchAndOpWithMemTypeBegin(sf1,MPIU_INT,PETSC_MEMTYPE_HOST,&nroots2/*rootdata*/,PETSC_MEMTYPE_HOST,nentries/*leafdata*/,PETSC_MEMTYPE_HOST,offsets/*leafupdate*/,MPI_SUM)); 6287 PetscCall(PetscSFFetchAndOpEnd(sf1,MPIU_INT,&nroots2,nentries,offsets,MPI_SUM)); /* Would nroots2 overflow, we check offsets[] below */ 6288 PetscCall(PetscSFDestroy(&sf1)); 6289 PetscAssert(nleaves2 == n1-rem,PETSC_COMM_SELF,PETSC_ERR_PLIB,"nleaves2 " PetscInt_FMT " != number of remote entries " PetscCount_FMT "",nleaves2,n1-rem); 6290 6291 /* Build 2nd SF to send remote COOs to their owner */ 6292 PetscSF sf2; 6293 nroots = nroots2; 6294 nleaves = nleaves2; 6295 PetscCall(PetscSFCreate(comm,&sf2)); 6296 PetscCall(PetscSFSetFromOptions(sf2)); 6297 PetscCall(PetscMalloc1(nleaves,&iremote)); 
6298 p = 0; 6299 for (k=0; k<nsend; k++) { 6300 PetscCheck(offsets[k] >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Number of SF roots is too large for PetscInt"); 6301 for (q=0; q<nentries[k]; q++,p++) { 6302 iremote[p].rank = sendto[k]; 6303 iremote[p].index = offsets[k] + q; 6304 } 6305 } 6306 PetscCall(PetscSFSetGraph(sf2,nroots,nleaves,NULL,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER)); 6307 6308 /* sf2 only sends contiguous leafdata to contiguous rootdata. We record the permuation which will be used to fill leafdata */ 6309 PetscCall(PetscArraycpy(Cperm1,perm1+rem,n1-rem)); 6310 6311 /* Send the remote COOs to their owner */ 6312 PetscInt n2 = nroots,*i2,*j2; /* Buffers for received COOs from other ranks, along with a permutation array */ 6313 PetscCount *perm2; /* Though PetscInt is enough for remote entries, we use PetscCount here as we want to reuse MatSplitEntries_Internal() */ 6314 PetscCall(PetscMalloc3(n2,&i2,n2,&j2,n2,&perm2)); 6315 PetscCall(PetscSFReduceWithMemTypeBegin(sf2,MPIU_INT,PETSC_MEMTYPE_HOST,i1+rem,PETSC_MEMTYPE_HOST,i2,MPI_REPLACE)); 6316 PetscCall(PetscSFReduceEnd(sf2,MPIU_INT,i1+rem,i2,MPI_REPLACE)); 6317 PetscCall(PetscSFReduceWithMemTypeBegin(sf2,MPIU_INT,PETSC_MEMTYPE_HOST,j1+rem,PETSC_MEMTYPE_HOST,j2,MPI_REPLACE)); 6318 PetscCall(PetscSFReduceEnd(sf2,MPIU_INT,j1+rem,j2,MPI_REPLACE)); 6319 6320 PetscCall(PetscFree(offsets)); 6321 PetscCall(PetscFree2(sendto,nentries)); 6322 6323 /* ---------------------------------------------------------------*/ 6324 /* Sort received COOs by row along with the permutation array */ 6325 /* ---------------------------------------------------------------*/ 6326 for (k=0; k<n2; k++) perm2[k] = k; 6327 PetscCall(PetscSortIntWithIntCountArrayPair(n2,i2,j2,perm2)); 6328 6329 /* ---------------------------------------------------------------*/ 6330 /* Split received COOs into diag/offdiag portions */ 6331 /* ---------------------------------------------------------------*/ 6332 PetscCount 
*rowBegin2,*rowMid2,*rowEnd2; 6333 PetscCount *Ajmap2,*Aperm2,*Bjmap2,*Bperm2; 6334 PetscCount Annz2,Bnnz2,Atot2,Btot2; 6335 6336 PetscCall(PetscCalloc3(m,&rowBegin2,m,&rowMid2,m,&rowEnd2)); 6337 PetscCall(MatSplitEntries_Internal(mat,n2,i2,j2,perm2,rowBegin2,rowMid2,rowEnd2,&Atot2,&Aperm2,&Annz2,&Ajmap2,&Btot2,&Bperm2,&Bnnz2,&Bjmap2)); 6338 6339 /* --------------------------------------------------------------------------*/ 6340 /* Merge local COOs with received COOs: diag with diag, offdiag with offdiag */ 6341 /* --------------------------------------------------------------------------*/ 6342 PetscInt *Ai,*Bi; 6343 PetscInt *Aj,*Bj; 6344 6345 PetscCall(PetscMalloc1(m+1,&Ai)); 6346 PetscCall(PetscMalloc1(m+1,&Bi)); 6347 PetscCall(PetscMalloc1(Annz1+Annz2,&Aj)); /* Since local and remote entries might have dups, we might allocate excess memory */ 6348 PetscCall(PetscMalloc1(Bnnz1+Bnnz2,&Bj)); 6349 6350 PetscCount *Aimap1,*Bimap1,*Aimap2,*Bimap2; 6351 PetscCall(PetscMalloc4(Annz1,&Aimap1,Bnnz1,&Bimap1,Annz2,&Aimap2,Bnnz2,&Bimap2)); 6352 6353 PetscCall(MatMergeEntries_Internal(mat,j1,j2,rowBegin1,rowMid1,rowBegin2,rowMid2,Ajmap1,Ajmap2,Aimap1,Aimap2,Ai,Aj)); 6354 PetscCall(MatMergeEntries_Internal(mat,j1,j2,rowMid1, rowEnd1,rowMid2, rowEnd2,Bjmap1,Bjmap2,Bimap1,Bimap2,Bi,Bj)); 6355 PetscCall(PetscFree3(rowBegin1,rowMid1,rowEnd1)); 6356 PetscCall(PetscFree3(rowBegin2,rowMid2,rowEnd2)); 6357 PetscCall(PetscFree3(i1,j1,perm1)); 6358 PetscCall(PetscFree3(i2,j2,perm2)); 6359 6360 /* Reallocate Aj, Bj once we know actual numbers of unique nonzeros in A and B */ 6361 PetscInt Annz = Ai[m]; 6362 PetscInt Bnnz = Bi[m]; 6363 if (Annz < Annz1 + Annz2) { 6364 PetscInt *Aj_new; 6365 PetscCall(PetscMalloc1(Annz,&Aj_new)); 6366 PetscCall(PetscArraycpy(Aj_new,Aj,Annz)); 6367 PetscCall(PetscFree(Aj)); 6368 Aj = Aj_new; 6369 } 6370 6371 if (Bnnz < Bnnz1 + Bnnz2) { 6372 PetscInt *Bj_new; 6373 PetscCall(PetscMalloc1(Bnnz,&Bj_new)); 6374 PetscCall(PetscArraycpy(Bj_new,Bj,Bnnz)); 6375 
PetscCall(PetscFree(Bj)); 6376 Bj = Bj_new; 6377 } 6378 6379 /* --------------------------------------------------------------------------------*/ 6380 /* Create new submatrices for on-process and off-process coupling */ 6381 /* --------------------------------------------------------------------------------*/ 6382 PetscScalar *Aa,*Ba; 6383 MatType rtype; 6384 Mat_SeqAIJ *a,*b; 6385 PetscCall(PetscCalloc1(Annz,&Aa)); /* Zero matrix on device */ 6386 PetscCall(PetscCalloc1(Bnnz,&Ba)); 6387 /* make Aj[] local, i.e, based off the start column of the diagonal portion */ 6388 if (cstart) {for (k=0; k<Annz; k++) Aj[k] -= cstart;} 6389 PetscCall(MatDestroy(&mpiaij->A)); 6390 PetscCall(MatDestroy(&mpiaij->B)); 6391 PetscCall(MatGetRootType_Private(mat,&rtype)); 6392 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,Ai,Aj,Aa,&mpiaij->A)); 6393 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,mat->cmap->N,Bi,Bj,Ba,&mpiaij->B)); 6394 PetscCall(MatSetUpMultiply_MPIAIJ(mat)); 6395 6396 a = (Mat_SeqAIJ*)mpiaij->A->data; 6397 b = (Mat_SeqAIJ*)mpiaij->B->data; 6398 a->singlemalloc = b->singlemalloc = PETSC_FALSE; /* Let newmat own Ai,Aj,Aa,Bi,Bj,Ba */ 6399 a->free_a = b->free_a = PETSC_TRUE; 6400 a->free_ij = b->free_ij = PETSC_TRUE; 6401 6402 /* conversion must happen AFTER multiply setup */ 6403 PetscCall(MatConvert(mpiaij->A,rtype,MAT_INPLACE_MATRIX,&mpiaij->A)); 6404 PetscCall(MatConvert(mpiaij->B,rtype,MAT_INPLACE_MATRIX,&mpiaij->B)); 6405 PetscCall(VecDestroy(&mpiaij->lvec)); 6406 PetscCall(MatCreateVecs(mpiaij->B,&mpiaij->lvec,NULL)); 6407 PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)mpiaij->lvec)); 6408 6409 mpiaij->coo_n = coo_n; 6410 mpiaij->coo_sf = sf2; 6411 mpiaij->sendlen = nleaves; 6412 mpiaij->recvlen = nroots; 6413 6414 mpiaij->Annz1 = Annz1; 6415 mpiaij->Annz2 = Annz2; 6416 mpiaij->Bnnz1 = Bnnz1; 6417 mpiaij->Bnnz2 = Bnnz2; 6418 6419 mpiaij->Atot1 = Atot1; 6420 mpiaij->Atot2 = Atot2; 6421 mpiaij->Btot1 = Btot1; 6422 mpiaij->Btot2 = 
Btot2; 6423 6424 mpiaij->Aimap1 = Aimap1; 6425 mpiaij->Aimap2 = Aimap2; 6426 mpiaij->Bimap1 = Bimap1; 6427 mpiaij->Bimap2 = Bimap2; 6428 6429 mpiaij->Ajmap1 = Ajmap1; 6430 mpiaij->Ajmap2 = Ajmap2; 6431 mpiaij->Bjmap1 = Bjmap1; 6432 mpiaij->Bjmap2 = Bjmap2; 6433 6434 mpiaij->Aperm1 = Aperm1; 6435 mpiaij->Aperm2 = Aperm2; 6436 mpiaij->Bperm1 = Bperm1; 6437 mpiaij->Bperm2 = Bperm2; 6438 6439 mpiaij->Cperm1 = Cperm1; 6440 6441 /* Allocate in preallocation. If not used, it has zero cost on host */ 6442 PetscCall(PetscMalloc2(mpiaij->sendlen,&mpiaij->sendbuf,mpiaij->recvlen,&mpiaij->recvbuf)); 6443 PetscFunctionReturn(0); 6444 } 6445 6446 static PetscErrorCode MatSetValuesCOO_MPIAIJ(Mat mat,const PetscScalar v[],InsertMode imode) 6447 { 6448 Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ*)mat->data; 6449 Mat A = mpiaij->A,B = mpiaij->B; 6450 PetscCount Annz1 = mpiaij->Annz1,Annz2 = mpiaij->Annz2,Bnnz1 = mpiaij->Bnnz1,Bnnz2 = mpiaij->Bnnz2; 6451 PetscScalar *Aa,*Ba; 6452 PetscScalar *sendbuf = mpiaij->sendbuf; 6453 PetscScalar *recvbuf = mpiaij->recvbuf; 6454 const PetscCount *Ajmap1 = mpiaij->Ajmap1,*Ajmap2 = mpiaij->Ajmap2,*Aimap1 = mpiaij->Aimap1,*Aimap2 = mpiaij->Aimap2; 6455 const PetscCount *Bjmap1 = mpiaij->Bjmap1,*Bjmap2 = mpiaij->Bjmap2,*Bimap1 = mpiaij->Bimap1,*Bimap2 = mpiaij->Bimap2; 6456 const PetscCount *Aperm1 = mpiaij->Aperm1,*Aperm2 = mpiaij->Aperm2,*Bperm1 = mpiaij->Bperm1,*Bperm2 = mpiaij->Bperm2; 6457 const PetscCount *Cperm1 = mpiaij->Cperm1; 6458 6459 PetscFunctionBegin; 6460 PetscCall(MatSeqAIJGetArray(A,&Aa)); /* Might read and write matrix values */ 6461 PetscCall(MatSeqAIJGetArray(B,&Ba)); 6462 if (imode == INSERT_VALUES) { 6463 PetscCall(PetscMemzero(Aa,((Mat_SeqAIJ*)A->data)->nz*sizeof(PetscScalar))); 6464 PetscCall(PetscMemzero(Ba,((Mat_SeqAIJ*)B->data)->nz*sizeof(PetscScalar))); 6465 } 6466 6467 /* Pack entries to be sent to remote */ 6468 for (PetscCount i=0; i<mpiaij->sendlen; i++) sendbuf[i] = v[Cperm1[i]]; 6469 6470 /* Send remote entries to their 
owner and overlap the communication with local computation */ 6471 PetscCall(PetscSFReduceWithMemTypeBegin(mpiaij->coo_sf,MPIU_SCALAR,PETSC_MEMTYPE_HOST,sendbuf,PETSC_MEMTYPE_HOST,recvbuf,MPI_REPLACE)); 6472 /* Add local entries to A and B */ 6473 for (PetscCount i=0; i<Annz1; i++) { 6474 for (PetscCount k=Ajmap1[i]; k<Ajmap1[i+1]; k++) Aa[Aimap1[i]] += v[Aperm1[k]]; 6475 } 6476 for (PetscCount i=0; i<Bnnz1; i++) { 6477 for (PetscCount k=Bjmap1[i]; k<Bjmap1[i+1]; k++) Ba[Bimap1[i]] += v[Bperm1[k]]; 6478 } 6479 PetscCall(PetscSFReduceEnd(mpiaij->coo_sf,MPIU_SCALAR,sendbuf,recvbuf,MPI_REPLACE)); 6480 6481 /* Add received remote entries to A and B */ 6482 for (PetscCount i=0; i<Annz2; i++) { 6483 for (PetscCount k=Ajmap2[i]; k<Ajmap2[i+1]; k++) Aa[Aimap2[i]] += recvbuf[Aperm2[k]]; 6484 } 6485 for (PetscCount i=0; i<Bnnz2; i++) { 6486 for (PetscCount k=Bjmap2[i]; k<Bjmap2[i+1]; k++) Ba[Bimap2[i]] += recvbuf[Bperm2[k]]; 6487 } 6488 PetscCall(MatSeqAIJRestoreArray(A,&Aa)); 6489 PetscCall(MatSeqAIJRestoreArray(B,&Ba)); 6490 PetscFunctionReturn(0); 6491 } 6492 6493 /* ----------------------------------------------------------------*/ 6494 6495 /*MC 6496 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 6497 6498 Options Database Keys: 6499 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 6500 6501 Level: beginner 6502 6503 Notes: 6504 MatSetValues() may be called for this matrix type with a NULL argument for the numerical values, 6505 in this case the values associated with the rows and columns one passes in are set to zero 6506 in the matrix 6507 6508 MatSetOptions(,MAT_STRUCTURE_ONLY,PETSC_TRUE) may be called for this matrix type. 
   In this case no space is allocated for the nonzero entries and any entries passed with MatSetValues() are ignored

.seealso: MatCreateAIJ()
M*/

/* Constructor for MATMPIAIJ: allocates the implementation struct, installs the function table,
   and registers all the type-specific operations (preallocation, conversions, products, COO) */
PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
{
  Mat_MPIAIJ  *b;
  PetscMPIInt size;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B),&size));

  PetscCall(PetscNewLog(B,&b));
  B->data       = (void*)b;
  PetscCall(PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps)));
  B->assembled  = PETSC_FALSE;
  B->insertmode = NOT_SET_VALUES;
  b->size       = size;

  PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank));

  /* build cache for off array entries formed */
  PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash));

  b->donotstash  = PETSC_FALSE;
  b->colmap      = NULL;  /* global-to-local column map, built lazily at assembly */
  b->garray      = NULL;  /* global column indices of the off-diagonal block */
  b->roworiented = PETSC_TRUE;

  /* stuff used for matrix vector multiply */
  b->lvec  = NULL;
  b->Mvctx = NULL;

  /* stuff for MatGetRow() */
  b->rowindices   = NULL;
  b->rowvalues    = NULL;
  b->getrowactive = PETSC_FALSE;

  /* flexible pointer used in CUSPARSE classes */
  b->spptr = NULL;

  /* Register type-specific methods resolvable via PetscObjectQueryFunction() */
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetUseScalableIncreaseOverlap_C",MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatResetPreallocation_C",MatResetPreallocation_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ));
  /* Conversions to other matrix types; device/back-end ones are compiled in conditionally */
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijsell_C",MatConvert_MPIAIJ_MPIAIJSELL));
#if defined(PETSC_HAVE_CUDA)
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcusparse_C",MatConvert_MPIAIJ_MPIAIJCUSPARSE));
#endif
#if defined(PETSC_HAVE_KOKKOS_KERNELS)
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijkokkos_C",MatConvert_MPIAIJ_MPIAIJKokkos));
#endif
#if defined(PETSC_HAVE_MKL_SPARSE)
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijmkl_C",MatConvert_MPIAIJ_MPIAIJMKL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpibaij_C",MatConvert_MPIAIJ_MPIBAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpidense_C",MatConvert_MPIAIJ_MPIDense));
#if defined(PETSC_HAVE_ELEMENTAL)
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_elemental_C",MatConvert_MPIAIJ_Elemental));
#endif
#if defined(PETSC_HAVE_SCALAPACK)
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_scalapack_C",MatConvert_AIJ_ScaLAPACK));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_is_C",MatConvert_XAIJ_IS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisell_C",MatConvert_MPIAIJ_MPISELL));
#if defined(PETSC_HAVE_HYPRE)
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_hypre_C",MatConvert_AIJ_HYPRE));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_transpose_mpiaij_mpiaij_C",MatProductSetFromOptions_Transpose_AIJ_AIJ));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_is_mpiaij_C",MatProductSetFromOptions_IS_XAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaij_mpiaij_C",MatProductSetFromOptions_MPIAIJ));
  /* COO assembly interface */
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatSetPreallocationCOO_C",MatSetPreallocationCOO_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatSetValuesCOO_C",MatSetValuesCOO_MPIAIJ));
  PetscCall(PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ));
  PetscFunctionReturn(0);
}

/*@C
     MatCreateMPIAIJWithSplitArrays - creates an MPI AIJ matrix using arrays that contain the "diagonal"
         and "off-diagonal" part of the matrix in CSR format.

   Collective

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (Cannot be PETSC_DECIDE)
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
       calculated if N is given) For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.  i - row indices for "diagonal" portion of matrix; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
.  j - column indices, which must be local, i.e., based off the start column of the diagonal portion
.  a - matrix values
.  oi - row indices for "off-diagonal" portion of matrix; that is oi[0] = 0, oi[row] = oi[row-1] + number of elements in that row of the matrix
.  oj - column indices, which must be global, representing global columns in the MPIAIJ matrix
-  oa - matrix values

   Output Parameter:
.  mat - the matrix

   Level: advanced

   Notes:
       The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
       must free the arrays once the matrix has been destroyed and not before.

       The i and j indices are 0 based

       See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix

       This sets local rows and cannot be used to set off-processor values.

       Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
       legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
       not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because
       the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
       keep track of the underlying array. Use MatSetOption(A,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all
       communication if it is known that only local entries will be set.

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
          MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays()
@*/
PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
{
  Mat_MPIAIJ *maij;

  PetscFunctionBegin;
  /* m must be a concrete local row count: both split CSR index arrays are sized by it below */
  PetscCheckFalse(m < 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
  PetscCheckFalse(i[0],PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
  PetscCheckFalse(oi[0],PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
  PetscCall(MatCreate(comm,mat));
  PetscCall(MatSetSizes(*mat,m,n,M,N));
  PetscCall(MatSetType(*mat,MATMPIAIJ));
  maij = (Mat_MPIAIJ*) (*mat)->data;

  /* mark preallocated so no preallocation pass is attempted before assembly */
  (*mat)->preallocated = PETSC_TRUE;

  PetscCall(PetscLayoutSetUp((*mat)->rmap));
  PetscCall(PetscLayoutSetUp((*mat)->cmap));

  /* Wrap the caller's arrays directly (NOT copied, see Notes above): A holds the "diagonal"
     block with local column ids (j), B the "off-diagonal" block with global column ids (oj) */
  PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A));
  PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B));

  /* only local rows were provided, so assembly needs no off-process communication */
  PetscCall(MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE));
  PetscCall(MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY));
  PetscCall(MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE));
  /* the user-provided nonzero pattern is frozen; inserting a new location is an error */
  PetscCall(MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE));
  PetscFunctionReturn(0);
}

/* Backend matrix-product context stored in C->product->data by MatProductSymbolic_MPIAIJBACKEND() */
typedef struct {
  Mat       *mp;    /* intermediate products */
  PetscBool *mptmp; /* is the intermediate product temporary ?
*/
  PetscInt   cp; /* number of intermediate products */

  /* support for MatGetBrowsOfAoCols_MPIAIJ for P_oth */
  PetscInt    *startsj_s,*startsj_r;
  PetscScalar *bufa;
  Mat          P_oth;

  /* may take advantage of merging product->B */
  Mat Bloc; /* B-local by merging diag and off-diag */

  /* cusparse does not have support to split between symbolic and numeric phases.
     When api_user is true, we don't need to update the numerical values
     of the temporary storage */
  PetscBool reusesym;

  /* support for COO values insertion */
  PetscScalar  *coo_v,*coo_w; /* store on-process and off-process COO scalars, and used as MPI recv/send buffers respectively */
  PetscInt    **own;          /* own[i] points to address of on-process COO indices for Mat mp[i] */
  PetscInt    **off;          /* off[i] points to address of off-process COO indices for Mat mp[i] */
  PetscBool     hasoffproc;   /* if true, have off-process values insertion (i.e. AtB or PtAP) */
  PetscSF       sf;           /* used for non-local values insertion and memory malloc */
  PetscMemType  mtype;

  /* customization */
  PetscBool abmerge;
  PetscBool P_oth_bind;
} MatMatMPIAIJBACKEND;

/* Destructor for the backend product context; releases every intermediate product and buffer it owns */
PetscErrorCode MatDestroy_MatMatMPIAIJBACKEND(void *data)
{
  MatMatMPIAIJBACKEND *mmdata = (MatMatMPIAIJBACKEND*)data;
  PetscInt            i;

  PetscFunctionBegin;
  PetscCall(PetscFree2(mmdata->startsj_s,mmdata->startsj_r));
  PetscCall(PetscFree(mmdata->bufa));
  /* coo_v/coo_w were allocated with PetscSFMalloc on mmdata->mtype, so free them the same way */
  PetscCall(PetscSFFree(mmdata->sf,mmdata->mtype,mmdata->coo_v));
  PetscCall(PetscSFFree(mmdata->sf,mmdata->mtype,mmdata->coo_w));
  PetscCall(MatDestroy(&mmdata->P_oth));
  PetscCall(MatDestroy(&mmdata->Bloc));
  PetscCall(PetscSFDestroy(&mmdata->sf));
  for (i = 0; i < mmdata->cp; i++) {
    PetscCall(MatDestroy(&mmdata->mp[i]));
  }
  PetscCall(PetscFree2(mmdata->mp,mmdata->mptmp));
  /* own[0]/off[0] hold the single shared index arrays that own[1..]/off[1..] point into */
  PetscCall(PetscFree(mmdata->own[0]));
  PetscCall(PetscFree(mmdata->own));
  PetscCall(PetscFree(mmdata->off[0]));
  PetscCall(PetscFree(mmdata->off));
  PetscCall(PetscFree(mmdata));
  PetscFunctionReturn(0);
}

/* Copy selected n entries with indices in idx[] of A to v[].
   If idx is NULL, copy the whole data array of A to v[]
*/
static PetscErrorCode MatSeqAIJCopySubArray(Mat A, PetscInt n, const PetscInt idx[], PetscScalar v[])
{
  PetscErrorCode (*f)(Mat,PetscInt,const PetscInt[],PetscScalar[]);

  PetscFunctionBegin;
  /* subclasses (e.g. device backends) may compose a specialized copy; prefer it when present */
  PetscCall(PetscObjectQueryFunction((PetscObject)A,"MatSeqAIJCopySubArray_C",&f));
  if (f) {
    PetscCall((*f)(A,n,idx,v));
  } else {
    const PetscScalar *vv;

    PetscCall(MatSeqAIJGetArrayRead(A,&vv));
    if (n && idx) {
      PetscScalar    *w  = v;
      const PetscInt *oi = idx;
      PetscInt       j;

      /* gather: v[j] = values[idx[j]] */
      for (j = 0; j < n; j++) *w++ = vv[*oi++];
    } else {
      PetscCall(PetscArraycpy(v,vv,n));
    }
    PetscCall(MatSeqAIJRestoreArrayRead(A,&vv));
  }
  PetscFunctionReturn(0);
}

/* Numeric phase: refresh temporaries, run each intermediate product, then scatter the
   resulting values into C via MatSetValuesCOO() using the maps built in the symbolic phase */
static PetscErrorCode MatProductNumeric_MPIAIJBACKEND(Mat C)
{
  MatMatMPIAIJBACKEND *mmdata;
  PetscInt            i,n_d,n_o;

  PetscFunctionBegin;
  MatCheckProduct(C,1);
  PetscCheck(C->product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty");
  mmdata = (MatMatMPIAIJBACKEND*)C->product->data;
  if (!mmdata->reusesym) { /* update temporary matrices */
    if (mmdata->P_oth) {
      PetscCall(MatGetBrowsOfAoCols_MPIAIJ(C->product->A,C->product->B,MAT_REUSE_MATRIX,&mmdata->startsj_s,&mmdata->startsj_r,&mmdata->bufa,&mmdata->P_oth));
    }
    if (mmdata->Bloc) {
      PetscCall(MatMPIAIJGetLocalMatMerge(C->product->B,MAT_REUSE_MATRIX,NULL,&mmdata->Bloc));
    }
  }
  /* reusesym only skips the very first numeric update right after the symbolic phase */
  mmdata->reusesym = PETSC_FALSE;

  for (i = 0; i < mmdata->cp; i++) {
    PetscCheckFalse(!mmdata->mp[i]->ops->productnumeric,PetscObjectComm((PetscObject)mmdata->mp[i]),PETSC_ERR_PLIB,"Missing numeric op for 
%s",MatProductTypes[mmdata->mp[i]->product->type]);
    PetscCall((*mmdata->mp[i]->ops->productnumeric)(mmdata->mp[i]));
  }
  /* pack values of the non-temporary intermediate products into the COO buffers:
     coo_v gets locally-inserted values, coo_w values destined for other processes */
  for (i = 0, n_d = 0, n_o = 0; i < mmdata->cp; i++) {
    PetscInt noff = mmdata->off[i+1] - mmdata->off[i];

    if (mmdata->mptmp[i]) continue;
    if (noff) {
      PetscInt nown = mmdata->own[i+1] - mmdata->own[i];

      PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i],noff,mmdata->off[i],mmdata->coo_w + n_o));
      PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i],nown,mmdata->own[i],mmdata->coo_v + n_d));
      n_o += noff;
      n_d += nown;
    } else {
      Mat_SeqAIJ *mm = (Mat_SeqAIJ*)mmdata->mp[i]->data;

      /* no off-process entries for this product: copy its whole value array */
      PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i],mm->nz,NULL,mmdata->coo_v + n_d));
      n_d += mm->nz;
    }
  }
  if (mmdata->hasoffproc) { /* off-process insertion */
    PetscCall(PetscSFGatherBegin(mmdata->sf,MPIU_SCALAR,mmdata->coo_w,mmdata->coo_v+n_d));
    PetscCall(PetscSFGatherEnd(mmdata->sf,MPIU_SCALAR,mmdata->coo_w,mmdata->coo_v+n_d));
  }
  PetscCall(MatSetValuesCOO(C,mmdata->coo_v,INSERT_VALUES));
  PetscFunctionReturn(0);
}

/* Support for Pt * A, A * P, or Pt * A * P */
#define MAX_NUMBER_INTERMEDIATE 4
PetscErrorCode MatProductSymbolic_MPIAIJBACKEND(Mat C)
{
  Mat_Product            *product = C->product;
  Mat                    A,P,mp[MAX_NUMBER_INTERMEDIATE]; /* A, P and a series of intermediate matrices */
  Mat_MPIAIJ             *a,*p;
  MatMatMPIAIJBACKEND    *mmdata;
  ISLocalToGlobalMapping P_oth_l2g = NULL;
  IS                     glob = NULL;
  const char             *prefix;
  char                   pprefix[256];
  const PetscInt         *globidx,*P_oth_idx;
  PetscInt               i,j,cp,m,n,M,N,*coo_i,*coo_j;
  PetscCount             ncoo,ncoo_d,ncoo_o,ncoo_oown;
  PetscInt               cmapt[MAX_NUMBER_INTERMEDIATE],rmapt[MAX_NUMBER_INTERMEDIATE]; /* col/row map type for each Mat in mp[]. */
                                                                                        /* type-0: consecutive, start from 0; type-1: consecutive with */
                                                                                        /* a base offset; type-2: sparse with a local to global map table */
  const PetscInt         *cmapa[MAX_NUMBER_INTERMEDIATE],*rmapa[MAX_NUMBER_INTERMEDIATE]; /* col/row local to global map array (table) for type-2 map type */

  MatProductType         ptype;
  PetscBool              mptmp[MAX_NUMBER_INTERMEDIATE],hasoffproc = PETSC_FALSE,iscuda,iskokk;
  PetscMPIInt            size;
  PetscErrorCode         ierr;

  PetscFunctionBegin;
  MatCheckProduct(C,1);
  PetscCheck(!product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data not empty");
  ptype = product->type;
  /* for a symmetric A, At*B can be computed as the cheaper A*B */
  if (product->A->symmetric && ptype == MATPRODUCT_AtB) {
    ptype                                          = MATPRODUCT_AB;
    product->symbolic_used_the_fact_A_is_symmetric = PETSC_TRUE;
  }
  /* determine result sizes and whether values must be scattered to other processes */
  switch (ptype) {
  case MATPRODUCT_AB:
    A          = product->A;
    P          = product->B;
    m          = A->rmap->n;
    n          = P->cmap->n;
    M          = A->rmap->N;
    N          = P->cmap->N;
    hasoffproc = PETSC_FALSE; /* will not scatter mat product values to other processes */
    break;
  case MATPRODUCT_AtB:
    P          = product->A;
    A          = product->B;
    m          = P->cmap->n;
    n          = A->cmap->n;
    M          = P->cmap->N;
    N          = A->cmap->N;
    hasoffproc = PETSC_TRUE;
    break;
  case MATPRODUCT_PtAP:
    A          = product->A;
    P          = product->B;
    m          = P->cmap->n;
    n          = P->cmap->n;
    M          = P->cmap->N;
    N          = P->cmap->N;
    hasoffproc = PETSC_TRUE;
    break;
  default:
    SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Not for product type %s",MatProductTypes[ptype]);
  }
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)C),&size));
  if (size == 1) hasoffproc = PETSC_FALSE;

  /* defaults */
  for (i=0;i<MAX_NUMBER_INTERMEDIATE;i++) {
    mp[i]    = NULL;
    mptmp[i] = PETSC_FALSE;
    rmapt[i] = -1;
    cmapt[i] = -1;
    rmapa[i] = NULL;
    cmapa[i] = NULL;
  }

  /* customization */
  PetscCall(PetscNew(&mmdata));
  mmdata->reusesym = product->api_user;
  if (ptype == MATPRODUCT_AB) {
    if (product->api_user) {
      ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatMatMult","Mat");PetscCall(ierr);
      PetscCall(PetscOptionsBool("-matmatmult_backend_mergeB","Merge product->B local matrices","MatMatMult",mmdata->abmerge,&mmdata->abmerge,NULL));
      PetscCall(PetscOptionsBool("-matmatmult_backend_pothbind","Bind P_oth to CPU","MatBindToCPU",mmdata->P_oth_bind,&mmdata->P_oth_bind,NULL));
      ierr = PetscOptionsEnd();PetscCall(ierr);
    } else {
      ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_AB","Mat");PetscCall(ierr);
      PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_mergeB","Merge product->B local matrices","MatMatMult",mmdata->abmerge,&mmdata->abmerge,NULL));
      PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_pothbind","Bind P_oth to CPU","MatBindToCPU",mmdata->P_oth_bind,&mmdata->P_oth_bind,NULL));
      ierr = PetscOptionsEnd();PetscCall(ierr);
    }
  } else if (ptype == MATPRODUCT_PtAP) {
    if (product->api_user) {
      ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatPtAP","Mat");PetscCall(ierr);
      PetscCall(PetscOptionsBool("-matptap_backend_pothbind","Bind P_oth to CPU","MatBindToCPU",mmdata->P_oth_bind,&mmdata->P_oth_bind,NULL));
      ierr = PetscOptionsEnd();PetscCall(ierr);
    } else {
      ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_PtAP","Mat");PetscCall(ierr);
      PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_pothbind","Bind P_oth to CPU","MatBindToCPU",mmdata->P_oth_bind,&mmdata->P_oth_bind,NULL));
      ierr = PetscOptionsEnd();PetscCall(ierr);
    }
  }
  a = (Mat_MPIAIJ*)A->data;
  p = (Mat_MPIAIJ*)P->data;
  PetscCall(MatSetSizes(C,m,n,M,N));
  PetscCall(PetscLayoutSetUp(C->rmap));
  PetscCall(PetscLayoutSetUp(C->cmap));
  PetscCall(MatSetType(C,((PetscObject)A)->type_name));
  PetscCall(MatGetOptionsPrefix(C,&prefix));

  /* build the list of intermediate (sequential) products mp[0..cp-1] together with the
     row/col local-to-global map type (rmapt/cmapt) needed to place their entries in C */
  cp = 0;
  switch (ptype) {
  case MATPRODUCT_AB: /* A * P */
    PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A,P,MAT_INITIAL_MATRIX,&mmdata->startsj_s,&mmdata->startsj_r,&mmdata->bufa,&mmdata->P_oth));

    /* A_diag * P_local (merged or not) */
    if (mmdata->abmerge) { /* P's diagonal and off-diag blocks are merged to one matrix, then multiplied by A_diag */
      /* P is product->B */
      PetscCall(MatMPIAIJGetLocalMatMerge(P,MAT_INITIAL_MATRIX,&glob,&mmdata->Bloc));
      PetscCall(MatProductCreate(a->A,mmdata->Bloc,NULL,&mp[cp]));
      PetscCall(MatProductSetType(mp[cp],MATPRODUCT_AB));
      PetscCall(MatProductSetFill(mp[cp],product->fill));
      PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp));
      PetscCall(MatSetOptionsPrefix(mp[cp],prefix));
      PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix));
      mp[cp]->product->api_user = product->api_user;
      PetscCall(MatProductSetFromOptions(mp[cp]));
      PetscCheckFalse(!mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
      PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
      PetscCall(ISGetIndices(glob,&globidx));
      rmapt[cp] = 1;
      cmapt[cp] = 2;
      cmapa[cp] = globidx;
      mptmp[cp] = PETSC_FALSE;
      cp++;
    } else { /* A_diag * P_diag and A_diag * P_off */
      PetscCall(MatProductCreate(a->A,p->A,NULL,&mp[cp]));
      PetscCall(MatProductSetType(mp[cp],MATPRODUCT_AB));
      PetscCall(MatProductSetFill(mp[cp],product->fill));
      PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp));
      PetscCall(MatSetOptionsPrefix(mp[cp],prefix));
      PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix));
      mp[cp]->product->api_user = product->api_user;
      PetscCall(MatProductSetFromOptions(mp[cp]));
      PetscCheckFalse(!mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
      PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
      rmapt[cp] = 1;
      cmapt[cp] = 1;
      mptmp[cp] = PETSC_FALSE;
      cp++;
      PetscCall(MatProductCreate(a->A,p->B,NULL,&mp[cp]));
      PetscCall(MatProductSetType(mp[cp],MATPRODUCT_AB));
      PetscCall(MatProductSetFill(mp[cp],product->fill));
      PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp));
      PetscCall(MatSetOptionsPrefix(mp[cp],prefix));
      PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix));
      mp[cp]->product->api_user = product->api_user;
      PetscCall(MatProductSetFromOptions(mp[cp]));
      PetscCheckFalse(!mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
      PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
      rmapt[cp] = 1;
      cmapt[cp] = 2;
      cmapa[cp] = p->garray;
      mptmp[cp] = PETSC_FALSE;
      cp++;
    }

    /* A_off * P_other */
    if (mmdata->P_oth) {
      PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth,&P_oth_l2g)); /* make P_oth use local col ids */
      PetscCall(ISLocalToGlobalMappingGetIndices(P_oth_l2g,&P_oth_idx));
      PetscCall(MatSetType(mmdata->P_oth,((PetscObject)(a->B))->type_name));
      PetscCall(MatBindToCPU(mmdata->P_oth,mmdata->P_oth_bind));
      PetscCall(MatProductCreate(a->B,mmdata->P_oth,NULL,&mp[cp]));
      PetscCall(MatProductSetType(mp[cp],MATPRODUCT_AB));
      PetscCall(MatProductSetFill(mp[cp],product->fill));
      PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp));
      PetscCall(MatSetOptionsPrefix(mp[cp],prefix));
      PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix));
      mp[cp]->product->api_user = product->api_user;
      PetscCall(MatProductSetFromOptions(mp[cp]));
      PetscCheckFalse(!mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
      PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
      rmapt[cp] = 1;
      cmapt[cp] = 2;
      cmapa[cp] = P_oth_idx;
      mptmp[cp] = PETSC_FALSE;
      cp++;
    }
    break;

  case MATPRODUCT_AtB: /* (P^t * A): P_diag * A_loc + P_off * A_loc */
    /* A is product->B */
    PetscCall(MatMPIAIJGetLocalMatMerge(A,MAT_INITIAL_MATRIX,&glob,&mmdata->Bloc));
    if (A == P) { /* when A==P, we can take advantage of the already merged mmdata->Bloc */
      PetscCall(MatProductCreate(mmdata->Bloc,mmdata->Bloc,NULL,&mp[cp]));
      PetscCall(MatProductSetType(mp[cp],MATPRODUCT_AtB));
      PetscCall(MatProductSetFill(mp[cp],product->fill));
      PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp));
      PetscCall(MatSetOptionsPrefix(mp[cp],prefix));
      PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix));
      mp[cp]->product->api_user = product->api_user;
      PetscCall(MatProductSetFromOptions(mp[cp]));
      PetscCheckFalse(!mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
      PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
      PetscCall(ISGetIndices(glob,&globidx));
      rmapt[cp] = 2;
      rmapa[cp] = globidx;
      cmapt[cp] = 2;
      cmapa[cp] = globidx;
      mptmp[cp] = PETSC_FALSE;
      cp++;
    } else {
      PetscCall(MatProductCreate(p->A,mmdata->Bloc,NULL,&mp[cp]));
      PetscCall(MatProductSetType(mp[cp],MATPRODUCT_AtB));
      PetscCall(MatProductSetFill(mp[cp],product->fill));
      PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp));
      PetscCall(MatSetOptionsPrefix(mp[cp],prefix));
      PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix));
      mp[cp]->product->api_user = product->api_user;
      PetscCall(MatProductSetFromOptions(mp[cp]));
      PetscCheckFalse(!mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
      PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
      PetscCall(ISGetIndices(glob,&globidx));
      rmapt[cp] = 1;
      cmapt[cp] = 2;
      cmapa[cp] = globidx;
      mptmp[cp] = PETSC_FALSE;
      cp++;
      PetscCall(MatProductCreate(p->B,mmdata->Bloc,NULL,&mp[cp]));
      PetscCall(MatProductSetType(mp[cp],MATPRODUCT_AtB));
      PetscCall(MatProductSetFill(mp[cp],product->fill));
      PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp));
      PetscCall(MatSetOptionsPrefix(mp[cp],prefix));
      PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix));
      mp[cp]->product->api_user = product->api_user;
      PetscCall(MatProductSetFromOptions(mp[cp]));
      PetscCheckFalse(!mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
      PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
      rmapt[cp] = 2;
      rmapa[cp] = p->garray;
      cmapt[cp] = 2;
      cmapa[cp] = globidx;
      mptmp[cp] = PETSC_FALSE;
      cp++;
    }
    break;
  case MATPRODUCT_PtAP:
    PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A,P,MAT_INITIAL_MATRIX,&mmdata->startsj_s,&mmdata->startsj_r,&mmdata->bufa,&mmdata->P_oth));
    /* P is product->B */
    PetscCall(MatMPIAIJGetLocalMatMerge(P,MAT_INITIAL_MATRIX,&glob,&mmdata->Bloc));
    PetscCall(MatProductCreate(a->A,mmdata->Bloc,NULL,&mp[cp]));
    PetscCall(MatProductSetType(mp[cp],MATPRODUCT_PtAP));
    PetscCall(MatProductSetFill(mp[cp],product->fill));
    PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp));
    PetscCall(MatSetOptionsPrefix(mp[cp],prefix));
    PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix));
    mp[cp]->product->api_user = product->api_user;
    PetscCall(MatProductSetFromOptions(mp[cp]));
    PetscCheckFalse(!mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
    PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
    PetscCall(ISGetIndices(glob,&globidx));
    rmapt[cp] = 2;
    rmapa[cp] = globidx;
    cmapt[cp] = 2;
    cmapa[cp] = globidx;
    mptmp[cp] = PETSC_FALSE;
    cp++;
    if (mmdata->P_oth) {
      PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth,&P_oth_l2g));
      PetscCall(ISLocalToGlobalMappingGetIndices(P_oth_l2g,&P_oth_idx));
      PetscCall(MatSetType(mmdata->P_oth,((PetscObject)(a->B))->type_name));
      PetscCall(MatBindToCPU(mmdata->P_oth,mmdata->P_oth_bind));
      PetscCall(MatProductCreate(a->B,mmdata->P_oth,NULL,&mp[cp]));
      PetscCall(MatProductSetType(mp[cp],MATPRODUCT_AB));
      PetscCall(MatProductSetFill(mp[cp],product->fill));
      PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp));
      PetscCall(MatSetOptionsPrefix(mp[cp],prefix));
      PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix));
      mp[cp]->product->api_user = product->api_user;
      PetscCall(MatProductSetFromOptions(mp[cp]));
      PetscCheckFalse(!mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
      PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
      mptmp[cp] = PETSC_TRUE; /* A_off * P_oth is only an input to the next product, never inserted into C */
      cp++;
      PetscCall(MatProductCreate(mmdata->Bloc,mp[1],NULL,&mp[cp]));
      PetscCall(MatProductSetType(mp[cp],MATPRODUCT_AtB));
      PetscCall(MatProductSetFill(mp[cp],product->fill));
      PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp));
      PetscCall(MatSetOptionsPrefix(mp[cp],prefix));
      PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix));
      mp[cp]->product->api_user = product->api_user;
      PetscCall(MatProductSetFromOptions(mp[cp]));
      PetscCheckFalse(!mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
      PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
      rmapt[cp] = 2;
      rmapa[cp] = globidx;
      cmapt[cp] = 2;
      cmapa[cp] = P_oth_idx;
      mptmp[cp] = PETSC_FALSE;
      cp++;
    }
    break;
  default:
    SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Not for product type %s",MatProductTypes[ptype]);
  }
  /* sanity check */
  if (size > 1) for (i = 0; i < cp; i++) PetscCheckFalse(rmapt[i] == 2 && !hasoffproc,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected offproc map type for product %" PetscInt_FMT,i);

  PetscCall(PetscMalloc2(cp,&mmdata->mp,cp,&mmdata->mptmp));
  for (i = 0; i < cp; i++) {
    mmdata->mp[i]    = mp[i];
    mmdata->mptmp[i] = mptmp[i];
  }
  mmdata->cp             = cp;
  C->product->data       = mmdata;
  C->product->destroy    = MatDestroy_MatMatMPIAIJBACKEND;
  C->ops->productnumeric = MatProductNumeric_MPIAIJBACKEND;

  /* memory type */
  mmdata->mtype = PETSC_MEMTYPE_HOST;
  PetscCall(PetscObjectTypeCompareAny((PetscObject)C,&iscuda,MATSEQAIJCUSPARSE,MATMPIAIJCUSPARSE,""));
  PetscCall(PetscObjectTypeCompareAny((PetscObject)C,&iskokk,MATSEQAIJKOKKOS,MATMPIAIJKOKKOS,""));
  if (iscuda) mmdata->mtype = PETSC_MEMTYPE_CUDA;
  else if (iskokk) mmdata->mtype = PETSC_MEMTYPE_KOKKOS;

  /* prepare coo coordinates for values insertion */

  /* count total nonzeros of those intermediate seqaij Mats
     ncoo_d:    # of nonzeros of matrices that do not have offproc entries
     ncoo_o:    # of nonzeros (of matrices that might have offproc entries) that will be inserted to remote procs
     ncoo_oown: # of nonzeros (of matrices that might have offproc entries) that will be inserted locally
  */
  for (cp = 0, ncoo_d = 0, ncoo_o = 0, ncoo_oown = 0; cp < mmdata->cp; cp++) {
    Mat_SeqAIJ *mm = (Mat_SeqAIJ*)mp[cp]->data;
    if (mptmp[cp]) continue;
    if (rmapt[cp] == 2 && hasoffproc) { /* the rows need to be scattered to all processes (might include self) */
      const PetscInt *rmap = rmapa[cp];
      const PetscInt mr    = mp[cp]->rmap->n;
      const PetscInt rs    = C->rmap->rstart;
      const PetscInt re    = C->rmap->rend;
      const PetscInt *ii   = mm->i;
      for (i = 0; i < mr; i++) {
        const PetscInt gr = rmap[i];
        const PetscInt nz = ii[i+1] - ii[i];
        if (gr < rs || gr >= re) ncoo_o += nz; /* this row is offproc */
        else ncoo_oown += nz;                  /* this row is local */
      }
    } else ncoo_d += mm->nz;
  }

  /*
    ncoo: total number of nonzeros (including those inserted by remote procs) belonging to this proc

    ncoo = ncoo_d + ncoo_oown + ncoo2, which ncoo2 is number of nonzeros inserted to me by other procs.

    off[0] points to a big index array, which is shared by off[1,2,...]. Similarly, for own[0].

    off[p]: points to the segment for matrix mp[p], storing location of nonzeros that mp[p] will insert to others
    own[p]: points to the segment for matrix mp[p], storing location of nonzeros that mp[p] will insert locally
    so, off[p+1]-off[p] is the number of nonzeros that mp[p] will send to others.

    coo_i/j/v[]: [ncoo] row/col/val of nonzeros belonging to this proc.
      Ex. coo_i[]: the beginning part (of size ncoo_d + ncoo_oown) stores i of local nonzeros, and the remaining part stores i of nonzeros I will receive.
  */
  PetscCall(PetscCalloc1(mmdata->cp+1,&mmdata->off)); /* +1 to make a csr-like data structure */
  PetscCall(PetscCalloc1(mmdata->cp+1,&mmdata->own));

  /* gather (i,j) of nonzeros inserted by remote procs */
  if (hasoffproc) {
    PetscSF  msf;
    PetscInt ncoo2,*coo_i2,*coo_j2;

    PetscCall(PetscMalloc1(ncoo_o,&mmdata->off[0]));
    PetscCall(PetscMalloc1(ncoo_oown,&mmdata->own[0]));
    PetscCall(PetscMalloc2(ncoo_o,&coo_i,ncoo_o,&coo_j)); /* to collect (i,j) of entries to be sent to others */

    for (cp = 0, ncoo_o = 0; cp < mmdata->cp; cp++) {
      Mat_SeqAIJ *mm     = (Mat_SeqAIJ*)mp[cp]->data;
      PetscInt   *idxoff = mmdata->off[cp];
      PetscInt   *idxown = mmdata->own[cp];
      if (!mptmp[cp] && rmapt[cp] == 2) { /* row map is sparse */
        const PetscInt *rmap = rmapa[cp];
        const PetscInt *cmap = cmapa[cp];
        const PetscInt *ii   = mm->i;
        PetscInt       *coi  = coo_i + ncoo_o;
        PetscInt       *coj  = coo_j + ncoo_o;
        const PetscInt mr    = mp[cp]->rmap->n;
        const PetscInt rs    = C->rmap->rstart;
        const PetscInt re    = C->rmap->rend;
        const PetscInt cs    = C->cmap->rstart;
        for (i = 0; i < mr; i++) {
          const PetscInt *jj = mm->j + ii[i];
          const PetscInt gr  = rmap[i];
          const PetscInt nz  = ii[i+1] - ii[i];
          if (gr < rs || gr >= re) { /* this is an offproc row */
            for (j = ii[i]; j < ii[i+1]; j++) {
              *coi++    = gr;
              *idxoff++ = j;
            }
            if (!cmapt[cp]) { /* already global */
              for (j = 0; j < nz; j++) *coj++ = jj[j];
            } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */
              for (j = 0; j < nz; j++) *coj++ = jj[j] + cs;
            } else { /* offdiag */
              for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]];
            }
            ncoo_o += nz;
          } else { /* this is a local row */
            for (j = ii[i]; j < ii[i+1]; j++) *idxown++ = j;
          }
        }
      }
      /* record segment ends; off[p+1]-off[p] (resp. own[p+1]-own[p]) is mp[p]'s count */
      mmdata->off[cp + 1] = idxoff;
      mmdata->own[cp + 1] = idxown;
    }

    PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)C),&mmdata->sf));
    PetscCall(PetscSFSetGraphLayout(mmdata->sf,C->rmap,ncoo_o/*nleaves*/,NULL/*ilocal*/,PETSC_OWN_POINTER,coo_i));
    PetscCall(PetscSFGetMultiSF(mmdata->sf,&msf));
    PetscCall(PetscSFGetGraph(msf,&ncoo2/*nroots*/,NULL,NULL,NULL));
    ncoo = ncoo_d + ncoo_oown + ncoo2;
    PetscCall(PetscMalloc2(ncoo,&coo_i2,ncoo,&coo_j2));
    PetscCall(PetscSFGatherBegin(mmdata->sf,MPIU_INT,coo_i,coo_i2 + ncoo_d + ncoo_oown)); /* put (i,j) of remote nonzeros at back */
    PetscCall(PetscSFGatherEnd(mmdata->sf,MPIU_INT,coo_i,coo_i2 + ncoo_d + ncoo_oown));
    PetscCall(PetscSFGatherBegin(mmdata->sf,MPIU_INT,coo_j,coo_j2 + ncoo_d + ncoo_oown));
    PetscCall(PetscSFGatherEnd(mmdata->sf,MPIU_INT,coo_j,coo_j2 + ncoo_d + ncoo_oown));
    PetscCall(PetscFree2(coo_i,coo_j));
    /* allocate MPI send buffer to collect nonzero values to be sent to remote procs */
    PetscCall(PetscSFMalloc(mmdata->sf,mmdata->mtype,ncoo_o*sizeof(PetscScalar),(void**)&mmdata->coo_w));
    coo_i = coo_i2;
    coo_j = coo_j2;
  } else { /* no offproc values insertion */
    ncoo = ncoo_d;
    PetscCall(PetscMalloc2(ncoo,&coo_i,ncoo,&coo_j));

    /* still create an (empty) SF so the destroy/numeric paths are uniform */
    PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)C),&mmdata->sf));
    PetscCall(PetscSFSetGraph(mmdata->sf,0,0,NULL,PETSC_OWN_POINTER,NULL,PETSC_OWN_POINTER));
    PetscCall(PetscSFSetUp(mmdata->sf));
  }
  mmdata->hasoffproc = hasoffproc;

  /* gather (i,j) of nonzeros inserted locally */
  for (cp = 0, ncoo_d = 0; cp < mmdata->cp; cp++) {
    Mat_SeqAIJ     *mm   = (Mat_SeqAIJ*)mp[cp]->data;
    PetscInt       *coi  = coo_i + ncoo_d;
    PetscInt       *coj  = coo_j + ncoo_d;
    const PetscInt *jj   = mm->j;
    const PetscInt *ii   = mm->i;
    const PetscInt *cmap = cmapa[cp];
    const PetscInt *rmap = rmapa[cp];
    const PetscInt mr    = mp[cp]->rmap->n;
    const PetscInt rs    = C->rmap->rstart;
    const PetscInt re    = C->rmap->rend;
    const PetscInt cs    = C->cmap->rstart;

    if (mptmp[cp]) continue;
    if (rmapt[cp] == 1) { /* consecutive rows */
      /* fill coo_i */
      for (i = 0; i < mr; i++) {
        const PetscInt gr = i + rs;
        for (j = ii[i]; j < ii[i+1]; j++) coi[j] = gr;
      }
      /* fill coo_j */
      if (!cmapt[cp]) { /* type-0, already global */
        PetscCall(PetscArraycpy(coj,jj,mm->nz));
      } else if (cmapt[cp] == 1) { /* type-1, local to global for consecutive columns of C */
        for (j = 0; j < mm->nz; j++) coj[j] = jj[j] + cs; /* lid + col start */
      } else { /* type-2, local to global for sparse columns */
        for (j = 0; j < mm->nz; j++) coj[j] = cmap[jj[j]];
      }
      ncoo_d += mm->nz;
    } else if (rmapt[cp] == 2) { /* sparse rows */
      for (i = 0; i < mr; i++) {
        const PetscInt *jj = mm->j + ii[i];
        const PetscInt gr  = rmap[i];
        const PetscInt nz  = ii[i+1] - ii[i];
        if (gr >= rs && gr < re) { /* local rows */
          for (j = ii[i]; j < ii[i+1]; j++) *coi++ = gr;
          if (!cmapt[cp]) { /* type-0, already global */
            for (j = 0; j < nz; j++) *coj++ = jj[j];
          } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */
            for (j = 0; j < nz; j++) *coj++ = jj[j] + cs;
          } else { /* type-2, local to global for sparse columns */
            for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]];
          }
          ncoo_d += nz;
        }
      }
    }
  }
  if (glob) {
    PetscCall(ISRestoreIndices(glob,&globidx));
  }
  PetscCall(ISDestroy(&glob));
  if (P_oth_l2g) {
    PetscCall(ISLocalToGlobalMappingRestoreIndices(P_oth_l2g,&P_oth_idx));
  }
  PetscCall(ISLocalToGlobalMappingDestroy(&P_oth_l2g));
  /* allocate an array to store all nonzeros (inserted locally or remotely) belonging to this proc */
  PetscCall(PetscSFMalloc(mmdata->sf,mmdata->mtype,ncoo*sizeof(PetscScalar),(void**)&mmdata->coo_v));

  /* preallocate with COO data */
  PetscCall(MatSetPreallocationCOO(C,ncoo,coo_i,coo_j));
PetscCall(PetscFree2(coo_i,coo_j)); 7309 PetscFunctionReturn(0); 7310 } 7311 7312 PetscErrorCode MatProductSetFromOptions_MPIAIJBACKEND(Mat mat) 7313 { 7314 Mat_Product *product = mat->product; 7315 #if defined(PETSC_HAVE_DEVICE) 7316 PetscBool match = PETSC_FALSE; 7317 PetscBool usecpu = PETSC_FALSE; 7318 #else 7319 PetscBool match = PETSC_TRUE; 7320 #endif 7321 7322 PetscFunctionBegin; 7323 MatCheckProduct(mat,1); 7324 #if defined(PETSC_HAVE_DEVICE) 7325 if (!product->A->boundtocpu && !product->B->boundtocpu) { 7326 PetscCall(PetscObjectTypeCompare((PetscObject)product->B,((PetscObject)product->A)->type_name,&match)); 7327 } 7328 if (match) { /* we can always fallback to the CPU if requested */ 7329 PetscErrorCode ierr; 7330 switch (product->type) { 7331 case MATPRODUCT_AB: 7332 if (product->api_user) { 7333 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatMatMult","Mat");PetscCall(ierr); 7334 PetscCall(PetscOptionsBool("-matmatmult_backend_cpu","Use CPU code","MatMatMult",usecpu,&usecpu,NULL)); 7335 ierr = PetscOptionsEnd();PetscCall(ierr); 7336 } else { 7337 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatProduct_AB","Mat");PetscCall(ierr); 7338 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu","Use CPU code","MatMatMult",usecpu,&usecpu,NULL)); 7339 ierr = PetscOptionsEnd();PetscCall(ierr); 7340 } 7341 break; 7342 case MATPRODUCT_AtB: 7343 if (product->api_user) { 7344 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatTransposeMatMult","Mat");PetscCall(ierr); 7345 PetscCall(PetscOptionsBool("-mattransposematmult_backend_cpu","Use CPU code","MatTransposeMatMult",usecpu,&usecpu,NULL)); 7346 ierr = PetscOptionsEnd();PetscCall(ierr); 7347 } else { 7348 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatProduct_AtB","Mat");PetscCall(ierr); 7349 
PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu","Use CPU code","MatTransposeMatMult",usecpu,&usecpu,NULL)); 7350 ierr = PetscOptionsEnd();PetscCall(ierr); 7351 } 7352 break; 7353 case MATPRODUCT_PtAP: 7354 if (product->api_user) { 7355 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatPtAP","Mat");PetscCall(ierr); 7356 PetscCall(PetscOptionsBool("-matptap_backend_cpu","Use CPU code","MatPtAP",usecpu,&usecpu,NULL)); 7357 ierr = PetscOptionsEnd();PetscCall(ierr); 7358 } else { 7359 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatProduct_PtAP","Mat");PetscCall(ierr); 7360 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu","Use CPU code","MatPtAP",usecpu,&usecpu,NULL)); 7361 ierr = PetscOptionsEnd();PetscCall(ierr); 7362 } 7363 break; 7364 default: 7365 break; 7366 } 7367 match = (PetscBool)!usecpu; 7368 } 7369 #endif 7370 if (match) { 7371 switch (product->type) { 7372 case MATPRODUCT_AB: 7373 case MATPRODUCT_AtB: 7374 case MATPRODUCT_PtAP: 7375 mat->ops->productsymbolic = MatProductSymbolic_MPIAIJBACKEND; 7376 break; 7377 default: 7378 break; 7379 } 7380 } 7381 /* fallback to MPIAIJ ops */ 7382 if (!mat->ops->productsymbolic) PetscCall(MatProductSetFromOptions_MPIAIJ(mat)); 7383 PetscFunctionReturn(0); 7384 } 7385 7386 /* 7387 Special version for direct calls from Fortran 7388 */ 7389 #include <petsc/private/fortranimpl.h> 7390 7391 /* Change these macros so can be used in void function */ 7392 /* Identical to PetscCallVoid, except it assigns to *_ierr */ 7393 #undef PetscCall 7394 #define PetscCall(...) do { \ 7395 PetscErrorCode ierr_msv_mpiaij = __VA_ARGS__; \ 7396 if (PetscUnlikely(ierr_msv_mpiaij)) { \ 7397 *_ierr = PetscError(PETSC_COMM_SELF,__LINE__,PETSC_FUNCTION_NAME,__FILE__,ierr_msv_mpiaij,PETSC_ERROR_REPEAT," "); \ 7398 return; \ 7399 } \ 7400 } while (0) 7401 7402 #undef SETERRQ 7403 #define SETERRQ(comm,ierr,...) 
do { \ 7404 *_ierr = PetscError(comm,__LINE__,PETSC_FUNCTION_NAME,__FILE__,ierr,PETSC_ERROR_INITIAL,__VA_ARGS__); \ 7405 return; \ 7406 } while (0) 7407 7408 #if defined(PETSC_HAVE_FORTRAN_CAPS) 7409 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ 7410 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 7411 #define matsetvaluesmpiaij_ matsetvaluesmpiaij 7412 #else 7413 #endif 7414 PETSC_EXTERN void matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr) 7415 { 7416 Mat mat = *mmat; 7417 PetscInt m = *mm, n = *mn; 7418 InsertMode addv = *maddv; 7419 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 7420 PetscScalar value; 7421 7422 MatCheckPreallocated(mat,1); 7423 if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv; 7424 else PetscCheck(mat->insertmode == addv,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 7425 { 7426 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 7427 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 7428 PetscBool roworiented = aij->roworiented; 7429 7430 /* Some Variables required in the macro */ 7431 Mat A = aij->A; 7432 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 7433 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 7434 MatScalar *aa; 7435 PetscBool ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE); 7436 Mat B = aij->B; 7437 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 7438 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 7439 MatScalar *ba; 7440 /* This variable below is only for the PETSC_HAVE_VIENNACL or PETSC_HAVE_CUDA cases, but we define it in all cases because we 7441 * cannot use "#if defined" inside a macro. 
*/ 7442 PETSC_UNUSED PetscBool inserted = PETSC_FALSE; 7443 7444 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 7445 PetscInt nonew = a->nonew; 7446 MatScalar *ap1,*ap2; 7447 7448 PetscFunctionBegin; 7449 PetscCall(MatSeqAIJGetArray(A,&aa)); 7450 PetscCall(MatSeqAIJGetArray(B,&ba)); 7451 for (i=0; i<m; i++) { 7452 if (im[i] < 0) continue; 7453 PetscCheck(im[i] < mat->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT,im[i],mat->rmap->N-1); 7454 if (im[i] >= rstart && im[i] < rend) { 7455 row = im[i] - rstart; 7456 lastcol1 = -1; 7457 rp1 = aj + ai[row]; 7458 ap1 = aa + ai[row]; 7459 rmax1 = aimax[row]; 7460 nrow1 = ailen[row]; 7461 low1 = 0; 7462 high1 = nrow1; 7463 lastcol2 = -1; 7464 rp2 = bj + bi[row]; 7465 ap2 = ba + bi[row]; 7466 rmax2 = bimax[row]; 7467 nrow2 = bilen[row]; 7468 low2 = 0; 7469 high2 = nrow2; 7470 7471 for (j=0; j<n; j++) { 7472 if (roworiented) value = v[i*n+j]; 7473 else value = v[i+j*m]; 7474 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue; 7475 if (in[j] >= cstart && in[j] < cend) { 7476 col = in[j] - cstart; 7477 MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]); 7478 } else if (in[j] < 0) continue; 7479 else if (PetscUnlikelyDebug(in[j] >= mat->cmap->N)) { 7480 /* extra brace on SETERRQ() is required for --with-errorchecking=0 - due to the next 'else' clause */ 7481 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 7482 } else { 7483 if (mat->was_assembled) { 7484 if (!aij->colmap) { 7485 PetscCall(MatCreateColmap_MPIAIJ_Private(mat)); 7486 } 7487 #if defined(PETSC_USE_CTABLE) 7488 PetscCall(PetscTableFind(aij->colmap,in[j]+1,&col)); 7489 col--; 7490 #else 7491 col = aij->colmap[in[j]] - 1; 7492 #endif 7493 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 7494 PetscCall(MatDisAssemble_MPIAIJ(mat)); 7495 col = in[j]; 7496 
/* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 7497 B = aij->B; 7498 b = (Mat_SeqAIJ*)B->data; 7499 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 7500 rp2 = bj + bi[row]; 7501 ap2 = ba + bi[row]; 7502 rmax2 = bimax[row]; 7503 nrow2 = bilen[row]; 7504 low2 = 0; 7505 high2 = nrow2; 7506 bm = aij->B->rmap->n; 7507 ba = b->a; 7508 inserted = PETSC_FALSE; 7509 } 7510 } else col = in[j]; 7511 MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]); 7512 } 7513 } 7514 } else if (!aij->donotstash) { 7515 if (roworiented) { 7516 PetscCall(MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)))); 7517 } else { 7518 PetscCall(MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)))); 7519 } 7520 } 7521 } 7522 PetscCall(MatSeqAIJRestoreArray(A,&aa)); 7523 PetscCall(MatSeqAIJRestoreArray(B,&ba)); 7524 } 7525 PetscFunctionReturnVoid(); 7526 } 7527 /* Undefining these here since they were redefined from their original definition above! No 7528 * other PETSc functions should be defined past this point, as it is impossible to recover the 7529 * original definitions */ 7530 #undef PetscCall 7531 #undef SETERRQ 7532