1 #include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/ 2 #include <petsc/private/vecimpl.h> 3 #include <petsc/private/sfimpl.h> 4 #include <petsc/private/isimpl.h> 5 #include <petscblaslapack.h> 6 #include <petscsf.h> 7 #include <petsc/private/hashmapi.h> 8 9 /*MC 10 MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices. 11 12 This matrix type is identical to MATSEQAIJ when constructed with a single process communicator, 13 and MATMPIAIJ otherwise. As a result, for single process communicators, 14 MatSeqAIJSetPreallocation is supported, and similarly MatMPIAIJSetPreallocation() is supported 15 for communicators controlling multiple processes. It is recommended that you call both of 16 the above preallocation routines for simplicity. 17 18 Options Database Keys: 19 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions() 20 21 Developer Notes: 22 Subclasses include MATAIJCUSPARSE, MATAIJPERM, MATAIJSELL, MATAIJMKL, MATAIJCRL, and also automatically switches over to use inodes when 23 enough exist. 24 25 Level: beginner 26 27 .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ 28 M*/ 29 30 /*MC 31 MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices. 32 33 This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator, 34 and MATMPIAIJCRL otherwise. As a result, for single process communicators, 35 MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported 36 for communicators controlling multiple processes. It is recommended that you call both of 37 the above preallocation routines for simplicity. 38 39 Options Database Keys: 40 . 
-mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions() 41 42 Level: beginner 43 44 .seealso: MatCreateMPIAIJCRL,MATSEQAIJCRL,MATMPIAIJCRL, MATSEQAIJCRL, MATMPIAIJCRL 45 M*/ 46 47 static PetscErrorCode MatBindToCPU_MPIAIJ(Mat A,PetscBool flg) 48 { 49 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 50 51 PetscFunctionBegin; 52 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_VIENNACL) 53 A->boundtocpu = flg; 54 #endif 55 if (a->A) { 56 PetscCall(MatBindToCPU(a->A,flg)); 57 } 58 if (a->B) { 59 PetscCall(MatBindToCPU(a->B,flg)); 60 } 61 62 /* In addition to binding the diagonal and off-diagonal matrices, bind the local vectors used for matrix-vector products. 63 * This maybe seems a little odd for a MatBindToCPU() call to do, but it makes no sense for the binding of these vectors 64 * to differ from the parent matrix. */ 65 if (a->lvec) { 66 PetscCall(VecBindToCPU(a->lvec,flg)); 67 } 68 if (a->diag) { 69 PetscCall(VecBindToCPU(a->diag,flg)); 70 } 71 72 PetscFunctionReturn(0); 73 } 74 75 PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs) 76 { 77 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)M->data; 78 79 PetscFunctionBegin; 80 if (mat->A) { 81 PetscCall(MatSetBlockSizes(mat->A,rbs,cbs)); 82 PetscCall(MatSetBlockSizes(mat->B,rbs,1)); 83 } 84 PetscFunctionReturn(0); 85 } 86 87 PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows) 88 { 89 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)M->data; 90 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data; 91 Mat_SeqAIJ *b = (Mat_SeqAIJ*)mat->B->data; 92 const PetscInt *ia,*ib; 93 const MatScalar *aa,*bb,*aav,*bav; 94 PetscInt na,nb,i,j,*rows,cnt=0,n0rows; 95 PetscInt m = M->rmap->n,rstart = M->rmap->rstart; 96 97 PetscFunctionBegin; 98 *keptrows = NULL; 99 100 ia = a->i; 101 ib = b->i; 102 PetscCall(MatSeqAIJGetArrayRead(mat->A,&aav)); 103 PetscCall(MatSeqAIJGetArrayRead(mat->B,&bav)); 104 for (i=0; i<m; i++) { 105 na = ia[i+1] - ia[i]; 106 nb = ib[i+1] - ib[i]; 107 if (!na && !nb) { 108 cnt++; 109 goto ok1; 110 } 
111 aa = aav + ia[i]; 112 for (j=0; j<na; j++) { 113 if (aa[j] != 0.0) goto ok1; 114 } 115 bb = bav + ib[i]; 116 for (j=0; j <nb; j++) { 117 if (bb[j] != 0.0) goto ok1; 118 } 119 cnt++; 120 ok1:; 121 } 122 PetscCall(MPIU_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M))); 123 if (!n0rows) { 124 PetscCall(MatSeqAIJRestoreArrayRead(mat->A,&aav)); 125 PetscCall(MatSeqAIJRestoreArrayRead(mat->B,&bav)); 126 PetscFunctionReturn(0); 127 } 128 PetscCall(PetscMalloc1(M->rmap->n-cnt,&rows)); 129 cnt = 0; 130 for (i=0; i<m; i++) { 131 na = ia[i+1] - ia[i]; 132 nb = ib[i+1] - ib[i]; 133 if (!na && !nb) continue; 134 aa = aav + ia[i]; 135 for (j=0; j<na;j++) { 136 if (aa[j] != 0.0) { 137 rows[cnt++] = rstart + i; 138 goto ok2; 139 } 140 } 141 bb = bav + ib[i]; 142 for (j=0; j<nb; j++) { 143 if (bb[j] != 0.0) { 144 rows[cnt++] = rstart + i; 145 goto ok2; 146 } 147 } 148 ok2:; 149 } 150 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows)); 151 PetscCall(MatSeqAIJRestoreArrayRead(mat->A,&aav)); 152 PetscCall(MatSeqAIJRestoreArrayRead(mat->B,&bav)); 153 PetscFunctionReturn(0); 154 } 155 156 PetscErrorCode MatDiagonalSet_MPIAIJ(Mat Y,Vec D,InsertMode is) 157 { 158 Mat_MPIAIJ *aij = (Mat_MPIAIJ*) Y->data; 159 PetscBool cong; 160 161 PetscFunctionBegin; 162 PetscCall(MatHasCongruentLayouts(Y,&cong)); 163 if (Y->assembled && cong) { 164 PetscCall(MatDiagonalSet(aij->A,D,is)); 165 } else { 166 PetscCall(MatDiagonalSet_Default(Y,D,is)); 167 } 168 PetscFunctionReturn(0); 169 } 170 171 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows) 172 { 173 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)M->data; 174 PetscInt i,rstart,nrows,*rows; 175 176 PetscFunctionBegin; 177 *zrows = NULL; 178 PetscCall(MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows)); 179 PetscCall(MatGetOwnershipRange(M,&rstart,NULL)); 180 for (i=0; i<nrows; i++) rows[i] += rstart; 181 
PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows)); 182 PetscFunctionReturn(0); 183 } 184 185 PetscErrorCode MatGetColumnReductions_MPIAIJ(Mat A,PetscInt type,PetscReal *reductions) 186 { 187 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)A->data; 188 PetscInt i,m,n,*garray = aij->garray; 189 Mat_SeqAIJ *a_aij = (Mat_SeqAIJ*) aij->A->data; 190 Mat_SeqAIJ *b_aij = (Mat_SeqAIJ*) aij->B->data; 191 PetscReal *work; 192 const PetscScalar *dummy; 193 194 PetscFunctionBegin; 195 PetscCall(MatGetSize(A,&m,&n)); 196 PetscCall(PetscCalloc1(n,&work)); 197 PetscCall(MatSeqAIJGetArrayRead(aij->A,&dummy)); 198 PetscCall(MatSeqAIJRestoreArrayRead(aij->A,&dummy)); 199 PetscCall(MatSeqAIJGetArrayRead(aij->B,&dummy)); 200 PetscCall(MatSeqAIJRestoreArrayRead(aij->B,&dummy)); 201 if (type == NORM_2) { 202 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 203 work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]); 204 } 205 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 206 work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]); 207 } 208 } else if (type == NORM_1) { 209 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 210 work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]); 211 } 212 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 213 work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]); 214 } 215 } else if (type == NORM_INFINITY) { 216 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 217 work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]); 218 } 219 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 220 work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]); 221 } 222 } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) { 223 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 224 work[A->cmap->rstart + a_aij->j[i]] += PetscRealPart(a_aij->a[i]); 225 } 226 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 
227 work[garray[b_aij->j[i]]] += PetscRealPart(b_aij->a[i]); 228 } 229 } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) { 230 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 231 work[A->cmap->rstart + a_aij->j[i]] += PetscImaginaryPart(a_aij->a[i]); 232 } 233 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 234 work[garray[b_aij->j[i]]] += PetscImaginaryPart(b_aij->a[i]); 235 } 236 } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown reduction type"); 237 if (type == NORM_INFINITY) { 238 PetscCall(MPIU_Allreduce(work,reductions,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A))); 239 } else { 240 PetscCall(MPIU_Allreduce(work,reductions,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A))); 241 } 242 PetscCall(PetscFree(work)); 243 if (type == NORM_2) { 244 for (i=0; i<n; i++) reductions[i] = PetscSqrtReal(reductions[i]); 245 } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) { 246 for (i=0; i<n; i++) reductions[i] /= m; 247 } 248 PetscFunctionReturn(0); 249 } 250 251 PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A,IS *is) 252 { 253 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 254 IS sis,gis; 255 const PetscInt *isis,*igis; 256 PetscInt n,*iis,nsis,ngis,rstart,i; 257 258 PetscFunctionBegin; 259 PetscCall(MatFindOffBlockDiagonalEntries(a->A,&sis)); 260 PetscCall(MatFindNonzeroRows(a->B,&gis)); 261 PetscCall(ISGetSize(gis,&ngis)); 262 PetscCall(ISGetSize(sis,&nsis)); 263 PetscCall(ISGetIndices(sis,&isis)); 264 PetscCall(ISGetIndices(gis,&igis)); 265 266 PetscCall(PetscMalloc1(ngis+nsis,&iis)); 267 PetscCall(PetscArraycpy(iis,igis,ngis)); 268 PetscCall(PetscArraycpy(iis+ngis,isis,nsis)); 269 n = ngis + nsis; 270 PetscCall(PetscSortRemoveDupsInt(&n,iis)); 271 PetscCall(MatGetOwnershipRange(A,&rstart,NULL)); 272 for (i=0; i<n; i++) iis[i] += rstart; 273 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)A),n,iis,PETSC_OWN_POINTER,is)); 274 275 
PetscCall(ISRestoreIndices(sis,&isis)); 276 PetscCall(ISRestoreIndices(gis,&igis)); 277 PetscCall(ISDestroy(&sis)); 278 PetscCall(ISDestroy(&gis)); 279 PetscFunctionReturn(0); 280 } 281 282 /* 283 Local utility routine that creates a mapping from the global column 284 number to the local number in the off-diagonal part of the local 285 storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at 286 a slightly higher hash table cost; without it it is not scalable (each processor 287 has an order N integer array but is fast to access. 288 */ 289 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat) 290 { 291 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 292 PetscInt n = aij->B->cmap->n,i; 293 294 PetscFunctionBegin; 295 PetscCheck(!n || aij->garray,PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray"); 296 #if defined(PETSC_USE_CTABLE) 297 PetscCall(PetscTableCreate(n,mat->cmap->N+1,&aij->colmap)); 298 for (i=0; i<n; i++) { 299 PetscCall(PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES)); 300 } 301 #else 302 PetscCall(PetscCalloc1(mat->cmap->N+1,&aij->colmap)); 303 PetscCall(PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt))); 304 for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1; 305 #endif 306 PetscFunctionReturn(0); 307 } 308 309 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv,orow,ocol) \ 310 { \ 311 if (col <= lastcol1) low1 = 0; \ 312 else high1 = nrow1; \ 313 lastcol1 = col;\ 314 while (high1-low1 > 5) { \ 315 t = (low1+high1)/2; \ 316 if (rp1[t] > col) high1 = t; \ 317 else low1 = t; \ 318 } \ 319 for (_i=low1; _i<high1; _i++) { \ 320 if (rp1[_i] > col) break; \ 321 if (rp1[_i] == col) { \ 322 if (addv == ADD_VALUES) { \ 323 ap1[_i] += value; \ 324 /* Not sure LogFlops will slow dow the code or not */ \ 325 (void)PetscLogFlops(1.0); \ 326 } \ 327 else ap1[_i] = value; \ 328 goto a_noinsert; \ 329 } \ 330 } \ 331 if (value == 0.0 && ignorezeroentries && row != col) {low1 = 
0; high1 = nrow1;goto a_noinsert;} \ 332 if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \ 333 PetscCheck(nonew != -1,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \ 334 MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \ 335 N = nrow1++ - 1; a->nz++; high1++; \ 336 /* shift up all the later entries in this row */ \ 337 PetscCall(PetscArraymove(rp1+_i+1,rp1+_i,N-_i+1));\ 338 PetscCall(PetscArraymove(ap1+_i+1,ap1+_i,N-_i+1));\ 339 rp1[_i] = col; \ 340 ap1[_i] = value; \ 341 A->nonzerostate++;\ 342 a_noinsert: ; \ 343 ailen[row] = nrow1; \ 344 } 345 346 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv,orow,ocol) \ 347 { \ 348 if (col <= lastcol2) low2 = 0; \ 349 else high2 = nrow2; \ 350 lastcol2 = col; \ 351 while (high2-low2 > 5) { \ 352 t = (low2+high2)/2; \ 353 if (rp2[t] > col) high2 = t; \ 354 else low2 = t; \ 355 } \ 356 for (_i=low2; _i<high2; _i++) { \ 357 if (rp2[_i] > col) break; \ 358 if (rp2[_i] == col) { \ 359 if (addv == ADD_VALUES) { \ 360 ap2[_i] += value; \ 361 (void)PetscLogFlops(1.0); \ 362 } \ 363 else ap2[_i] = value; \ 364 goto b_noinsert; \ 365 } \ 366 } \ 367 if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 368 if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 369 PetscCheck(nonew != -1,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \ 370 MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \ 371 N = nrow2++ - 1; b->nz++; high2++; \ 372 /* shift up all the later entries in this row */ \ 373 PetscCall(PetscArraymove(rp2+_i+1,rp2+_i,N-_i+1));\ 374 PetscCall(PetscArraymove(ap2+_i+1,ap2+_i,N-_i+1));\ 375 rp2[_i] = col; \ 376 ap2[_i] = value; \ 377 B->nonzerostate++; \ 378 b_noinsert: ; \ 379 
bilen[row] = nrow2; \ 380 } 381 382 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[]) 383 { 384 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 385 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data; 386 PetscInt l,*garray = mat->garray,diag; 387 PetscScalar *aa,*ba; 388 389 PetscFunctionBegin; 390 /* code only works for square matrices A */ 391 392 /* find size of row to the left of the diagonal part */ 393 PetscCall(MatGetOwnershipRange(A,&diag,NULL)); 394 row = row - diag; 395 for (l=0; l<b->i[row+1]-b->i[row]; l++) { 396 if (garray[b->j[b->i[row]+l]] > diag) break; 397 } 398 if (l) { 399 PetscCall(MatSeqAIJGetArray(mat->B,&ba)); 400 PetscCall(PetscArraycpy(ba+b->i[row],v,l)); 401 PetscCall(MatSeqAIJRestoreArray(mat->B,&ba)); 402 } 403 404 /* diagonal part */ 405 if (a->i[row+1]-a->i[row]) { 406 PetscCall(MatSeqAIJGetArray(mat->A,&aa)); 407 PetscCall(PetscArraycpy(aa+a->i[row],v+l,(a->i[row+1]-a->i[row]))); 408 PetscCall(MatSeqAIJRestoreArray(mat->A,&aa)); 409 } 410 411 /* right of diagonal part */ 412 if (b->i[row+1]-b->i[row]-l) { 413 PetscCall(MatSeqAIJGetArray(mat->B,&ba)); 414 PetscCall(PetscArraycpy(ba+b->i[row]+l,v+l+a->i[row+1]-a->i[row],b->i[row+1]-b->i[row]-l)); 415 PetscCall(MatSeqAIJRestoreArray(mat->B,&ba)); 416 } 417 PetscFunctionReturn(0); 418 } 419 420 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 421 { 422 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 423 PetscScalar value = 0.0; 424 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 425 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 426 PetscBool roworiented = aij->roworiented; 427 428 /* Some Variables required in the macro */ 429 Mat A = aij->A; 430 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 431 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 432 PetscBool ignorezeroentries = a->ignorezeroentries; 433 Mat B = 
aij->B; 434 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 435 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 436 MatScalar *aa,*ba; 437 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 438 PetscInt nonew; 439 MatScalar *ap1,*ap2; 440 441 PetscFunctionBegin; 442 PetscCall(MatSeqAIJGetArray(A,&aa)); 443 PetscCall(MatSeqAIJGetArray(B,&ba)); 444 for (i=0; i<m; i++) { 445 if (im[i] < 0) continue; 446 PetscCheck(im[i] < mat->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT,im[i],mat->rmap->N-1); 447 if (im[i] >= rstart && im[i] < rend) { 448 row = im[i] - rstart; 449 lastcol1 = -1; 450 rp1 = aj + ai[row]; 451 ap1 = aa + ai[row]; 452 rmax1 = aimax[row]; 453 nrow1 = ailen[row]; 454 low1 = 0; 455 high1 = nrow1; 456 lastcol2 = -1; 457 rp2 = bj + bi[row]; 458 ap2 = ba + bi[row]; 459 rmax2 = bimax[row]; 460 nrow2 = bilen[row]; 461 low2 = 0; 462 high2 = nrow2; 463 464 for (j=0; j<n; j++) { 465 if (v) value = roworiented ? 
v[i*n+j] : v[i+j*m]; 466 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue; 467 if (in[j] >= cstart && in[j] < cend) { 468 col = in[j] - cstart; 469 nonew = a->nonew; 470 MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]); 471 } else if (in[j] < 0) continue; 472 else PetscCheck(in[j] < mat->cmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT,in[j],mat->cmap->N-1); 473 else { 474 if (mat->was_assembled) { 475 if (!aij->colmap) { 476 PetscCall(MatCreateColmap_MPIAIJ_Private(mat)); 477 } 478 #if defined(PETSC_USE_CTABLE) 479 PetscCall(PetscTableFind(aij->colmap,in[j]+1,&col)); /* map global col ids to local ones */ 480 col--; 481 #else 482 col = aij->colmap[in[j]] - 1; 483 #endif 484 if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) { /* col < 0 means in[j] is a new col for B */ 485 PetscCall(MatDisAssemble_MPIAIJ(mat)); /* Change aij->B from reduced/local format to expanded/global format */ 486 col = in[j]; 487 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 488 B = aij->B; 489 b = (Mat_SeqAIJ*)B->data; 490 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a; 491 rp2 = bj + bi[row]; 492 ap2 = ba + bi[row]; 493 rmax2 = bimax[row]; 494 nrow2 = bilen[row]; 495 low2 = 0; 496 high2 = nrow2; 497 bm = aij->B->rmap->n; 498 ba = b->a; 499 } else if (col < 0 && !(ignorezeroentries && value == 0.0)) { 500 if (1 == ((Mat_SeqAIJ*)(aij->B->data))->nonew) { 501 PetscCall(PetscInfo(mat,"Skipping of insertion of new nonzero location in off-diagonal portion of matrix %g(%" PetscInt_FMT ",%" PetscInt_FMT ")\n",(double)PetscRealPart(value),im[i],in[j])); 502 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", im[i], in[j]); 503 } 504 } else col = in[j]; 505 nonew = b->nonew; 506 
MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]); 507 } 508 } 509 } else { 510 PetscCheck(!mat->nooffprocentries,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 511 if (!aij->donotstash) { 512 mat->assembled = PETSC_FALSE; 513 if (roworiented) { 514 PetscCall(MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)))); 515 } else { 516 PetscCall(MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)))); 517 } 518 } 519 } 520 } 521 PetscCall(MatSeqAIJRestoreArray(A,&aa)); 522 PetscCall(MatSeqAIJRestoreArray(B,&ba)); 523 PetscFunctionReturn(0); 524 } 525 526 /* 527 This function sets the j and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix. 528 The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like). 529 No off-processor parts off the matrix are allowed here and mat->was_assembled has to be PETSC_FALSE. 
530 */ 531 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat_Symbolic(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[]) 532 { 533 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 534 Mat A = aij->A; /* diagonal part of the matrix */ 535 Mat B = aij->B; /* offdiagonal part of the matrix */ 536 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 537 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 538 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,col; 539 PetscInt *ailen = a->ilen,*aj = a->j; 540 PetscInt *bilen = b->ilen,*bj = b->j; 541 PetscInt am = aij->A->rmap->n,j; 542 PetscInt diag_so_far = 0,dnz; 543 PetscInt offd_so_far = 0,onz; 544 545 PetscFunctionBegin; 546 /* Iterate over all rows of the matrix */ 547 for (j=0; j<am; j++) { 548 dnz = onz = 0; 549 /* Iterate over all non-zero columns of the current row */ 550 for (col=mat_i[j]; col<mat_i[j+1]; col++) { 551 /* If column is in the diagonal */ 552 if (mat_j[col] >= cstart && mat_j[col] < cend) { 553 aj[diag_so_far++] = mat_j[col] - cstart; 554 dnz++; 555 } else { /* off-diagonal entries */ 556 bj[offd_so_far++] = mat_j[col]; 557 onz++; 558 } 559 } 560 ailen[j] = dnz; 561 bilen[j] = onz; 562 } 563 PetscFunctionReturn(0); 564 } 565 566 /* 567 This function sets the local j, a and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix. 568 The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like). 569 No off-processor parts off the matrix are allowed here, they are set at a later point by MatSetValues_MPIAIJ. 570 Also, mat->was_assembled has to be false, otherwise the statement aj[rowstart_diag+dnz_row] = mat_j[col] - cstart; 571 would not be true and the more complex MatSetValues_MPIAIJ has to be used. 
572 */ 573 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[],const PetscScalar mat_a[]) 574 { 575 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 576 Mat A = aij->A; /* diagonal part of the matrix */ 577 Mat B = aij->B; /* offdiagonal part of the matrix */ 578 Mat_SeqAIJ *aijd =(Mat_SeqAIJ*)(aij->A)->data,*aijo=(Mat_SeqAIJ*)(aij->B)->data; 579 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 580 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 581 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend; 582 PetscInt *ailen = a->ilen,*aj = a->j; 583 PetscInt *bilen = b->ilen,*bj = b->j; 584 PetscInt am = aij->A->rmap->n,j; 585 PetscInt *full_diag_i=aijd->i,*full_offd_i=aijo->i; /* These variables can also include non-local elements, which are set at a later point. */ 586 PetscInt col,dnz_row,onz_row,rowstart_diag,rowstart_offd; 587 PetscScalar *aa = a->a,*ba = b->a; 588 589 PetscFunctionBegin; 590 /* Iterate over all rows of the matrix */ 591 for (j=0; j<am; j++) { 592 dnz_row = onz_row = 0; 593 rowstart_offd = full_offd_i[j]; 594 rowstart_diag = full_diag_i[j]; 595 /* Iterate over all non-zero columns of the current row */ 596 for (col=mat_i[j]; col<mat_i[j+1]; col++) { 597 /* If column is in the diagonal */ 598 if (mat_j[col] >= cstart && mat_j[col] < cend) { 599 aj[rowstart_diag+dnz_row] = mat_j[col] - cstart; 600 aa[rowstart_diag+dnz_row] = mat_a[col]; 601 dnz_row++; 602 } else { /* off-diagonal entries */ 603 bj[rowstart_offd+onz_row] = mat_j[col]; 604 ba[rowstart_offd+onz_row] = mat_a[col]; 605 onz_row++; 606 } 607 } 608 ailen[j] = dnz_row; 609 bilen[j] = onz_row; 610 } 611 PetscFunctionReturn(0); 612 } 613 614 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 615 { 616 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 617 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 618 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 619 
620 PetscFunctionBegin; 621 for (i=0; i<m; i++) { 622 if (idxm[i] < 0) continue; /* negative row */ 623 PetscCheck(idxm[i] < mat->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT,idxm[i],mat->rmap->N-1); 624 if (idxm[i] >= rstart && idxm[i] < rend) { 625 row = idxm[i] - rstart; 626 for (j=0; j<n; j++) { 627 if (idxn[j] < 0) continue; /* negative column */ 628 PetscCheck(idxn[j] < mat->cmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT,idxn[j],mat->cmap->N-1); 629 if (idxn[j] >= cstart && idxn[j] < cend) { 630 col = idxn[j] - cstart; 631 PetscCall(MatGetValues(aij->A,1,&row,1,&col,v+i*n+j)); 632 } else { 633 if (!aij->colmap) { 634 PetscCall(MatCreateColmap_MPIAIJ_Private(mat)); 635 } 636 #if defined(PETSC_USE_CTABLE) 637 PetscCall(PetscTableFind(aij->colmap,idxn[j]+1,&col)); 638 col--; 639 #else 640 col = aij->colmap[idxn[j]] - 1; 641 #endif 642 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 643 else { 644 PetscCall(MatGetValues(aij->B,1,&row,1,&col,v+i*n+j)); 645 } 646 } 647 } 648 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 649 } 650 PetscFunctionReturn(0); 651 } 652 653 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 654 { 655 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 656 PetscInt nstash,reallocs; 657 658 PetscFunctionBegin; 659 if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0); 660 661 PetscCall(MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range)); 662 PetscCall(MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs)); 663 PetscCall(PetscInfo(aij->A,"Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n",nstash,reallocs)); 664 PetscFunctionReturn(0); 665 } 666 667 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 668 { 669 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 670 PetscMPIInt n; 671 PetscInt 
i,j,rstart,ncols,flg; 672 PetscInt *row,*col; 673 PetscBool other_disassembled; 674 PetscScalar *val; 675 676 /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */ 677 678 PetscFunctionBegin; 679 if (!aij->donotstash && !mat->nooffprocentries) { 680 while (1) { 681 PetscCall(MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg)); 682 if (!flg) break; 683 684 for (i=0; i<n;) { 685 /* Now identify the consecutive vals belonging to the same row */ 686 for (j=i,rstart=row[j]; j<n; j++) { 687 if (row[j] != rstart) break; 688 } 689 if (j < n) ncols = j-i; 690 else ncols = n-i; 691 /* Now assemble all these values with a single function call */ 692 PetscCall(MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode)); 693 i = j; 694 } 695 } 696 PetscCall(MatStashScatterEnd_Private(&mat->stash)); 697 } 698 #if defined(PETSC_HAVE_DEVICE) 699 if (mat->offloadmask == PETSC_OFFLOAD_CPU) aij->A->offloadmask = PETSC_OFFLOAD_CPU; 700 /* We call MatBindToCPU() on aij->A and aij->B here, because if MatBindToCPU_MPIAIJ() is called before assembly, it cannot bind these. */ 701 if (mat->boundtocpu) { 702 PetscCall(MatBindToCPU(aij->A,PETSC_TRUE)); 703 PetscCall(MatBindToCPU(aij->B,PETSC_TRUE)); 704 } 705 #endif 706 PetscCall(MatAssemblyBegin(aij->A,mode)); 707 PetscCall(MatAssemblyEnd(aij->A,mode)); 708 709 /* determine if any processor has disassembled, if so we must 710 also disassemble ourself, in order that we may reassemble. 
*/ 711 /* 712 if nonzero structure of submatrix B cannot change then we know that 713 no processor disassembled thus we can skip this stuff 714 */ 715 if (!((Mat_SeqAIJ*)aij->B->data)->nonew) { 716 PetscCall(MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat))); 717 if (mat->was_assembled && !other_disassembled) { /* mat on this rank has reduced off-diag B with local col ids, but globaly it does not */ 718 PetscCall(MatDisAssemble_MPIAIJ(mat)); 719 } 720 } 721 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 722 PetscCall(MatSetUpMultiply_MPIAIJ(mat)); 723 } 724 PetscCall(MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE)); 725 #if defined(PETSC_HAVE_DEVICE) 726 if (mat->offloadmask == PETSC_OFFLOAD_CPU && aij->B->offloadmask != PETSC_OFFLOAD_UNALLOCATED) aij->B->offloadmask = PETSC_OFFLOAD_CPU; 727 #endif 728 PetscCall(MatAssemblyBegin(aij->B,mode)); 729 PetscCall(MatAssemblyEnd(aij->B,mode)); 730 731 PetscCall(PetscFree2(aij->rowvalues,aij->rowindices)); 732 733 aij->rowvalues = NULL; 734 735 PetscCall(VecDestroy(&aij->diag)); 736 737 /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */ 738 if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 739 PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate; 740 PetscCall(MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat))); 741 } 742 #if defined(PETSC_HAVE_DEVICE) 743 mat->offloadmask = PETSC_OFFLOAD_BOTH; 744 #endif 745 PetscFunctionReturn(0); 746 } 747 748 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A) 749 { 750 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 751 752 PetscFunctionBegin; 753 PetscCall(MatZeroEntries(l->A)); 754 PetscCall(MatZeroEntries(l->B)); 755 PetscFunctionReturn(0); 756 } 757 758 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 759 { 
760 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data; 761 PetscObjectState sA, sB; 762 PetscInt *lrows; 763 PetscInt r, len; 764 PetscBool cong, lch, gch; 765 766 PetscFunctionBegin; 767 /* get locally owned rows */ 768 PetscCall(MatZeroRowsMapLocal_Private(A,N,rows,&len,&lrows)); 769 PetscCall(MatHasCongruentLayouts(A,&cong)); 770 /* fix right hand side if needed */ 771 if (x && b) { 772 const PetscScalar *xx; 773 PetscScalar *bb; 774 775 PetscCheck(cong,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Need matching row/col layout"); 776 PetscCall(VecGetArrayRead(x, &xx)); 777 PetscCall(VecGetArray(b, &bb)); 778 for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]]; 779 PetscCall(VecRestoreArrayRead(x, &xx)); 780 PetscCall(VecRestoreArray(b, &bb)); 781 } 782 783 sA = mat->A->nonzerostate; 784 sB = mat->B->nonzerostate; 785 786 if (diag != 0.0 && cong) { 787 PetscCall(MatZeroRows(mat->A, len, lrows, diag, NULL, NULL)); 788 PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL)); 789 } else if (diag != 0.0) { /* non-square or non congruent layouts -> if keepnonzeropattern is false, we allow for new insertion */ 790 Mat_SeqAIJ *aijA = (Mat_SeqAIJ*)mat->A->data; 791 Mat_SeqAIJ *aijB = (Mat_SeqAIJ*)mat->B->data; 792 PetscInt nnwA, nnwB; 793 PetscBool nnzA, nnzB; 794 795 nnwA = aijA->nonew; 796 nnwB = aijB->nonew; 797 nnzA = aijA->keepnonzeropattern; 798 nnzB = aijB->keepnonzeropattern; 799 if (!nnzA) { 800 PetscCall(PetscInfo(mat->A,"Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on diagonal block.\n")); 801 aijA->nonew = 0; 802 } 803 if (!nnzB) { 804 PetscCall(PetscInfo(mat->B,"Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on off-diagonal block.\n")); 805 aijB->nonew = 0; 806 } 807 /* Must zero here before the next loop */ 808 PetscCall(MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL)); 809 PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL)); 810 for (r = 0; r < len; ++r) { 
811 const PetscInt row = lrows[r] + A->rmap->rstart; 812 if (row >= A->cmap->N) continue; 813 PetscCall(MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES)); 814 } 815 aijA->nonew = nnwA; 816 aijB->nonew = nnwB; 817 } else { 818 PetscCall(MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL)); 819 PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL)); 820 } 821 PetscCall(PetscFree(lrows)); 822 PetscCall(MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY)); 823 PetscCall(MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY)); 824 825 /* reduce nonzerostate */ 826 lch = (PetscBool)(sA != mat->A->nonzerostate || sB != mat->B->nonzerostate); 827 PetscCall(MPIU_Allreduce(&lch,&gch,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)A))); 828 if (gch) A->nonzerostate++; 829 PetscFunctionReturn(0); 830 } 831 832 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 833 { 834 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 835 PetscMPIInt n = A->rmap->n; 836 PetscInt i,j,r,m,len = 0; 837 PetscInt *lrows,*owners = A->rmap->range; 838 PetscMPIInt p = 0; 839 PetscSFNode *rrows; 840 PetscSF sf; 841 const PetscScalar *xx; 842 PetscScalar *bb,*mask,*aij_a; 843 Vec xmask,lmask; 844 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)l->B->data; 845 const PetscInt *aj, *ii,*ridx; 846 PetscScalar *aa; 847 848 PetscFunctionBegin; 849 /* Create SF where leaves are input rows and roots are owned rows */ 850 PetscCall(PetscMalloc1(n, &lrows)); 851 for (r = 0; r < n; ++r) lrows[r] = -1; 852 PetscCall(PetscMalloc1(N, &rrows)); 853 for (r = 0; r < N; ++r) { 854 const PetscInt idx = rows[r]; 855 PetscCheckFalse(idx < 0 || A->rmap->N <= idx,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %" PetscInt_FMT " out of range [0,%" PetscInt_FMT ")",idx,A->rmap->N); 856 if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */ 857 PetscCall(PetscLayoutFindOwner(A->rmap,idx,&p)); 858 } 859 rrows[r].rank = p; 860 rrows[r].index = rows[r] - owners[p]; 861 } 
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject) A), &sf));
  PetscCall(PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER));
  /* Collect flags for rows to be zeroed */
  PetscCall(PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR));
  PetscCall(PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR));
  PetscCall(PetscSFDestroy(&sf));
  /* Compress and put in row numbers: entries still -1 were not selected */
  for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
  /* zero diagonal part of matrix */
  PetscCall(MatZeroRowsColumns(l->A,len,lrows,diag,x,b));
  /* handle off diagonal part of matrix: build a 0/1 mask over the ghost columns */
  PetscCall(MatCreateVecs(A,&xmask,NULL));
  PetscCall(VecDuplicate(l->lvec,&lmask));
  PetscCall(VecGetArray(xmask,&bb));
  for (i=0; i<len; i++) bb[lrows[i]] = 1;
  PetscCall(VecRestoreArray(xmask,&bb));
  /* scatter the mask so each rank knows which of its ghost columns are zeroed */
  PetscCall(VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD));
  PetscCall(VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD));
  PetscCall(VecDestroy(&xmask));
  if (x && b) { /* this code is buggy when the row and column layout don't match */
    PetscBool cong;

    PetscCall(MatHasCongruentLayouts(A,&cong));
    PetscCheck(cong,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Need matching row/col layout");
    /* bring the ghost values of x local so b can be corrected below */
    PetscCall(VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD));
    PetscCall(VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD));
    PetscCall(VecGetArrayRead(l->lvec,&xx));
    PetscCall(VecGetArray(b,&bb));
  }
  PetscCall(VecGetArray(lmask,&mask));
  /* remove zeroed rows of off diagonal matrix */
  PetscCall(MatSeqAIJGetArray(l->B,&aij_a));
  ii = aij->i;
  for (i=0; i<len; i++) {
    PetscCall(PetscArrayzero(aij_a + ii[lrows[i]],ii[lrows[i]+1] - ii[lrows[i]]));
  }
  /* loop over all elements of off process part of matrix zeroing removed columns */
  if (aij->compressedrow.use) {
    m =
aij->compressedrow.nrows;
    ii   = aij->compressedrow.i;
    ridx = aij->compressedrow.rindex; /* maps compressed row slots back to local row numbers */
    for (i=0; i<m; i++) {
      n  = ii[i+1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij_a + ii[i];

      for (j=0; j<n; j++) {
        if (PetscAbsScalar(mask[*aj])) { /* this ghost column was zeroed somewhere */
          if (b) bb[*ridx] -= *aa*xx[*aj]; /* move the known term to the rhs */
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
      ridx++;
    }
  } else { /* do not use compressed row format */
    m = l->B->rmap->n;
    for (i=0; i<m; i++) {
      n  = ii[i+1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij_a + ii[i];
      for (j=0; j<n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          if (b) bb[i] -= *aa*xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
    }
  }
  if (x && b) {
    PetscCall(VecRestoreArray(b,&bb));
    PetscCall(VecRestoreArrayRead(l->lvec,&xx));
  }
  PetscCall(MatSeqAIJRestoreArray(l->B,&aij_a));
  PetscCall(VecRestoreArray(lmask,&mask));
  PetscCall(VecDestroy(&lmask));
  PetscCall(PetscFree(lrows));

  /* only change matrix nonzero state if pattern was allowed to be changed */
  if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) {
    PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
    PetscCall(MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A)));
  }
  PetscFunctionReturn(0);
}

/* y = A*x: overlap the ghost-value scatter with the diagonal-block multiply */
PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
  PetscInt   nt;
  VecScatter Mvctx = a->Mvctx;

  PetscFunctionBegin;
  PetscCall(VecGetLocalSize(xx,&nt));
  PetscCheck(nt == A->cmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%" PetscInt_FMT ") and xx (%" PetscInt_FMT ")",A->cmap->n,nt);
  PetscCall(VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD));
  PetscCall((*a->A->ops->mult)(a->A,xx,yy));        /* diagonal block while the scatter is in flight */
  PetscCall(VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD));
  PetscCall((*a->B->ops->multadd)(a->B,a->lvec,yy,yy)); /* add off-diagonal contribution */
  PetscFunctionReturn(0);
}

/* Apply only the local (diagonal) block of A to bb */
PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  PetscCall(MatMultDiagonalBlock(a->A,bb,xx));
  PetscFunctionReturn(0);
}

/* z = y + A*x, with the same scatter/compute overlap as MatMult_MPIAIJ */
PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
  VecScatter Mvctx = a->Mvctx;

  PetscFunctionBegin;
  PetscCall(VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD));
  PetscCall((*a->A->ops->multadd)(a->A,xx,yy,zz));
  PetscCall(VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD));
  PetscCall((*a->B->ops->multadd)(a->B,a->lvec,zz,zz));
  PetscFunctionReturn(0);
}

/* y = A^T*x: local transposes first, then a reverse scatter accumulates the
   off-process contributions into y */
PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  /* do nondiagonal part */
  PetscCall((*a->B->ops->multtranspose)(a->B,xx,a->lvec));
  /* do local part */
  PetscCall((*a->A->ops->multtranspose)(a->A,xx,yy));
  /* add partial results together */
  PetscCall(VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE));
  PetscCall(VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE));
  PetscFunctionReturn(0);
}

/* Test whether Bmat equals Amat^T (within tol); result reduced over the communicator */
PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool *f)
{
  MPI_Comm    comm;
  Mat_MPIAIJ  *Aij = (Mat_MPIAIJ*) Amat->data, *Bij;
  Mat         Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
  IS          Me,Notme;
  PetscInt    M,N,first,last,*notme,i;
  PetscBool   lf;
  PetscMPIInt size;

  PetscFunctionBegin;
  /* Easy test: symmetric diagonal block */
  Bij = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A;
  PetscCall(MatIsTranspose(Adia,Bdia,tol,&lf));
  PetscCall(MPIU_Allreduce(&lf,f,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)Amat)));
  if (!*f) PetscFunctionReturn(0);
  PetscCall(PetscObjectGetComm((PetscObject)Amat,&comm));
PetscCallMPI(MPI_Comm_size(comm,&size)); 1022 if (size == 1) PetscFunctionReturn(0); 1023 1024 /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. */ 1025 PetscCall(MatGetSize(Amat,&M,&N)); 1026 PetscCall(MatGetOwnershipRange(Amat,&first,&last)); 1027 PetscCall(PetscMalloc1(N-last+first,¬me)); 1028 for (i=0; i<first; i++) notme[i] = i; 1029 for (i=last; i<M; i++) notme[i-last+first] = i; 1030 PetscCall(ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme)); 1031 PetscCall(ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me)); 1032 PetscCall(MatCreateSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs)); 1033 Aoff = Aoffs[0]; 1034 PetscCall(MatCreateSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs)); 1035 Boff = Boffs[0]; 1036 PetscCall(MatIsTranspose(Aoff,Boff,tol,f)); 1037 PetscCall(MatDestroyMatrices(1,&Aoffs)); 1038 PetscCall(MatDestroyMatrices(1,&Boffs)); 1039 PetscCall(ISDestroy(&Me)); 1040 PetscCall(ISDestroy(&Notme)); 1041 PetscCall(PetscFree(notme)); 1042 PetscFunctionReturn(0); 1043 } 1044 1045 PetscErrorCode MatIsSymmetric_MPIAIJ(Mat A,PetscReal tol,PetscBool *f) 1046 { 1047 PetscFunctionBegin; 1048 PetscCall(MatIsTranspose_MPIAIJ(A,A,tol,f)); 1049 PetscFunctionReturn(0); 1050 } 1051 1052 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1053 { 1054 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1055 1056 PetscFunctionBegin; 1057 /* do nondiagonal part */ 1058 PetscCall((*a->B->ops->multtranspose)(a->B,xx,a->lvec)); 1059 /* do local part */ 1060 PetscCall((*a->A->ops->multtransposeadd)(a->A,xx,yy,zz)); 1061 /* add partial results together */ 1062 PetscCall(VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE)); 1063 PetscCall(VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE)); 1064 PetscFunctionReturn(0); 1065 } 1066 1067 /* 1068 This only works correctly for square matrices where the subblock A->A is the 1069 diagonal block 1070 */ 1071 PetscErrorCode 
MatGetDiagonal_MPIAIJ(Mat A,Vec v)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  /* the diagonal of the global matrix lives entirely in the local diagonal block a->A,
     which requires the row and column partitions to coincide */
  PetscCheck(A->rmap->N == A->cmap->N,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
  PetscCheckFalse(A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
  PetscCall(MatGetDiagonal(a->A,v));
  PetscFunctionReturn(0);
}

/* Scale both the diagonal and off-diagonal blocks by aa */
PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  PetscCall(MatScale(a->A,aa));
  PetscCall(MatScale(a->B,aa));
  PetscFunctionReturn(0);
}

/* Free COO stuff; must match allocation methods in MatSetPreallocationCOO_MPIAIJ() */
PETSC_INTERN PetscErrorCode MatResetPreallocationCOO_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;

  PetscFunctionBegin;
  PetscCall(PetscSFDestroy(&aij->coo_sf));
  /* arrays for COO entries handled in one pass (perm/jmap pairs for A and B) */
  PetscCall(PetscFree(aij->Aperm1));
  PetscCall(PetscFree(aij->Bperm1));
  PetscCall(PetscFree(aij->Ajmap1));
  PetscCall(PetscFree(aij->Bjmap1));

  /* arrays for the second (remote) pass */
  PetscCall(PetscFree(aij->Aimap2));
  PetscCall(PetscFree(aij->Bimap2));
  PetscCall(PetscFree(aij->Aperm2));
  PetscCall(PetscFree(aij->Bperm2));
  PetscCall(PetscFree(aij->Ajmap2));
  PetscCall(PetscFree(aij->Bjmap2));

  PetscCall(PetscFree2(aij->sendbuf,aij->recvbuf));
  PetscCall(PetscFree(aij->Cperm1));
  PetscFunctionReturn(0);
}

/* Destroy the MPIAIJ matrix: blocks, scatters, maps, COO state, and all composed methods */
PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT,mat->rmap->N,mat->cmap->N);
#endif
  PetscCall(MatStashDestroy_Private(&mat->stash));
  PetscCall(VecDestroy(&aij->diag));
  PetscCall(MatDestroy(&aij->A));
  PetscCall(MatDestroy(&aij->B));
  /* colmap is a hash table when CTABLE is enabled, otherwise a plain array */
#if defined(PETSC_USE_CTABLE)
  PetscCall(PetscTableDestroy(&aij->colmap));
#else
  PetscCall(PetscFree(aij->colmap));
#endif
  PetscCall(PetscFree(aij->garray));
  PetscCall(VecDestroy(&aij->lvec));
  PetscCall(VecScatterDestroy(&aij->Mvctx));
  PetscCall(PetscFree2(aij->rowvalues,aij->rowindices));
  PetscCall(PetscFree(aij->ld));

  /* Free COO */
  PetscCall(MatResetPreallocationCOO_MPIAIJ(mat));

  PetscCall(PetscFree(mat->data));

  /* may be created by MatCreateMPIAIJSumSeqAIJSymbolic */
  PetscCall(PetscObjectCompose((PetscObject)mat,"MatMergeSeqsToMPI",NULL));

  /* detach every composed method so no dangling function pointers survive the type */
  PetscCall(PetscObjectChangeTypeName((PetscObject)mat,NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatResetPreallocation_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpibaij_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL));
#if defined(PETSC_HAVE_CUDA)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijcusparse_C",NULL));
#endif
#if defined(PETSC_HAVE_KOKKOS_KERNELS)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijkokkos_C",NULL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpidense_C",NULL));
#if defined(PETSC_HAVE_ELEMENTAL)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_elemental_C",NULL));
#endif
#if defined(PETSC_HAVE_SCALAPACK)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_scalapack_C",NULL));
#endif
#if defined(PETSC_HAVE_HYPRE)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_hypre_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_transpose_mpiaij_mpiaij_C",NULL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_is_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_is_mpiaij_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaij_mpiaij_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetUseScalableIncreaseOverlap_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijperm_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijsell_C",NULL));
#if defined(PETSC_HAVE_MKL_SPARSE)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijmkl_C",NULL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijcrl_C",NULL));
  /* NOTE(review): "MatConvert_mpiaij_is_C" is also cleared above -- this second call is
     redundant but harmless (composing NULL twice) */
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_is_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisell_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatSetPreallocationCOO_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatSetValuesCOO_C",NULL));
  PetscFunctionReturn(0);
}

/* Write the matrix in PETSc binary format: header, row lengths, global column
   indices (per row in ascending order), then the values in the same order */
PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer
viewer)
{
  Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ        *A   = (Mat_SeqAIJ*)aij->A->data;
  Mat_SeqAIJ        *B   = (Mat_SeqAIJ*)aij->B->data;
  const PetscInt    *garray = aij->garray; /* local-to-global map for B's columns */
  const PetscScalar *aa,*ba;
  PetscInt          header[4],M,N,m,rs,cs,nz,cnt,i,ja,jb;
  PetscInt          *rowlens;
  PetscInt          *colidxs;
  PetscScalar       *matvals;

  PetscFunctionBegin;
  PetscCall(PetscViewerSetUp(viewer));

  M  = mat->rmap->N;
  N  = mat->cmap->N;
  m  = mat->rmap->n;
  rs = mat->rmap->rstart;
  cs = mat->cmap->rstart;
  nz = A->nz + B->nz; /* local nonzero count (diag + off-diag) */

  /* write matrix header; header[3] becomes the global nz via the reduction on rank 0 */
  header[0] = MAT_FILE_CLASSID;
  header[1] = M; header[2] = N; header[3] = nz;
  PetscCallMPI(MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat)));
  PetscCall(PetscViewerBinaryWrite(viewer,header,4,PETSC_INT));

  /* fill in and store row lengths */
  PetscCall(PetscMalloc1(m,&rowlens));
  for (i=0; i<m; i++) rowlens[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
  PetscCall(PetscViewerBinaryWriteAll(viewer,rowlens,m,rs,M,PETSC_INT));
  PetscCall(PetscFree(rowlens));

  /* fill in and store column indices: merge B's lower-than-cs columns, then the
     diagonal block (shifted by cs), then B's remaining (higher) columns, so each
     row comes out in ascending global column order */
  PetscCall(PetscMalloc1(nz,&colidxs));
  for (cnt=0, i=0; i<m; i++) {
    for (jb=B->i[i]; jb<B->i[i+1]; jb++) {
      if (garray[B->j[jb]] > cs) break;
      colidxs[cnt++] = garray[B->j[jb]];
    }
    for (ja=A->i[i]; ja<A->i[i+1]; ja++)
      colidxs[cnt++] = A->j[ja] + cs;
    for (; jb<B->i[i+1]; jb++)
      colidxs[cnt++] = garray[B->j[jb]];
  }
  PetscCheck(cnt == nz,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt_FMT,cnt,nz);
  PetscCall(PetscViewerBinaryWriteAll(viewer,colidxs,nz,PETSC_DETERMINE,PETSC_DETERMINE,PETSC_INT));
  PetscCall(PetscFree(colidxs));

  /* fill in and store nonzero values */
  PetscCall(MatSeqAIJGetArrayRead(aij->A,&aa));
  PetscCall(MatSeqAIJGetArrayRead(aij->B,&ba));
1244 PetscCall(PetscMalloc1(nz,&matvals)); 1245 for (cnt=0, i=0; i<m; i++) { 1246 for (jb=B->i[i]; jb<B->i[i+1]; jb++) { 1247 if (garray[B->j[jb]] > cs) break; 1248 matvals[cnt++] = ba[jb]; 1249 } 1250 for (ja=A->i[i]; ja<A->i[i+1]; ja++) 1251 matvals[cnt++] = aa[ja]; 1252 for (; jb<B->i[i+1]; jb++) 1253 matvals[cnt++] = ba[jb]; 1254 } 1255 PetscCall(MatSeqAIJRestoreArrayRead(aij->A,&aa)); 1256 PetscCall(MatSeqAIJRestoreArrayRead(aij->B,&ba)); 1257 PetscCheck(cnt == nz,PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt_FMT,cnt,nz); 1258 PetscCall(PetscViewerBinaryWriteAll(viewer,matvals,nz,PETSC_DETERMINE,PETSC_DETERMINE,PETSC_SCALAR)); 1259 PetscCall(PetscFree(matvals)); 1260 1261 /* write block size option to the viewer's .info file */ 1262 PetscCall(MatView_Binary_BlockSizes(mat,viewer)); 1263 PetscFunctionReturn(0); 1264 } 1265 1266 #include <petscdraw.h> 1267 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 1268 { 1269 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1270 PetscMPIInt rank = aij->rank,size = aij->size; 1271 PetscBool isdraw,iascii,isbinary; 1272 PetscViewer sviewer; 1273 PetscViewerFormat format; 1274 1275 PetscFunctionBegin; 1276 PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw)); 1277 PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii)); 1278 PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary)); 1279 if (iascii) { 1280 PetscCall(PetscViewerGetFormat(viewer,&format)); 1281 if (format == PETSC_VIEWER_LOAD_BALANCE) { 1282 PetscInt i,nmax = 0,nmin = PETSC_MAX_INT,navg = 0,*nz,nzlocal = ((Mat_SeqAIJ*) (aij->A->data))->nz + ((Mat_SeqAIJ*) (aij->B->data))->nz; 1283 PetscCall(PetscMalloc1(size,&nz)); 1284 PetscCallMPI(MPI_Allgather(&nzlocal,1,MPIU_INT,nz,1,MPIU_INT,PetscObjectComm((PetscObject)mat))); 1285 for (i=0; i<(PetscInt)size; i++) { 1286 nmax = PetscMax(nmax,nz[i]); 1287 nmin = 
PetscMin(nmin,nz[i]);
        navg += nz[i];
      }
      PetscCall(PetscFree(nz));
      navg = navg/size;
      PetscCall(PetscViewerASCIIPrintf(viewer,"Load Balance - Nonzeros: Min %" PetscInt_FMT " avg %" PetscInt_FMT " max %" PetscInt_FMT "\n",nmin,navg,nmax));
      PetscFunctionReturn(0);
    }
    PetscCall(PetscViewerGetFormat(viewer,&format));
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo info;
      PetscInt *inodes=NULL;

      /* per-rank synchronized output: local sizes, nz counts, and inode usage */
      PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank));
      PetscCall(MatGetInfo(mat,MAT_LOCAL,&info));
      PetscCall(MatInodeGetInodeSizes(aij->A,NULL,&inodes,NULL));
      PetscCall(PetscViewerASCIIPushSynchronized(viewer));
      if (!inodes) {
        PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %g, not using I-node routines\n",
                                                     rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory));
      } else {
        PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %g, using I-node routines\n",
                                                     rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory));
      }
      PetscCall(MatGetInfo(aij->A,MAT_LOCAL,&info));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %" PetscInt_FMT " \n",rank,(PetscInt)info.nz_used));
      PetscCall(MatGetInfo(aij->B,MAT_LOCAL,&info));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %" PetscInt_FMT " \n",rank,(PetscInt)info.nz_used));
      PetscCall(PetscViewerFlush(viewer));
      PetscCall(PetscViewerASCIIPopSynchronized(viewer));
      PetscCall(PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n"));
      PetscCall(VecScatterView(aij->Mvctx,viewer));
      PetscFunctionReturn(0);
    } else if (format ==
PETSC_VIEWER_ASCII_INFO) {
      PetscInt inodecount,inodelimit,*inodes;
      PetscCall(MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit));
      if (inodes) {
        PetscCall(PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %" PetscInt_FMT " nodes, limit used is %" PetscInt_FMT "\n",inodecount,inodelimit));
      } else {
        PetscCall(PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n"));
      }
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      /* nothing to print for factor info on an unfactored matrix */
      PetscFunctionReturn(0);
    }
  } else if (isbinary) {
    if (size == 1) {
      /* uniprocessor: the diagonal block IS the matrix */
      PetscCall(PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name));
      PetscCall(MatView(aij->A,viewer));
    } else {
      PetscCall(MatView_MPIAIJ_Binary(mat,viewer));
    }
    PetscFunctionReturn(0);
  } else if (iascii && size == 1) {
    PetscCall(PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name));
    PetscCall(MatView(aij->A,viewer));
    PetscFunctionReturn(0);
  } else if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;
    PetscCall(PetscViewerDrawGetDraw(viewer,0,&draw));
    PetscCall(PetscDrawIsNull(draw,&isnull));
    if (isnull) PetscFunctionReturn(0);
  }

  { /* assemble the entire matrix onto first processor */
    Mat A = NULL, Av;
    IS  isrow,iscol;

    /* rank 0 requests every row/column; all other ranks request none */
    PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->rmap->N : 0,0,1,&isrow));
    PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->cmap->N : 0,0,1,&iscol));
    PetscCall(MatCreateSubMatrix(mat,isrow,iscol,MAT_INITIAL_MATRIX,&A));
    PetscCall(MatMPIAIJGetSeqAIJ(A,&Av,NULL,NULL));
    /* The commented code uses MatCreateSubMatrices instead */
    /*
      Mat      *AA, A = NULL, Av;
      IS       isrow,iscol;

      PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ?
      mat->rmap->N : 0,0,1,&isrow));
      PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->cmap->N : 0,0,1,&iscol));
      PetscCall(MatCreateSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&AA));
      if (rank == 0) {
        PetscCall(PetscObjectReference((PetscObject)AA[0]));
        A  = AA[0];
        Av = AA[0];
      }
      PetscCall(MatDestroySubMatrices(1,&AA));
    */
    PetscCall(ISDestroy(&iscol));
    PetscCall(ISDestroy(&isrow));
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    PetscCall(PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer));
    if (rank == 0) {
      if (((PetscObject)mat)->name) {
        PetscCall(PetscObjectSetName((PetscObject)Av,((PetscObject)mat)->name));
      }
      PetscCall(MatView_SeqAIJ(Av,sviewer));
    }
    PetscCall(PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer));
    PetscCall(PetscViewerFlush(viewer));
    PetscCall(MatDestroy(&A));
  }
  PetscFunctionReturn(0);
}

/* Top-level MatView for MPIAIJ: accept ASCII/draw/binary/socket viewers, ignore others */
PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
{
  PetscBool iascii,isdraw,issocket,isbinary;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket));
  if (iascii || isdraw || isbinary || issocket) {
    PetscCall(MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer));
  }
  PetscFunctionReturn(0);
}

/* Parallel SOR: only "local" sweep variants are supported; each outer iteration
   refreshes ghost values, moves the off-diagonal contribution to the rhs, then
   runs the sequential SOR on the diagonal block */
PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data;
  Vec        bb1 = NULL;
  PetscBool  hasop;

  PetscFunctionBegin;
  if (flag == SOR_APPLY_UPPER) {
    PetscCall((*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx));
    PetscFunctionReturn(0);
  }

  /* bb1 (the modified rhs) is needed whenever more than one outer iteration runs,
     the initial guess is nonzero, or the Eisenstat trick is requested */
  if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) {
    PetscCall(VecDuplicate(bb,&bb1));
  }

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      /* with a zero guess the first sweep needs no ghost update */
      PetscCall((*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx));
      its--;
    }

    while (its--) {
      PetscCall(VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD));
      PetscCall(VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD));

      /* update rhs: bb1 = bb - B*x */
      PetscCall(VecScale(mat->lvec,-1.0));
      PetscCall((*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1));

      /* local sweep */
      PetscCall((*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx));
    }
  } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      PetscCall((*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx));
      its--;
    }
    while (its--) {
      PetscCall(VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD));
      PetscCall(VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD));

      /* update rhs: bb1 = bb - B*x */
      PetscCall(VecScale(mat->lvec,-1.0));
      PetscCall((*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1));

      /* local sweep */
      PetscCall((*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx));
    }
  } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      PetscCall((*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx));
      its--;
    }
    while (its--) {
      PetscCall(VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD));
      PetscCall(VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD));

      /* update rhs: bb1 = bb - B*x */
      PetscCall(VecScale(mat->lvec,-1.0));
      PetscCall((*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1));

      /* local sweep */
      PetscCall((*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx));
    }
  } else if (flag & SOR_EISENSTAT) {
    Vec xx1;

    PetscCall(VecDuplicate(bb,&xx1));
    /* backward half-sweep from a zero guess */
    PetscCall((*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx));

    PetscCall(VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD));
    PetscCall(VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD));
    /* cache the matrix diagonal on first use */
    if (!mat->diag) {
      PetscCall(MatCreateVecs(matin,&mat->diag,NULL));
      PetscCall(MatGetDiagonal(matin,mat->diag));
    }
    PetscCall(MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop));
    if (hasop) {
      PetscCall(MatMultDiagonalBlock(matin,xx,bb1));
    } else {
      PetscCall(VecPointwiseMult(bb1,mat->diag,xx));
    }
    PetscCall(VecAYPX(bb1,(omega-2.0)/omega,bb));

    PetscCall(MatMultAdd(mat->B,mat->lvec,bb1,bb1));

    /* local sweep */
    PetscCall((*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1));
    PetscCall(VecAXPY(xx,1.0,xx1));
    PetscCall(VecDestroy(&xx1));
  } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported");

  PetscCall(VecDestroy(&bb1));

  matin->factorerrortype = mat->A->factorerrortype;
  PetscFunctionReturn(0);
}

/* Build B = P_row * A * P_col for permutations rowp/colp, using PetscSFs to invert the
   permutations and to precompute exact diagonal/off-diagonal preallocation counts */
PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
{
  Mat            aA,aB,Aperm;
  const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj;
  PetscScalar    *aa,*ba;
  PetscInt       i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest;
  PetscSF        rowsf,sf;
  IS             parcolp = NULL;
  PetscBool      done;

  PetscFunctionBegin;
  PetscCall(MatGetLocalSize(A,&m,&n));
  PetscCall(ISGetIndices(rowp,&rwant));
  PetscCall(ISGetIndices(colp,&cwant));
  PetscCall(PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest));

  /* Invert row permutation to find out where my rows should go */
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf));
  PetscCall(PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant));
  PetscCall(PetscSFSetFromOptions(rowsf));
  for (i=0; i<m; i++) work[i] = A->rmap->rstart + i;
  PetscCall(PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPI_REPLACE));
  PetscCall(PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPI_REPLACE));

  /* Invert column permutation to find out where my columns should go */
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A),&sf));
  PetscCall(PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant));
  PetscCall(PetscSFSetFromOptions(sf));
  for (i=0; i<n; i++) work[i] = A->cmap->rstart + i;
  PetscCall(PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPI_REPLACE));
  PetscCall(PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPI_REPLACE));
  PetscCall(PetscSFDestroy(&sf));

  PetscCall(ISRestoreIndices(rowp,&rwant));
  PetscCall(ISRestoreIndices(colp,&cwant));
  PetscCall(MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols));

  /* Find out where my gcols should go: broadcast the permuted destinations of the
     ghost columns from their owners */
  PetscCall(MatGetSize(aB,NULL,&ng));
  PetscCall(PetscMalloc1(ng,&gcdest));
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A),&sf));
  PetscCall(PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols));
  PetscCall(PetscSFSetFromOptions(sf));
  PetscCall(PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest,MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest,MPI_REPLACE));
  PetscCall(PetscSFDestroy(&sf));

  /* count, per original row, how many entries land in the diagonal (dnnz) vs
     off-diagonal (onnz) block of the permuted matrix */
  PetscCall(PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz));
  PetscCall(MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done));
  PetscCall(MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done));
  for (i=0; i<m; i++) {
    PetscInt    row = rdest[i];
    PetscMPIInt rowner;
    PetscCall(PetscLayoutFindOwner(A->rmap,row,&rowner));
    for (j=ai[i]; j<ai[i+1]; j++) {
      PetscInt    col = cdest[aj[j]];
      PetscMPIInt cowner;
      PetscCall(PetscLayoutFindOwner(A->cmap,col,&cowner)); /* Could build an index for the columns to eliminate this search */
      if (rowner == cowner) dnnz[i]++;
      else onnz[i]++;
    }
    for (j=bi[i]; j<bi[i+1]; j++) {
      PetscInt    col = gcdest[bj[j]];
      PetscMPIInt cowner;
      PetscCall(PetscLayoutFindOwner(A->cmap,col,&cowner));
      if (rowner == cowner) dnnz[i]++;
      else onnz[i]++;
    }
  }
  /* push the counts to the ranks that will own the permuted rows */
  PetscCall(PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz,MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz,MPI_REPLACE));
  PetscCall(PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz,MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz,MPI_REPLACE));
  PetscCall(PetscSFDestroy(&rowsf));

  PetscCall(MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm));
  PetscCall(MatSeqAIJGetArray(aA,&aa));
  PetscCall(MatSeqAIJGetArray(aB,&ba));
  for (i=0; i<m; i++) {
    PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */
    PetscInt j0,rowlen;
    rowlen = ai[i+1] - ai[i];
    for (j0=j=0; j<rowlen; j0=j) { /* rowlen could be larger than number of rows m, so sum in batches */
      for (; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]];
      PetscCall(MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES));
    }
    rowlen = bi[i+1] - bi[i];
    for (j0=j=0; j<rowlen; j0=j) {
      for (; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] =
gcdest[bj[bi[i]+j]];
      PetscCall(MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES));
    }
  }
  PetscCall(MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY));
  PetscCall(MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done));
  PetscCall(MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done));
  PetscCall(MatSeqAIJRestoreArray(aA,&aa));
  PetscCall(MatSeqAIJRestoreArray(aB,&ba));
  PetscCall(PetscFree4(dnnz,onnz,tdnnz,tonnz));
  PetscCall(PetscFree3(work,rdest,cdest));
  PetscCall(PetscFree(gcdest));
  if (parcolp) PetscCall(ISDestroy(&colp));
  *B = Aperm;
  PetscFunctionReturn(0);
}

/* Return the number of ghost columns and (optionally) their global indices */
PetscErrorCode MatGetGhosts_MPIAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[])
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;

  PetscFunctionBegin;
  PetscCall(MatGetSize(aij->B,NULL,nghosts));
  if (ghosts) *ghosts = aij->garray;
  PetscFunctionReturn(0);
}

/* Collect MatInfo statistics: sum the two local blocks, then reduce (max or sum)
   over the communicator according to flag */
PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
  Mat            A = mat->A,B = mat->B;
  PetscLogDouble isend[5],irecv[5];

  PetscFunctionBegin;
  info->block_size = 1.0;
  PetscCall(MatGetInfo(A,MAT_LOCAL,info));

  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;

  PetscCall(MatGetInfo(B,MAT_LOCAL,info));

  /* accumulate the off-diagonal block's statistics on top of the diagonal block's */
  isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
  isend[3] += info->memory;  isend[4] += info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    /* per-field max across all ranks */
    PetscCall(MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_MAX,PetscObjectComm((PetscObject)matin)));

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    /* per-field sum across all ranks */
    PetscCall(MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_SUM,PetscObjectComm((PetscObject)matin)));

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(0);
}

/* Dispatches a MatOption to the diagonal (A) and/or off-diagonal (B) sequential blocks */
PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_UNUSED_NONZERO_LOCATION_ERR:
  case MAT_KEEP_NONZERO_PATTERN:
  case MAT_NEW_NONZERO_LOCATION_ERR:
  case MAT_USE_INODES:
  case MAT_IGNORE_ZERO_ENTRIES:
  case MAT_FORM_EXPLICIT_TRANSPOSE:
    /* options that are simply forwarded to both local blocks */
    MatCheckPreallocated(A,1);
    PetscCall(MatSetOption(a->A,op,flg));
    PetscCall(MatSetOption(a->B,op,flg));
    break;
  case MAT_ROW_ORIENTED:
    MatCheckPreallocated(A,1);
    /* remember orientation for MatSetValues on the parallel matrix, then forward */
    a->roworiented = flg;

    PetscCall(MatSetOption(a->A,op,flg));
    PetscCall(MatSetOption(a->B,op,flg));
    break;
  case MAT_FORCE_DIAGONAL_ENTRIES:
  case MAT_SORTED_FULL:
    PetscCall(PetscInfo(A,"Option %s ignored\n",MatOptions[op]));
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = flg;
    break;
  /* Symmetry flags are handled directly by MatSetOption() and they don't affect preallocation */
  case MAT_SPD:
  case MAT_SYMMETRIC:
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_SYMMETRY_ETERNAL:
    break;
  case MAT_SUBMAT_SINGLEIS:
    A->submat_singleis = flg;
    break;
  case MAT_STRUCTURE_ONLY:
    /* The option is handled directly by MatSetOption() */
    break;
  default:
    SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
  }
  PetscFunctionReturn(0);
}

/* Returns one locally-owned row of the parallel matrix, merging the diagonal and
   off-diagonal blocks into a single list sorted by global column number */
PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIAIJ  *mat = (Mat_MPIAIJ*)matin->data;
  PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p;
  PetscInt    i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
  PetscInt    nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
  PetscInt    *cmap,*idx_p;

  PetscFunctionBegin;
  /* only one row may be "checked out" at a time; MatRestoreRow releases it */
  PetscCheck(!mat->getrowactive,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
    PetscInt   max = 1,tmp;
    /* longest combined row length over both blocks sizes the scratch buffers */
    for (i=0; i<matin->rmap->n; i++) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
      if (max < tmp) max = tmp;
    }
    PetscCall(PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices));
  }

  PetscCheckFalse(row < rstart || row >= rend,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
  lrow = row - rstart;

  /* request only the pieces (values/columns) the caller asked for */
  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = NULL; pvB = NULL;}
  if (!idx) {pcA = NULL; if (!v) pcB = NULL;}
  PetscCall((*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA));
  PetscCall((*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB));
  nztot = nzA + nzB;

  cmap = mat->garray;
  if (v || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      PetscInt imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        /* imark = number of B entries with global column < cstart (they precede A's entries) */
        for (i=0; i<nzB; i++) {
          if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i];
        for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          /* imark already determined while copying values */
          for (i=0; i<imark; i++) {
            idx_p[i] = cmap[cworkB[i]];
          }
        } else {
          for (i=0; i<nzB; i++) {
            if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
            else break;
          }
          imark = i;
        }
        for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i];
        for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]];
      }
    } else {
      if (idx) *idx = NULL;
      if (v)   *v   = NULL;
    }
  }
  *nz = nztot;
  PetscCall((*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA));
  PetscCall((*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB));
  PetscFunctionReturn(0);
}

/* Releases the row obtained with MatGetRow_MPIAIJ (buffers are reused, nothing to free) */
PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;

  PetscFunctionBegin;
  PetscCheck(aij->getrowactive,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
  aij->getrowactive = PETSC_FALSE;
  PetscFunctionReturn(0);
}

/* Computes the Frobenius, 1- (max column) or infinity- (max row) norm of the parallel matrix */
PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
{
  Mat_MPIAIJ      *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ      *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
  PetscInt        i,j,cstart = mat->cmap->rstart;
  PetscReal       sum = 0.0;
  const MatScalar *v,*amata,*bmata;

  PetscFunctionBegin;
  if (aij->size == 1) {
    /* single rank: defer entirely to the sequential implementation */
    PetscCall(MatNorm(aij->A,type,norm));
  } else {
    PetscCall(MatSeqAIJGetArrayRead(aij->A,&amata));
    PetscCall(MatSeqAIJGetArrayRead(aij->B,&bmata));
    if (type == NORM_FROBENIUS) {
      /* sum of |a_ij|^2 over both blocks, then global sum and sqrt */
      v = amata;
      for (i=0; i<amat->nz; i++) {
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
      }
      v = bmata;
      for (i=0; i<bmat->nz; i++) {
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
      }
      PetscCall(MPIU_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat)));
      *norm = PetscSqrtReal(*norm);
      PetscCall(PetscLogFlops(2.0*amat->nz+2.0*bmat->nz));
    } else if (type == NORM_1) { /* max column norm */
      /* NOTE(review): accumulates a dense global-column vector of length N on every
         rank — memory scales with the global column count */
      PetscReal *tmp,*tmp2;
      PetscInt  *jj,*garray = aij->garray;
      PetscCall(PetscCalloc1(mat->cmap->N+1,&tmp));
      PetscCall(PetscMalloc1(mat->cmap->N+1,&tmp2));
      *norm = 0.0;
      v = amata; jj = amat->j;
      for (j=0; j<amat->nz; j++) {
        tmp[cstart + *jj++] += PetscAbsScalar(*v); v++;
      }
      v = bmata; jj = bmat->j;
      for (j=0; j<bmat->nz; j++) {
        tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
      }
      PetscCall(MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat)));
      for (j=0; j<mat->cmap->N; j++) {
        if (tmp2[j] > *norm) *norm = tmp2[j];
      }
      PetscCall(PetscFree(tmp));
      PetscCall(PetscFree(tmp2));
      PetscCall(PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0)));
    } else if (type == NORM_INFINITY) { /* max row norm */
      PetscReal ntemp = 0.0;
      for (j=0; j<aij->A->rmap->n; j++) {
        v = amata + amat->i[j];
        sum = 0.0;
        for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
          sum += PetscAbsScalar(*v); v++;
        }
        v = bmata + bmat->i[j];
        for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
          sum += PetscAbsScalar(*v); v++;
        }
        if (sum > ntemp) ntemp = sum;
      }
      PetscCall(MPIU_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat)));
      PetscCall(PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0)));
    } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm");
    PetscCall(MatSeqAIJRestoreArrayRead(aij->A,&amata));
    PetscCall(MatSeqAIJRestoreArrayRead(aij->B,&bmata));
  }
  PetscFunctionReturn(0);
}

/* Forms the (explicit) transpose of a parallel AIJ matrix */
PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
{
  Mat_MPIAIJ      *a =(Mat_MPIAIJ*)A->data,*b;
  Mat_SeqAIJ      *Aloc =(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data,*sub_B_diag;
  PetscInt        M = A->rmap->N,N=A->cmap->N,ma,na,mb,nb,row,*cols,*cols_tmp,*B_diag_ilen,i,ncol,A_diag_ncol;
  const PetscInt  *ai,*aj,*bi,*bj,*B_diag_i;
  Mat             B,A_diag,*B_diag;
  const MatScalar *pbv,*bv;

  PetscFunctionBegin;
  ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n;
  ai = Aloc->i; aj = Aloc->j;
  bi = Bloc->i; bj = Bloc->j;
  if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
    PetscInt             *d_nnz,*g_nnz,*o_nnz;
    PetscSFNode          *oloc;
    PETSC_UNUSED PetscSF sf;

    PetscCall(PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc));
    /* compute d_nnz for preallocation */
    PetscCall(PetscArrayzero(d_nnz,na));
    for (i=0; i<ai[ma]; i++) d_nnz[aj[i]]++;
    /* compute local off-diagonal contributions */
    PetscCall(PetscArrayzero(g_nnz,nb));
    for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++;
    /* map those to global */
    PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A),&sf));
    PetscCall(PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray));
    PetscCall(PetscSFSetFromOptions(sf));
    PetscCall(PetscArrayzero(o_nnz,na));
    PetscCall(PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM));
    PetscCall(PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM));
    PetscCall(PetscSFDestroy(&sf));

    /* B has the transposed sizes: cmap of A becomes rmap of B */
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A),&B));
    PetscCall(MatSetSizes(B,A->cmap->n,A->rmap->n,N,M));
    PetscCall(MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs)));
    PetscCall(MatSetType(B,((PetscObject)A)->type_name));
    PetscCall(MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz));
    PetscCall(PetscFree4(d_nnz,o_nnz,g_nnz,oloc));
  } else {
    B = *matout;
    PetscCall(MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE));
  }

  b           = (Mat_MPIAIJ*)B->data;
  A_diag      = a->A;
  B_diag      = &b->A;
  sub_B_diag  = (Mat_SeqAIJ*)(*B_diag)->data;
  A_diag_ncol = A_diag->cmap->N;
  B_diag_ilen = sub_B_diag->ilen;
  B_diag_i    = sub_B_diag->i;

  /* Set ilen for diagonal of B */
  for (i=0; i<A_diag_ncol; i++) {
    B_diag_ilen[i] = B_diag_i[i+1] - B_diag_i[i];
  }

  /* Transpose the diagonal part of the matrix. In contrast to the offdiagonal part, this can be done
     very quickly (=without using MatSetValues), because all writes are local.
  */
  PetscCall(MatTranspose(A_diag,MAT_REUSE_MATRIX,B_diag));

  /* copy over the B part */
  PetscCall(PetscMalloc1(bi[mb],&cols));
  PetscCall(MatSeqAIJGetArrayRead(a->B,&bv));
  pbv = bv;
  row = A->rmap->rstart;
  /* translate B's compressed local column indices to global column numbers */
  for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]];
  cols_tmp = cols;
  for (i=0; i<mb; i++) {
    ncol = bi[i+1]-bi[i];
    /* row i of B becomes column `row` of the transpose (note swapped m/n arguments) */
    PetscCall(MatSetValues(B,ncol,cols_tmp,1,&row,pbv,INSERT_VALUES));
    row++;
    pbv += ncol; cols_tmp += ncol;
  }
  PetscCall(PetscFree(cols));
  PetscCall(MatSeqAIJRestoreArrayRead(a->B,&bv));

  PetscCall(MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY));
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
    *matout = B;
  } else {
    /* in-place transpose: replace A's guts with B's */
    PetscCall(MatHeaderMerge(A,&B));
  }
  PetscFunctionReturn(0);
}

/* Computes mat = diag(ll)*mat*diag(rr); either vector may be NULL */
PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
  Mat        a = aij->A,b = aij->B;
  PetscInt   s1,s2,s3;

  PetscFunctionBegin;
  PetscCall(MatGetLocalSize(mat,&s2,&s3));
  if (rr) {
    PetscCall(VecGetLocalSize(rr,&s1));
    PetscCheck(s1==s3,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
    /* Overlap communication with computation. */
    PetscCall(VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD));
  }
  if (ll) {
    PetscCall(VecGetLocalSize(ll,&s1));
    PetscCheck(s1==s2,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
    /* left scaling of B is purely local, do it while the scatter is in flight */
    PetscCall((*b->ops->diagonalscale)(b,ll,NULL));
  }
  /* scale the diagonal block */
  PetscCall((*a->ops->diagonalscale)(a,ll,rr));

  if (rr) {
    /* Do a scatter end and then right scale the off-diagonal block */
    PetscCall(VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD));
    PetscCall((*b->ops->diagonalscale)(b,NULL,aij->lvec));
  }
  PetscFunctionReturn(0);
}

/* Marks the matrix as not factored (only the diagonal block carries factor state here) */
PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  PetscCall(MatSetUnfactored(a->A));
  PetscFunctionReturn(0);
}

/* Tests A == B entrywise; the result is reduced with logical AND over all ranks */
PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool *flag)
{
  Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
  Mat        a,b,c,d;
  PetscBool  flg;

  PetscFunctionBegin;
  a = matA->A; b = matA->B;
  c = matB->A; d = matB->B;

  PetscCall(MatEqual(a,c,&flg));
  if (flg) {
    /* only compare off-diagonal blocks if the diagonal blocks already matched */
    PetscCall(MatEqual(b,d,&flg));
  }
  PetscCall(MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A)));
  PetscFunctionReturn(0);
}

/* Copies A into B, block-by-block when the nonzero patterns and implementations match */
PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
  Mat_MPIAIJ *b = (Mat_MPIAIJ*)B->data;

  PetscFunctionBegin;
  /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy.
   */
  if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
    /* because of the column compression in the off-processor part of the matrix a->B,
       the number of columns in a->B and b->B may be different, hence we cannot call
       the MatCopy() directly on the two parts. If need be, we can provide a more
       efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
       then copying the submatrices */
    PetscCall(MatCopy_Basic(A,B,str));
  } else {
    PetscCall(MatCopy(a->A,b->A,str));
    PetscCall(MatCopy(a->B,b->B,str));
  }
  PetscCall(PetscObjectStateIncrease((PetscObject)B));
  PetscFunctionReturn(0);
}

/* Default setup: preallocate with PETSc's default nonzero estimates */
PetscErrorCode MatSetUp_MPIAIJ(Mat A)
{
  PetscFunctionBegin;
  PetscCall(MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,NULL,PETSC_DEFAULT,NULL));
  PetscFunctionReturn(0);
}

/*
     Computes the number of nonzeros per row needed for preallocation when X and Y
     have different nonzero structure. nnz[i] receives the size of the union of
     the column sets of row i of X and row i of Y, compared in the global numbering
     given by the xltog/yltog local-to-global maps. Both rows are assumed sorted
     by ascending global column.
*/
PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz)
{
  PetscInt i,j,k,nzx,nzy;

  PetscFunctionBegin;
  /* Set the number of nonzeros in the new matrix */
  for (i=0; i<m; i++) {
    const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i];
    nzx = xi[i+1] - xi[i];
    nzy = yi[i+1] - yi[i];
    nnz[i] = 0;
    /* merge-count the two sorted column lists */
    for (j=0,k=0; j<nzx; j++) {                                 /* Point in X */
      for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */
      if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++;             /* Skip duplicate */
      nnz[i]++;
    }
    for (; k<nzy; k++) nnz[i]++; /* remaining columns only in Y */
  }
  PetscFunctionReturn(0);
}

/* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz)
{
  PetscInt   m = Y->rmap->N; /* Y is a sequential block, so global row count == local */
  Mat_SeqAIJ *x = (Mat_SeqAIJ*)X->data;
  Mat_SeqAIJ *y = (Mat_SeqAIJ*)Y->data;

  PetscFunctionBegin;
  PetscCall(MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz));
  PetscFunctionReturn(0);
}

/* Computes Y += a*X, choosing a strategy based on how the nonzero patterns relate */
PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
{
  Mat_MPIAIJ *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data;

  PetscFunctionBegin;
  if (str == SAME_NONZERO_PATTERN) {
    /* identical patterns: add block-wise without any restructuring */
    PetscCall(MatAXPY(yy->A,a,xx->A,str));
    PetscCall(MatAXPY(yy->B,a,xx->B,str));
  } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
    PetscCall(MatAXPY_Basic(Y,a,X,str));
  } else {
    /* different patterns: build a new matrix B preallocated for the union pattern */
    Mat      B;
    PetscInt *nnz_d,*nnz_o;

    PetscCall(PetscMalloc1(yy->A->rmap->N,&nnz_d));
    PetscCall(PetscMalloc1(yy->B->rmap->N,&nnz_o));
    PetscCall(MatCreate(PetscObjectComm((PetscObject)Y),&B));
2107 PetscCall(PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name)); 2108 PetscCall(MatSetLayouts(B,Y->rmap,Y->cmap)); 2109 PetscCall(MatSetType(B,((PetscObject)Y)->type_name)); 2110 PetscCall(MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d)); 2111 PetscCall(MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o)); 2112 PetscCall(MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o)); 2113 PetscCall(MatAXPY_BasicWithPreallocation(B,Y,a,X,str)); 2114 PetscCall(MatHeaderMerge(Y,&B)); 2115 PetscCall(PetscFree(nnz_d)); 2116 PetscCall(PetscFree(nnz_o)); 2117 } 2118 PetscFunctionReturn(0); 2119 } 2120 2121 PETSC_INTERN PetscErrorCode MatConjugate_SeqAIJ(Mat); 2122 2123 PetscErrorCode MatConjugate_MPIAIJ(Mat mat) 2124 { 2125 PetscFunctionBegin; 2126 if (PetscDefined(USE_COMPLEX)) { 2127 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2128 2129 PetscCall(MatConjugate_SeqAIJ(aij->A)); 2130 PetscCall(MatConjugate_SeqAIJ(aij->B)); 2131 } 2132 PetscFunctionReturn(0); 2133 } 2134 2135 PetscErrorCode MatRealPart_MPIAIJ(Mat A) 2136 { 2137 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2138 2139 PetscFunctionBegin; 2140 PetscCall(MatRealPart(a->A)); 2141 PetscCall(MatRealPart(a->B)); 2142 PetscFunctionReturn(0); 2143 } 2144 2145 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 2146 { 2147 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2148 2149 PetscFunctionBegin; 2150 PetscCall(MatImaginaryPart(a->A)); 2151 PetscCall(MatImaginaryPart(a->B)); 2152 PetscFunctionReturn(0); 2153 } 2154 2155 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A,Vec v,PetscInt idx[]) 2156 { 2157 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2158 PetscInt i,*idxb = NULL,m = A->rmap->n; 2159 PetscScalar *va,*vv; 2160 Vec vB,vA; 2161 const PetscScalar *vb; 2162 2163 PetscFunctionBegin; 2164 PetscCall(VecCreateSeq(PETSC_COMM_SELF,m,&vA)); 2165 PetscCall(MatGetRowMaxAbs(a->A,vA,idx)); 2166 2167 PetscCall(VecGetArrayWrite(vA,&va)); 2168 if (idx) { 2169 for (i=0; i<m; i++) { 2170 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2171 } 
2172 } 2173 2174 PetscCall(VecCreateSeq(PETSC_COMM_SELF,m,&vB)); 2175 PetscCall(PetscMalloc1(m,&idxb)); 2176 PetscCall(MatGetRowMaxAbs(a->B,vB,idxb)); 2177 2178 PetscCall(VecGetArrayWrite(v,&vv)); 2179 PetscCall(VecGetArrayRead(vB,&vb)); 2180 for (i=0; i<m; i++) { 2181 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 2182 vv[i] = vb[i]; 2183 if (idx) idx[i] = a->garray[idxb[i]]; 2184 } else { 2185 vv[i] = va[i]; 2186 if (idx && PetscAbsScalar(va[i]) == PetscAbsScalar(vb[i]) && idxb[i] != -1 && idx[i] > a->garray[idxb[i]]) 2187 idx[i] = a->garray[idxb[i]]; 2188 } 2189 } 2190 PetscCall(VecRestoreArrayWrite(vA,&vv)); 2191 PetscCall(VecRestoreArrayWrite(vA,&va)); 2192 PetscCall(VecRestoreArrayRead(vB,&vb)); 2193 PetscCall(PetscFree(idxb)); 2194 PetscCall(VecDestroy(&vA)); 2195 PetscCall(VecDestroy(&vB)); 2196 PetscFunctionReturn(0); 2197 } 2198 2199 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2200 { 2201 Mat_MPIAIJ *mat = (Mat_MPIAIJ*) A->data; 2202 PetscInt m = A->rmap->n,n = A->cmap->n; 2203 PetscInt cstart = A->cmap->rstart,cend = A->cmap->rend; 2204 PetscInt *cmap = mat->garray; 2205 PetscInt *diagIdx, *offdiagIdx; 2206 Vec diagV, offdiagV; 2207 PetscScalar *a, *diagA, *offdiagA; 2208 const PetscScalar *ba,*bav; 2209 PetscInt r,j,col,ncols,*bi,*bj; 2210 Mat B = mat->B; 2211 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 2212 2213 PetscFunctionBegin; 2214 /* When a process holds entire A and other processes have no entry */ 2215 if (A->cmap->N == n) { 2216 PetscCall(VecGetArrayWrite(v,&diagA)); 2217 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF,1,m,diagA,&diagV)); 2218 PetscCall(MatGetRowMinAbs(mat->A,diagV,idx)); 2219 PetscCall(VecDestroy(&diagV)); 2220 PetscCall(VecRestoreArrayWrite(v,&diagA)); 2221 PetscFunctionReturn(0); 2222 } else if (n == 0) { 2223 if (m) { 2224 PetscCall(VecGetArrayWrite(v,&a)); 2225 for (r = 0; r < m; r++) {a[r] = 0.0; if (idx) idx[r] = -1;} 2226 PetscCall(VecRestoreArrayWrite(v,&a)); 2227 } 2228 PetscFunctionReturn(0); 
  }

  PetscCall(PetscMalloc2(m,&diagIdx,m,&offdiagIdx));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV));
  PetscCall(MatGetRowMinAbs(mat->A, diagV, diagIdx));

  /* Get offdiagIdx[] for implicit 0.0 */
  PetscCall(MatSeqAIJGetArrayRead(B,&bav));
  ba = bav;
  bi = b->i;
  bj = b->j;
  PetscCall(VecGetArrayWrite(offdiagV, &offdiagA));
  for (r = 0; r < m; r++) {
    ncols = bi[r+1] - bi[r];
    if (ncols == A->cmap->N - n) { /* Brow is dense */
      offdiagA[r] = *ba; offdiagIdx[r] = cmap[0];
    } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */
      offdiagA[r] = 0.0;

      /* Find first hole in the cmap */
      for (j=0; j<ncols; j++) {
        col = cmap[bj[j]]; /* global column number = cmap[B column number] */
        if (col > j && j < cstart) {
          offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
          break;
        } else if (col > j + n && j >= cstart) {
          /* past the diagonal-block column range; skip its n columns */
          offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
          break;
        }
      }
      if (j == ncols && ncols < A->cmap->N - n) {
        /* a hole is outside compressed Bcols */
        if (ncols == 0) {
          if (cstart) {
            offdiagIdx[r] = 0;
          } else offdiagIdx[r] = cend;
        } else { /* ncols > 0 */
          offdiagIdx[r] = cmap[ncols-1] + 1;
          if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
        }
      }
    }

    /* now compare the implicit zero against the stored off-diagonal entries */
    for (j=0; j<ncols; j++) {
      if (PetscAbsScalar(offdiagA[r]) > PetscAbsScalar(*ba)) {offdiagA[r] = *ba; offdiagIdx[r] = cmap[*bj];}
      ba++; bj++;
    }
  }

  /* merge diagonal-block and off-diagonal results per row; ties take the smaller column */
  PetscCall(VecGetArrayWrite(v, &a));
  PetscCall(VecGetArrayRead(diagV, (const PetscScalar**)&diagA));
  for (r = 0; r < m; ++r) {
    if (PetscAbsScalar(diagA[r]) < PetscAbsScalar(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) idx[r] = cstart + diagIdx[r];
    } else if (PetscAbsScalar(diagA[r]) == PetscAbsScalar(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) {
        if (cstart + diagIdx[r] <= offdiagIdx[r]) {
          idx[r] = cstart + diagIdx[r];
        } else idx[r] = offdiagIdx[r];
      }
    } else {
      a[r] = offdiagA[r];
      if (idx) idx[r] = offdiagIdx[r];
    }
  }
  PetscCall(MatSeqAIJRestoreArrayRead(B,&bav));
  PetscCall(VecRestoreArrayWrite(v, &a));
  PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar**)&diagA));
  PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA));
  PetscCall(VecDestroy(&diagV));
  PetscCall(VecDestroy(&offdiagV));
  PetscCall(PetscFree2(diagIdx, offdiagIdx));
  PetscFunctionReturn(0);
}

/* For each local row, returns the entry of minimum real part (implicit zeros in
   the off-diagonal part count) and optionally its global column index */
PetscErrorCode MatGetRowMin_MPIAIJ(Mat A,Vec v,PetscInt idx[])
{
  Mat_MPIAIJ        *mat = (Mat_MPIAIJ*) A->data;
  PetscInt          m = A->rmap->n,n = A->cmap->n;
  PetscInt          cstart = A->cmap->rstart,cend = A->cmap->rend;
  PetscInt          *cmap = mat->garray;
  PetscInt          *diagIdx, *offdiagIdx;
  Vec               diagV, offdiagV;
  PetscScalar       *a, *diagA, *offdiagA;
  const PetscScalar *ba,*bav;
  PetscInt          r,j,col,ncols,*bi,*bj;
  Mat               B = mat->B;
  Mat_SeqAIJ        *b = (Mat_SeqAIJ*)B->data;

  PetscFunctionBegin;
  /* When a process holds entire A and other processes have no entry */
  if (A->cmap->N == n) {
    PetscCall(VecGetArrayWrite(v,&diagA));
    PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF,1,m,diagA,&diagV));
    PetscCall(MatGetRowMin(mat->A,diagV,idx));
    PetscCall(VecDestroy(&diagV));
    PetscCall(VecRestoreArrayWrite(v,&diagA));
    PetscFunctionReturn(0);
  } else if (n == 0) {
    if (m) {
      /* no columns anywhere: min over an empty row is +inf sentinel */
      PetscCall(VecGetArrayWrite(v,&a));
      for (r = 0; r < m; r++) {a[r] = PETSC_MAX_REAL; if (idx) idx[r] = -1;}
      PetscCall(VecRestoreArrayWrite(v,&a));
    }
    PetscFunctionReturn(0);
  }

  PetscCall(PetscCalloc2(m,&diagIdx,m,&offdiagIdx));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV));
  PetscCall(MatGetRowMin(mat->A, diagV, diagIdx));

  /* Get offdiagIdx[] for implicit 0.0 */
  PetscCall(MatSeqAIJGetArrayRead(B,&bav));
  ba = bav;
  bi = b->i;
  bj = b->j;
  PetscCall(VecGetArrayWrite(offdiagV, &offdiagA));
  for (r = 0; r < m; r++) {
    ncols = bi[r+1] - bi[r];
    if (ncols == A->cmap->N - n) { /* Brow is dense */
      offdiagA[r] = *ba; offdiagIdx[r] = cmap[0];
    } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */
      offdiagA[r] = 0.0;

      /* Find first hole in the cmap */
      for (j=0; j<ncols; j++) {
        col = cmap[bj[j]]; /* global column number = cmap[B column number] */
        if (col > j && j < cstart) {
          offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
          break;
        } else if (col > j + n && j >= cstart) {
          offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
          break;
        }
      }
      if (j == ncols && ncols < A->cmap->N - n) {
        /* a hole is outside compressed Bcols */
        if (ncols == 0) {
          if (cstart) {
            offdiagIdx[r] = 0;
          } else offdiagIdx[r] = cend;
        } else { /* ncols > 0 */
          offdiagIdx[r] = cmap[ncols-1] + 1;
          if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
        }
      }
    }

    for (j=0; j<ncols; j++) {
      if (PetscRealPart(offdiagA[r]) > PetscRealPart(*ba)) {offdiagA[r] = *ba; offdiagIdx[r] = cmap[*bj];}
      ba++; bj++;
    }
  }

  /* merge diagonal-block and off-diagonal results per row; ties take the smaller column */
  PetscCall(VecGetArrayWrite(v, &a));
  PetscCall(VecGetArrayRead(diagV, (const PetscScalar**)&diagA));
  for (r = 0; r < m; ++r) {
    if (PetscRealPart(diagA[r]) < PetscRealPart(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) idx[r] = cstart + diagIdx[r];
    } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) {
        if (cstart + diagIdx[r] <= offdiagIdx[r]) {
          idx[r] = cstart + diagIdx[r];
        } else idx[r] = offdiagIdx[r];
      }
    } else {
      a[r] = offdiagA[r];
      if (idx) idx[r] = offdiagIdx[r];
    }
  }
  PetscCall(MatSeqAIJRestoreArrayRead(B,&bav));
  PetscCall(VecRestoreArrayWrite(v, &a));
  PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar**)&diagA));
  PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA));
  PetscCall(VecDestroy(&diagV));
  PetscCall(VecDestroy(&offdiagV));
  PetscCall(PetscFree2(diagIdx, offdiagIdx));
  PetscFunctionReturn(0);
}

/* For each local row, returns the entry of maximum real part (implicit zeros in
   the off-diagonal part count) and optionally its global column index */
PetscErrorCode MatGetRowMax_MPIAIJ(Mat A,Vec v,PetscInt idx[])
{
  Mat_MPIAIJ        *mat = (Mat_MPIAIJ*)A->data;
  PetscInt          m = A->rmap->n,n = A->cmap->n;
  PetscInt          cstart = A->cmap->rstart,cend = A->cmap->rend;
  PetscInt          *cmap = mat->garray;
  PetscInt          *diagIdx, *offdiagIdx;
  Vec               diagV, offdiagV;
  PetscScalar       *a, *diagA, *offdiagA;
  const PetscScalar *ba,*bav;
  PetscInt          r,j,col,ncols,*bi,*bj;
  Mat               B = mat->B;
  Mat_SeqAIJ        *b = (Mat_SeqAIJ*)B->data;

  PetscFunctionBegin;
  /* When a process holds entire A and other processes have no entry */
  if (A->cmap->N == n) {
    PetscCall(VecGetArrayWrite(v,&diagA));
    PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF,1,m,diagA,&diagV));
    PetscCall(MatGetRowMax(mat->A,diagV,idx));
    PetscCall(VecDestroy(&diagV));
    PetscCall(VecRestoreArrayWrite(v,&diagA));
    PetscFunctionReturn(0);
  } else if (n == 0) {
    if (m) {
      /* no columns anywhere: max over an empty row is -inf sentinel */
      PetscCall(VecGetArrayWrite(v,&a));
      for (r = 0; r < m; r++) {a[r] = PETSC_MIN_REAL; if (idx) idx[r] = -1;}
      PetscCall(VecRestoreArrayWrite(v,&a));
    }
    PetscFunctionReturn(0);
  }

  PetscCall(PetscMalloc2(m,&diagIdx,m,&offdiagIdx));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV));
  PetscCall(MatGetRowMax(mat->A, diagV, diagIdx));

  /* Get offdiagIdx[] for implicit 0.0 */
  PetscCall(MatSeqAIJGetArrayRead(B,&bav));
  ba = bav;
  bi = b->i;
  bj = b->j;
  PetscCall(VecGetArrayWrite(offdiagV, &offdiagA));
  for (r = 0; r < m; r++) {
    ncols = bi[r+1] - bi[r];
    if (ncols == A->cmap->N - n) { /* Brow is dense */
      offdiagA[r] = *ba; offdiagIdx[r] = cmap[0];
    } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */
      offdiagA[r] = 0.0;

      /* Find first hole in the cmap */
      for (j=0; j<ncols; j++) {
        col = cmap[bj[j]]; /* global column number = cmap[B column number] */
        if (col > j && j < cstart) {
          offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
          break;
        } else if (col > j + n && j >= cstart) {
          offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
          break;
        }
      }
      if (j == ncols && ncols < A->cmap->N - n) {
        /* a hole is outside compressed Bcols */
        if (ncols == 0) {
          if (cstart) {
            offdiagIdx[r] = 0;
          } else offdiagIdx[r] = cend;
        } else { /* ncols > 0 */
          offdiagIdx[r] = cmap[ncols-1] + 1;
          if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
        }
      }
    }

    for (j=0; j<ncols; j++) {
      if (PetscRealPart(offdiagA[r]) < PetscRealPart(*ba)) {offdiagA[r] = *ba; offdiagIdx[r] = cmap[*bj];}
      ba++; bj++;
    }
  }

  /* merge diagonal-block and off-diagonal results per row; ties take the smaller column */
  PetscCall(VecGetArrayWrite(v, &a));
  PetscCall(VecGetArrayRead(diagV,(const PetscScalar**)&diagA));
  for (r = 0; r < m; ++r) {
    if (PetscRealPart(diagA[r]) > PetscRealPart(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) idx[r] = cstart + diagIdx[r];
    } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) {
        if (cstart + diagIdx[r] <= offdiagIdx[r]) {
          idx[r] = cstart + diagIdx[r];
        } else idx[r] = offdiagIdx[r];
      }
    } else {
      a[r] = offdiagA[r];
      if (idx) idx[r] = offdiagIdx[r];
    }
  }
  PetscCall(MatSeqAIJRestoreArrayRead(B,&bav));
  PetscCall(VecRestoreArrayWrite(v, &a));
  PetscCall(VecRestoreArrayRead(diagV, (const
PetscScalar**)&diagA)); 2516 PetscCall(VecRestoreArrayWrite(offdiagV,&offdiagA)); 2517 PetscCall(VecDestroy(&diagV)); 2518 PetscCall(VecDestroy(&offdiagV)); 2519 PetscCall(PetscFree2(diagIdx, offdiagIdx)); 2520 PetscFunctionReturn(0); 2521 } 2522 2523 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat) 2524 { 2525 Mat *dummy; 2526 2527 PetscFunctionBegin; 2528 PetscCall(MatCreateSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy)); 2529 *newmat = *dummy; 2530 PetscCall(PetscFree(dummy)); 2531 PetscFunctionReturn(0); 2532 } 2533 2534 PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values) 2535 { 2536 Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 2537 2538 PetscFunctionBegin; 2539 PetscCall(MatInvertBlockDiagonal(a->A,values)); 2540 A->factorerrortype = a->A->factorerrortype; 2541 PetscFunctionReturn(0); 2542 } 2543 2544 static PetscErrorCode MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx) 2545 { 2546 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)x->data; 2547 2548 PetscFunctionBegin; 2549 PetscCheck(x->assembled || x->preallocated,PetscObjectComm((PetscObject)x), PETSC_ERR_ARG_WRONGSTATE, "MatSetRandom on an unassembled and unpreallocated MATMPIAIJ is not allowed"); 2550 PetscCall(MatSetRandom(aij->A,rctx)); 2551 if (x->assembled) { 2552 PetscCall(MatSetRandom(aij->B,rctx)); 2553 } else { 2554 PetscCall(MatSetRandomSkipColumnRange_SeqAIJ_Private(aij->B,x->cmap->rstart,x->cmap->rend,rctx)); 2555 } 2556 PetscCall(MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY)); 2557 PetscCall(MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY)); 2558 PetscFunctionReturn(0); 2559 } 2560 2561 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A,PetscBool sc) 2562 { 2563 PetscFunctionBegin; 2564 if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable; 2565 else A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ; 2566 PetscFunctionReturn(0); 2567 } 2568 2569 /*@ 2570 MatMPIAIJSetUseScalableIncreaseOverlap - Determine if the matrix uses a scalable 
   algorithm to compute the overlap

   Collective on Mat

   Input Parameters:
+  A - the matrix
-  sc - PETSC_TRUE indicates use the scalable algorithm (default is not to use the scalable algorithm)

   Level: advanced

@*/
PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A,PetscBool sc)
{
  PetscFunctionBegin;
  /* PetscTryMethod: a no-op for matrix types that do not register the _C method */
  PetscTryMethod(A,"MatMPIAIJSetUseScalableIncreaseOverlap_C",(Mat,PetscBool),(A,sc));
  PetscFunctionReturn(0);
}

/* Options processing for MPIAIJ: currently only -mat_increase_overlap_scalable.
   The current setting is read back from the function pointer so the option's
   displayed default reflects the matrix's actual state. */
PetscErrorCode MatSetFromOptions_MPIAIJ(PetscOptionItems *PetscOptionsObject,Mat A)
{
  PetscBool sc = PETSC_FALSE,flg;

  PetscFunctionBegin;
  PetscOptionsHeadBegin(PetscOptionsObject,"MPIAIJ options");
  if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE;
  PetscCall(PetscOptionsBool("-mat_increase_overlap_scalable","Use a scalable algorithm to compute the overlap","MatIncreaseOverlap",sc,&sc,&flg));
  if (flg) {
    PetscCall(MatMPIAIJSetUseScalableIncreaseOverlap(A,sc));
  }
  PetscOptionsHeadEnd();
  PetscFunctionReturn(0);
}

/* Y = Y + a*I.  Ensures the diagonal part has room for at least one entry per row
   before calling the generic shift: an unpreallocated matrix gets a minimal
   preallocation, and an empty (nz==0) diagonal block is re-preallocated while
   preserving its nonew (new-nonzero-policy) flag. */
PetscErrorCode MatShift_MPIAIJ(Mat Y,PetscScalar a)
{
  Mat_MPIAIJ *maij = (Mat_MPIAIJ*)Y->data;
  Mat_SeqAIJ *aij = (Mat_SeqAIJ*)maij->A->data;

  PetscFunctionBegin;
  if (!Y->preallocated) {
    PetscCall(MatMPIAIJSetPreallocation(Y,1,NULL,0,NULL));
  } else if (!aij->nz) {
    PetscInt nonew = aij->nonew;
    PetscCall(MatSeqAIJSetPreallocation(maij->A,1,NULL));
    aij->nonew = nonew;
  }
  PetscCall(MatShift_Basic(Y,a));
  PetscFunctionReturn(0);
}

/* Reports whether a diagonal entry is structurally missing.  The diagonal of a square
   MPIAIJ matrix lies entirely in the local block a->A, so the sequential routine is
   queried; the returned local row index (if any) is shifted to a global index below. */
PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A,PetscBool *missing,PetscInt *d)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  PetscCheck(A->rmap->n == A->cmap->n,PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices");
  PetscCall(MatMissingDiagonal(a->A,missing,d));
  if (d) {
    PetscInt rstart;
    /* translate the local row index returned by the sequential routine to a global index */
    PetscCall(MatGetOwnershipRange(A,&rstart,NULL));
    *d += rstart;

  }
  PetscFunctionReturn(0);
}

/* Inverts the variable-sized block diagonal; like the fixed-block version the blocks
   live in the local diagonal part a->A, so the work is simply forwarded. */
PetscErrorCode MatInvertVariableBlockDiagonal_MPIAIJ(Mat A,PetscInt nblocks,const PetscInt *bsizes,PetscScalar *diag)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  PetscCall(MatInvertVariableBlockDiagonal(a->A,nblocks,bsizes,diag));
  PetscFunctionReturn(0);
}

/* -------------------------------------------------------------------*/
/* Function table for MATMPIAIJ; slot positions follow struct _MatOps in petsc/private/matimpl.h
   (the numeric comments mark every fifth slot).  NULL means the operation is unsupported
   or provided elsewhere (e.g. attached at MatSetType/MatProduct time). */
static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
                                       MatGetRow_MPIAIJ,
                                       MatRestoreRow_MPIAIJ,
                                       MatMult_MPIAIJ,
                                /* 4*/ MatMultAdd_MPIAIJ,
                                       MatMultTranspose_MPIAIJ,
                                       MatMultTransposeAdd_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                /*10*/ NULL,
                                       NULL,
                                       NULL,
                                       MatSOR_MPIAIJ,
                                       MatTranspose_MPIAIJ,
                                /*15*/ MatGetInfo_MPIAIJ,
                                       MatEqual_MPIAIJ,
                                       MatGetDiagonal_MPIAIJ,
                                       MatDiagonalScale_MPIAIJ,
                                       MatNorm_MPIAIJ,
                                /*20*/ MatAssemblyBegin_MPIAIJ,
                                       MatAssemblyEnd_MPIAIJ,
                                       MatSetOption_MPIAIJ,
                                       MatZeroEntries_MPIAIJ,
                                /*24*/ MatZeroRows_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                /*29*/ MatSetUp_MPIAIJ,
                                       NULL,
                                       NULL,
                                       MatGetDiagonalBlock_MPIAIJ,
                                       NULL,
                                /*34*/ MatDuplicate_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                /*39*/ MatAXPY_MPIAIJ,
                                       MatCreateSubMatrices_MPIAIJ,
                                       MatIncreaseOverlap_MPIAIJ,
                                       MatGetValues_MPIAIJ,
                                       MatCopy_MPIAIJ,
                                /*44*/ MatGetRowMax_MPIAIJ,
                                       MatScale_MPIAIJ,
                                       MatShift_MPIAIJ,
                                       MatDiagonalSet_MPIAIJ,
                                       MatZeroRowsColumns_MPIAIJ,
                                /*49*/ MatSetRandom_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                /*54*/ MatFDColoringCreate_MPIXAIJ,
                                       NULL,
                                       MatSetUnfactored_MPIAIJ,
                                       MatPermute_MPIAIJ,
                                       NULL,
                                /*59*/ MatCreateSubMatrix_MPIAIJ,
                                       MatDestroy_MPIAIJ,
                                       MatView_MPIAIJ,
                                       NULL,
                                       NULL,
                                /*64*/ NULL,
                                       MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                /*69*/ MatGetRowMaxAbs_MPIAIJ,
                                       MatGetRowMinAbs_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                /*75*/ MatFDColoringApply_AIJ,
                                       MatSetFromOptions_MPIAIJ,
                                       NULL,
                                       NULL,
                                       MatFindZeroDiagonals_MPIAIJ,
                                /*80*/ NULL,
                                       NULL,
                                       NULL,
                                /*83*/ MatLoad_MPIAIJ,
                                       MatIsSymmetric_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                /*89*/ NULL,
                                       NULL,
                                       MatMatMultNumeric_MPIAIJ_MPIAIJ,
                                       NULL,
                                       NULL,
                                /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatBindToCPU_MPIAIJ,
                                /*99*/ MatProductSetFromOptions_MPIAIJ,
                                       NULL,
                                       NULL,
                                       MatConjugate_MPIAIJ,
                                       NULL,
                                /*104*/MatSetValuesRow_MPIAIJ,
                                       MatRealPart_MPIAIJ,
                                       MatImaginaryPart_MPIAIJ,
                                       NULL,
                                       NULL,
                                /*109*/NULL,
                                       NULL,
                                       MatGetRowMin_MPIAIJ,
                                       NULL,
                                       MatMissingDiagonal_MPIAIJ,
                                /*114*/MatGetSeqNonzeroStructure_MPIAIJ,
                                       NULL,
                                       MatGetGhosts_MPIAIJ,
                                       NULL,
                                       NULL,
                                /*119*/MatMultDiagonalBlock_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatGetMultiProcBlock_MPIAIJ,
                                /*124*/MatFindNonzeroRows_MPIAIJ,
                                       MatGetColumnReductions_MPIAIJ,
                                       MatInvertBlockDiagonal_MPIAIJ,
                                       MatInvertVariableBlockDiagonal_MPIAIJ,
                                       MatCreateSubMatricesMPI_MPIAIJ,
                                /*129*/NULL,
                                       NULL,
                                       NULL,
                                       MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
                                       NULL,
                                /*134*/NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                /*139*/MatSetBlockSizes_MPIAIJ,
                                       NULL,
                                       NULL,
                                       MatFDColoringSetUp_MPIXAIJ,
                                       MatFindOffBlockDiagonalEntries_MPIAIJ,
                                       MatCreateMPIMatConcatenateSeqMat_MPIAIJ,
                                /*145*/NULL,
                                       NULL,
                                       NULL
};

/* ----------------------------------------------------------------------------------------*/

/* Saves a copy of the current numerical values of the matrix (both the diagonal and the
   off-diagonal sequential parts) so they can later be restored with MatRetrieveValues(). */
PetscErrorCode MatStoreValues_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;

  PetscFunctionBegin;
  PetscCall(MatStoreValues(aij->A));
  PetscCall(MatStoreValues(aij->B));
  PetscFunctionReturn(0);
}

/* Restores the numerical values previously saved with MatStoreValues_MPIAIJ(),
   again by forwarding to the two sequential parts. */
PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;

  PetscFunctionBegin;
  PetscCall(MatRetrieveValues(aij->A));
  PetscCall(MatRetrieveValues(aij->B));
  PetscFunctionReturn(0);
}

/*
   Preallocation for MPIAIJ: sets up the local diagonal part b->A (n x n local block)
   and the off-diagonal part b->B.  Any previously built communication structures
   (column map, ghost array, local vector, scatter) are discarded since the sparsity
   pattern is about to change.

   d_nz/d_nnz  - nonzeros per row for the diagonal part (uniform count or per-row array)
   o_nz/o_nnz  - same for the off-diagonal part
*/
PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
{
  Mat_MPIAIJ *b;
  PetscMPIInt size;

  PetscFunctionBegin;
  PetscCall(PetscLayoutSetUp(B->rmap));
  PetscCall(PetscLayoutSetUp(B->cmap));
  b = (Mat_MPIAIJ*)B->data;

  /* colmap is either a hash table or a dense array depending on the build configuration */
#if defined(PETSC_USE_CTABLE)
  PetscCall(PetscTableDestroy(&b->colmap));
#else
  PetscCall(PetscFree(b->colmap));
#endif
  PetscCall(PetscFree(b->garray));
  PetscCall(VecDestroy(&b->lvec));
  PetscCall(VecScatterDestroy(&b->Mvctx));

  /* Because the B will have been resized we simply destroy it and create a new one each time */
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B),&size));
  PetscCall(MatDestroy(&b->B));
  PetscCall(MatCreate(PETSC_COMM_SELF,&b->B));
  /* on a single process there is no off-diagonal part, so B gets zero columns */
  PetscCall(MatSetSizes(b->B,B->rmap->n,size > 1 ? B->cmap->N : 0,B->rmap->n,size > 1 ?
B->cmap->N : 0));
  PetscCall(MatSetBlockSizesFromMats(b->B,B,B));
  PetscCall(MatSetType(b->B,MATSEQAIJ));
  PetscCall(PetscLogObjectParent((PetscObject)B,(PetscObject)b->B));

  /* the diagonal part survives repreallocation; create it only the first time */
  if (!B->preallocated) {
    PetscCall(MatCreate(PETSC_COMM_SELF,&b->A));
    PetscCall(MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n));
    PetscCall(MatSetBlockSizesFromMats(b->A,B,B));
    PetscCall(MatSetType(b->A,MATSEQAIJ));
    PetscCall(PetscLogObjectParent((PetscObject)B,(PetscObject)b->A));
  }

  PetscCall(MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz));
  PetscCall(MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz));
  B->preallocated  = PETSC_TRUE;
  B->was_assembled = PETSC_FALSE;
  B->assembled     = PETSC_FALSE;
  PetscFunctionReturn(0);
}

/* Resets the matrix to its freshly-preallocated state: existing communication
   structures are discarded and both sequential parts are reset, keeping their
   preallocated nonzero pattern but marking the matrix unassembled. */
PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B)
{
  Mat_MPIAIJ *b;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(B,MAT_CLASSID,1);
  PetscCall(PetscLayoutSetUp(B->rmap));
  PetscCall(PetscLayoutSetUp(B->cmap));
  b = (Mat_MPIAIJ*)B->data;

#if defined(PETSC_USE_CTABLE)
  PetscCall(PetscTableDestroy(&b->colmap));
#else
  PetscCall(PetscFree(b->colmap));
#endif
  PetscCall(PetscFree(b->garray));
  PetscCall(VecDestroy(&b->lvec));
  PetscCall(VecScatterDestroy(&b->Mvctx));

  PetscCall(MatResetPreallocation(b->A));
  PetscCall(MatResetPreallocation(b->B));
  B->preallocated  = PETSC_TRUE;
  B->was_assembled = PETSC_FALSE;
  B->assembled     = PETSC_FALSE;
  PetscFunctionReturn(0);
}

/* Duplicates an MPIAIJ matrix: copies sizes, layouts, the column map, the ghost
   column array, the local work vector/scatter (when present), and both sequential
   parts (values copied or not according to cpvalues). */
PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat        mat;
  Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data;

  PetscFunctionBegin;
  *newmat = NULL;
  PetscCall(MatCreate(PetscObjectComm((PetscObject)matin),&mat));
  PetscCall(MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N));
  PetscCall(MatSetBlockSizesFromMats(mat,matin,matin));
  PetscCall(MatSetType(mat,((PetscObject)matin)->type_name));
  a = (Mat_MPIAIJ*)mat->data;

  mat->factortype   = matin->factortype;
  mat->assembled    = matin->assembled;
  mat->insertmode   = NOT_SET_VALUES;
  mat->preallocated = matin->preallocated;

  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  a->donotstash   = oldmat->donotstash;
  a->roworiented  = oldmat->roworiented;
  a->rowindices   = NULL;
  a->rowvalues    = NULL;
  a->getrowactive = PETSC_FALSE;

  PetscCall(PetscLayoutReference(matin->rmap,&mat->rmap));
  PetscCall(PetscLayoutReference(matin->cmap,&mat->cmap));

  if (oldmat->colmap) {
#if defined(PETSC_USE_CTABLE)
    PetscCall(PetscTableCreateCopy(oldmat->colmap,&a->colmap));
#else
    PetscCall(PetscMalloc1(mat->cmap->N,&a->colmap));
    PetscCall(PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt)));
    PetscCall(PetscArraycpy(a->colmap,oldmat->colmap,mat->cmap->N));
#endif
  } else a->colmap = NULL;
  if (oldmat->garray) {
    PetscInt len;
    len  = oldmat->B->cmap->n;
    PetscCall(PetscMalloc1(len+1,&a->garray));
    PetscCall(PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt)));
    if (len) PetscCall(PetscArraycpy(a->garray,oldmat->garray,len));
  } else a->garray = NULL;

  /* It may happen MatDuplicate is called with a non-assembled matrix
     In fact, MatDuplicate only requires the matrix to be preallocated
     This may happen inside a DMCreateMatrix_Shell */
  if (oldmat->lvec) {
    PetscCall(VecDuplicate(oldmat->lvec,&a->lvec));
    PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec));
  }
  if (oldmat->Mvctx) {
    PetscCall(VecScatterCopy(oldmat->Mvctx,&a->Mvctx));
    PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx));
  }
  PetscCall(MatDuplicate(oldmat->A,cpvalues,&a->A));
  PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A));
  PetscCall(MatDuplicate(oldmat->B,cpvalues,&a->B));
  PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B));
  PetscCall(PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist));
  *newmat = mat;
  PetscFunctionReturn(0);
}

/* Viewer dispatch for loading an MPIAIJ matrix: binary and (when built with HDF5)
   HDF5 viewers are supported; anything else is an error. */
PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
{
  PetscBool isbinary, ishdf5;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(newMat,MAT_CLASSID,1);
  PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2);
  /* force binary viewer to load .info file if it has not yet done so */
  PetscCall(PetscViewerSetUp(viewer));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERHDF5,  &ishdf5));
  if (isbinary) {
    PetscCall(MatLoad_MPIAIJ_Binary(newMat,viewer));
  } else if (ishdf5) {
#if defined(PETSC_HAVE_HDF5)
    PetscCall(MatLoad_AIJ_HDF5(newMat,viewer));
#else
    SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"HDF5 not supported in this build.\nPlease reconfigure using --download-hdf5");
#endif
  } else {
    SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newMat)->type_name);
  }
  PetscFunctionReturn(0);
}

/* Loads an MPIAIJ matrix from a PETSc binary viewer.  The file layout is:
   header [classid, M, N, nz], per-row nonzero counts, column indices, values.
   Row counts are converted to CSR row offsets locally, the sum of all local
   nonzeros is cross-checked against the header, and the data is handed to
   MatMPIAIJSetPreallocationCSR(). */
PetscErrorCode MatLoad_MPIAIJ_Binary(Mat mat, PetscViewer viewer)
{
  PetscInt    header[4],M,N,m,nz,rows,cols,sum,i;
  PetscInt    *rowidxs,*colidxs;
  PetscScalar *matvals;

  PetscFunctionBegin;
  PetscCall(PetscViewerSetUp(viewer));

  /* read in matrix header */
  PetscCall(PetscViewerBinaryRead(viewer,header,4,NULL,PETSC_INT));
  PetscCheck(header[0] == MAT_FILE_CLASSID,PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Not a matrix object in file");
  M = header[1]; N = header[2]; nz = header[3];
  PetscCheck(M >= 0,PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix row size (%" PetscInt_FMT ") in file is negative",M);
  PetscCheck(N >= 0,PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix column size (%" PetscInt_FMT ") in file is negative",N);
  PetscCheck(nz >= 0,PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk, cannot load as MPIAIJ");

  /* set block sizes from the viewer's .info file */
  PetscCall(MatLoad_Binary_BlockSizes(mat,viewer));
  /* set global sizes if not set already */
  if (mat->rmap->N < 0) mat->rmap->N = M;
  if (mat->cmap->N < 0) mat->cmap->N = N;
  PetscCall(PetscLayoutSetUp(mat->rmap));
  PetscCall(PetscLayoutSetUp(mat->cmap));

  /* check if the matrix sizes are correct */
  PetscCall(MatGetSize(mat,&rows,&cols));
  PetscCheckFalse(M != rows || N != cols,PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%" PetscInt_FMT ", %" PetscInt_FMT ") than the input matrix (%" PetscInt_FMT ", %" PetscInt_FMT ")",M,N,rows,cols);

  /* read in row lengths and build row indices */
  PetscCall(MatGetLocalSize(mat,&m,NULL));
  PetscCall(PetscMalloc1(m+1,&rowidxs));
  PetscCall(PetscViewerBinaryReadAll(viewer,rowidxs+1,m,PETSC_DECIDE,M,PETSC_INT));
  /* prefix-sum the per-row counts into CSR offsets */
  rowidxs[0] = 0; for (i=0; i<m; i++) rowidxs[i+1] += rowidxs[i];
  PetscCall(MPIU_Allreduce(&rowidxs[m],&sum,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)viewer)));
  PetscCheck(sum == nz,PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Inconsistent matrix data in file: nonzeros = %" PetscInt_FMT ", sum-row-lengths = %" PetscInt_FMT,nz,sum);
  /* read in column indices and matrix values */
  PetscCall(PetscMalloc2(rowidxs[m],&colidxs,rowidxs[m],&matvals));
  PetscCall(PetscViewerBinaryReadAll(viewer,colidxs,rowidxs[m],PETSC_DETERMINE,PETSC_DETERMINE,PETSC_INT));
  PetscCall(PetscViewerBinaryReadAll(viewer,matvals,rowidxs[m],PETSC_DETERMINE,PETSC_DETERMINE,PETSC_SCALAR));
  /* store matrix indices and values */
  PetscCall(MatMPIAIJSetPreallocationCSR(mat,rowidxs,colidxs,matvals));
  PetscCall(PetscFree(rowidxs));
  PetscCall(PetscFree2(colidxs,matvals));
  PetscFunctionReturn(0);
}

/* Not scalable because of ISAllGather() unless getting all columns. */
/* Gathers a parallel column IS into a sequential IS on every process.  When every
   process's iscol is exactly its owned column range (detected via an allreduce on
   the stride check), the gather is skipped and an identity stride IS is returned. */
PetscErrorCode ISGetSeqIS_Private(Mat mat,IS iscol,IS *isseq)
{
  IS          iscol_local;
  PetscBool   isstride;
  PetscMPIInt lisstride=0,gisstride;

  PetscFunctionBegin;
  /* check if we are grabbing all columns*/
  PetscCall(PetscObjectTypeCompare((PetscObject)iscol,ISSTRIDE,&isstride));

  if (isstride) {
    PetscInt start,len,mstart,mlen;
    PetscCall(ISStrideGetInfo(iscol,&start,NULL));
    PetscCall(ISGetLocalSize(iscol,&len));
    /* note: mlen receives the ownership *end*, hence the mlen-mstart length test below */
    PetscCall(MatGetOwnershipRangeColumn(mat,&mstart,&mlen));
    if (mstart == start && mlen-mstart == len) lisstride = 1;
  }

  PetscCall(MPIU_Allreduce(&lisstride,&gisstride,1,MPI_INT,MPI_MIN,PetscObjectComm((PetscObject)mat)));
  if (gisstride) {
    PetscInt N;
    PetscCall(MatGetSize(mat,NULL,&N));
    PetscCall(ISCreateStride(PETSC_COMM_SELF,N,0,1,&iscol_local));
    PetscCall(ISSetIdentity(iscol_local));
    PetscCall(PetscInfo(mat,"Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n"));
  } else {
    PetscInt cbs;
    PetscCall(ISGetBlockSize(iscol,&cbs));
    PetscCall(ISAllGather(iscol,&iscol_local));
    PetscCall(ISSetBlockSize(iscol_local,cbs));
  }

  *isseq = iscol_local;
  PetscFunctionReturn(0);
}

/*
 Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and global size of iscol_local
 (see MatCreateSubMatrix_MPIAIJ_nonscalable)

 Input
 Parameters:
   mat - matrix
   isrow - parallel row index set; its local indices are a subset of local columns of mat,
           i.e., mat->rstart <= isrow[i] < mat->rend
   iscol - parallel column index set; its local indices are a subset of local columns of mat,
           i.e., mat->cstart <= iscol[i] < mat->cend
 Output Parameter:
   isrow_d,iscol_d - sequential row and column index sets for retrieving mat->A
   iscol_o - sequential column index set for retrieving mat->B
   garray - column map; garray[i] indicates global location of iscol_o[i] in iscol
 */
PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat,IS isrow,IS iscol,IS *isrow_d,IS *iscol_d,IS *iscol_o,const PetscInt *garray[])
{
  Vec            x,cmap;
  const PetscInt *is_idx;
  PetscScalar    *xarray,*cmaparray;
  PetscInt       ncols,isstart,*idx,m,rstart,*cmap1,count;
  Mat_MPIAIJ     *a=(Mat_MPIAIJ*)mat->data;
  Mat            B=a->B;
  Vec            lvec=a->lvec,lcmap;
  PetscInt       i,cstart,cend,Bn=B->cmap->N;
  MPI_Comm       comm;
  VecScatter     Mvctx=a->Mvctx;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)mat,&comm));
  PetscCall(ISGetLocalSize(iscol,&ncols));

  /* (1) iscol is a sub-column vector of mat, pad it with '-1.' to form a full vector x */
  /* x marks selected columns (value = global column index, -1 elsewhere);
     cmap records each selected column's position within the global iscol ordering */
  PetscCall(MatCreateVecs(mat,&x,NULL));
  PetscCall(VecSet(x,-1.0));
  PetscCall(VecDuplicate(x,&cmap));
  PetscCall(VecSet(cmap,-1.0));

  /* Get start indices */
  /* exclusive prefix sum: isstart = number of selected columns on lower-rank processes */
  PetscCallMPI(MPI_Scan(&ncols,&isstart,1,MPIU_INT,MPI_SUM,comm));
  isstart -= ncols;
  PetscCall(MatGetOwnershipRangeColumn(mat,&cstart,&cend));

  PetscCall(ISGetIndices(iscol,&is_idx));
  PetscCall(VecGetArray(x,&xarray));
  PetscCall(VecGetArray(cmap,&cmaparray));
  PetscCall(PetscMalloc1(ncols,&idx));
  for (i=0; i<ncols; i++) {
    xarray[is_idx[i]-cstart]    = (PetscScalar)is_idx[i];
    cmaparray[is_idx[i]-cstart] = i + isstart;      /* global index of iscol[i] */
    idx[i]                      = is_idx[i]-cstart; /* local index of iscol[i]  */
  }
  PetscCall(VecRestoreArray(x,&xarray));
  PetscCall(VecRestoreArray(cmap,&cmaparray));
  PetscCall(ISRestoreIndices(iscol,&is_idx));

  /* Get iscol_d */
  PetscCall(ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,iscol_d));
  PetscCall(ISGetBlockSize(iscol,&i));
  PetscCall(ISSetBlockSize(*iscol_d,i));

  /* Get isrow_d */
  PetscCall(ISGetLocalSize(isrow,&m));
  rstart = mat->rmap->rstart;
  PetscCall(PetscMalloc1(m,&idx));
  PetscCall(ISGetIndices(isrow,&is_idx));
  for (i=0; i<m; i++) idx[i] = is_idx[i]-rstart;
  PetscCall(ISRestoreIndices(isrow,&is_idx));

  PetscCall(ISCreateGeneral(PETSC_COMM_SELF,m,idx,PETSC_OWN_POINTER,isrow_d));
  PetscCall(ISGetBlockSize(isrow,&i));
  PetscCall(ISSetBlockSize(*isrow_d,i));

  /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */
  PetscCall(VecScatterBegin(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD));
  PetscCall(VecScatterEnd(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD));

  PetscCall(VecDuplicate(lvec,&lcmap));

  PetscCall(VecScatterBegin(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD));
  PetscCall(VecScatterEnd(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD));

  /* (3) create sequential iscol_o (a subset of iscol) and isgarray */
  /* off-process column indices */
  count = 0;
  PetscCall(PetscMalloc1(Bn,&idx));
  PetscCall(PetscMalloc1(Bn,&cmap1));

  /* ghost columns whose scattered marker is >= 0 were selected by some process's iscol */
  PetscCall(VecGetArray(lvec,&xarray));
  PetscCall(VecGetArray(lcmap,&cmaparray));
  for (i=0; i<Bn; i++) {
    if (PetscRealPart(xarray[i]) > -1.0) {
      idx[count]   = i;                                       /* local column index in off-diagonal part B */
      cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]);   /* column index in submat */
      count++;
    }
  }
  PetscCall(VecRestoreArray(lvec,&xarray));
  PetscCall(VecRestoreArray(lcmap,&cmaparray));

  PetscCall(ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_COPY_VALUES,iscol_o));
  /* cannot ensure iscol_o has same blocksize as iscol! */

  PetscCall(PetscFree(idx));
  *garray = cmap1; /* caller takes ownership and must PetscFree() it */

  PetscCall(VecDestroy(&x));
  PetscCall(VecDestroy(&cmap));
  PetscCall(VecDestroy(&lcmap));
  PetscFunctionReturn(0);
}

/* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */
PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *submat)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)mat->data,*asub;
  Mat        M = NULL;
  MPI_Comm   comm;
  IS         iscol_d,isrow_d,iscol_o;
  Mat        Asub = NULL,Bsub = NULL;
  PetscInt   n;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)mat,&comm));

  if (call == MAT_REUSE_MATRIX) {
    /* Retrieve isrow_d, iscol_d and iscol_o from submat; they were composed onto it
       by the MAT_INITIAL_MATRIX call below */
    PetscCall(PetscObjectQuery((PetscObject)*submat,"isrow_d",(PetscObject*)&isrow_d));
    PetscCheck(isrow_d,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"isrow_d passed in was not used before, cannot reuse");

    PetscCall(PetscObjectQuery((PetscObject)*submat,"iscol_d",(PetscObject*)&iscol_d));
    PetscCheck(iscol_d,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_d passed in was not used before, cannot reuse");

    PetscCall(PetscObjectQuery((PetscObject)*submat,"iscol_o",(PetscObject*)&iscol_o));
    PetscCheck(iscol_o,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_o passed in was not used before, cannot reuse");

    /* Update diagonal and off-diagonal portions of submat */
    asub = (Mat_MPIAIJ*)(*submat)->data;
    PetscCall(MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->A));
    PetscCall(ISGetLocalSize(iscol_o,&n));
    if (n) {
      PetscCall(MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->B));
    }
    PetscCall(MatAssemblyBegin(*submat,MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(*submat,MAT_FINAL_ASSEMBLY));

  } else { /* call == MAT_INITIAL_MATRIX) */
    const PetscInt *garray;
    PetscInt       BsubN;

    /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?)
*/
    PetscCall(ISGetSeqIS_SameColDist_Private(mat,isrow,iscol,&isrow_d,&iscol_d,&iscol_o,&garray));

    /* Create local submatrices Asub and Bsub */
    PetscCall(MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Asub));
    PetscCall(MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Bsub));

    /* Create submatrix M; takes ownership of Asub and destroys Bsub */
    PetscCall(MatCreateMPIAIJWithSeqAIJ(comm,Asub,Bsub,garray,&M));

    /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */
    asub = (Mat_MPIAIJ*)M->data;

    PetscCall(ISGetLocalSize(iscol_o,&BsubN));
    n = asub->B->cmap->N;
    if (BsubN > n) {
      /* This case can be tested using ~petsc/src/tao/bound/tutorials/runplate2_3 */
      const PetscInt *idx;
      PetscInt       i,j,*idx_new,*subgarray = asub->garray;
      PetscCall(PetscInfo(M,"submatrix Bn %" PetscInt_FMT " != BsubN %" PetscInt_FMT ", update iscol_o\n",n,BsubN));

      /* walk both sorted ghost-column lists, keeping only iscol_o entries whose
         global column survived the condensation (i.e. appears in subgarray) */
      PetscCall(PetscMalloc1(n,&idx_new));
      j = 0;
      PetscCall(ISGetIndices(iscol_o,&idx));
      for (i=0; i<n; i++) {
        if (j >= BsubN) break;
        while (subgarray[i] > garray[j]) j++;

        if (subgarray[i] == garray[j]) {
          idx_new[i] = idx[j++];
        } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"subgarray[%" PetscInt_FMT "]=%" PetscInt_FMT " cannot < garray[%" PetscInt_FMT "]=%" PetscInt_FMT,i,subgarray[i],j,garray[j]);
      }
      PetscCall(ISRestoreIndices(iscol_o,&idx));

      PetscCall(ISDestroy(&iscol_o));
      PetscCall(ISCreateGeneral(PETSC_COMM_SELF,n,idx_new,PETSC_OWN_POINTER,&iscol_o));

    } else if (BsubN < n) {
      SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Columns of Bsub (%" PetscInt_FMT ") cannot be smaller than B's (%" PetscInt_FMT ")",BsubN,asub->B->cmap->N);
    }

    PetscCall(PetscFree(garray));
    *submat = M;

    /* Save isrow_d, iscol_d and iscol_o used in processor for next request */
    PetscCall(PetscObjectCompose((PetscObject)M,"isrow_d",(PetscObject)isrow_d));
    PetscCall(ISDestroy(&isrow_d));

    PetscCall(PetscObjectCompose((PetscObject)M,"iscol_d",(PetscObject)iscol_d));
    PetscCall(ISDestroy(&iscol_d));

    PetscCall(PetscObjectCompose((PetscObject)M,"iscol_o",(PetscObject)iscol_o));
    PetscCall(ISDestroy(&iscol_o));
  }
  PetscFunctionReturn(0);
}

/* Top-level submatrix extraction for MPIAIJ.  Detects (collectively) whether isrow/iscol
   match the matrix's own row/column distribution and dispatches to the scalable
   SameRowColDist/SameRowDist variants, falling back to the nonscalable path that
   gathers iscol onto every process. */
PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
{
  IS        iscol_local=NULL,isrow_d;
  PetscInt  csize;
  PetscInt  n,i,j,start,end;
  PetscBool sameRowDist=PETSC_FALSE,sameDist[2],tsameDist[2];
  MPI_Comm  comm;

  PetscFunctionBegin;
  /* If isrow has same processor distribution as mat,
     call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */
  if (call == MAT_REUSE_MATRIX) {
    /* the IS composed onto *newmat by the initial call identifies which path built it */
    PetscCall(PetscObjectQuery((PetscObject)*newmat,"isrow_d",(PetscObject*)&isrow_d));
    if (isrow_d) {
      sameRowDist  = PETSC_TRUE;
      tsameDist[1] = PETSC_TRUE; /* sameColDist */
    } else {
      PetscCall(PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_local));
      if (iscol_local) {
        sameRowDist  = PETSC_TRUE;
        tsameDist[1] = PETSC_FALSE; /* !sameColDist */
      }
    }
  } else {
    /* Check if isrow has same processor distribution as mat */
    sameDist[0] = PETSC_FALSE;
    PetscCall(ISGetLocalSize(isrow,&n));
    if (!n) {
      sameDist[0] = PETSC_TRUE;
    } else {
      PetscCall(ISGetMinMax(isrow,&i,&j));
      PetscCall(MatGetOwnershipRange(mat,&start,&end));
      if (i >= start && j < end) {
        sameDist[0] = PETSC_TRUE;
      }
    }

    /* Check if iscol has same processor distribution as mat */
    sameDist[1] = PETSC_FALSE;
    PetscCall(ISGetLocalSize(iscol,&n));
    if (!n) {
      sameDist[1] = PETSC_TRUE;
    } else {
      PetscCall(ISGetMinMax(iscol,&i,&j));
      PetscCall(MatGetOwnershipRangeColumn(mat,&start,&end));
      if (i >= start && j < end) sameDist[1] = PETSC_TRUE;
    }

    /* a distribution only counts as "same" if it is the same on EVERY process */
    PetscCall(PetscObjectGetComm((PetscObject)mat,&comm));
    PetscCall(MPIU_Allreduce(&sameDist,&tsameDist,2,MPIU_BOOL,MPI_LAND,comm));
    sameRowDist = tsameDist[0];
  }

  if (sameRowDist) {
    if (tsameDist[1]) { /* sameRowDist & sameColDist */
      /* isrow and iscol have same processor distribution as mat */
      PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat,isrow,iscol,call,newmat));
      PetscFunctionReturn(0);
    } else { /* sameRowDist */
      /* isrow has same processor distribution as mat */
      if (call == MAT_INITIAL_MATRIX) {
        PetscBool sorted;
        PetscCall(ISGetSeqIS_Private(mat,iscol,&iscol_local));
        PetscCall(ISGetLocalSize(iscol_local,&n)); /* local size of iscol_local = global columns of newmat */
        PetscCall(ISGetSize(iscol,&i));
        PetscCheck(n == i,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"n %" PetscInt_FMT " != size of iscol %" PetscInt_FMT,n,i);

        PetscCall(ISSorted(iscol_local,&sorted));
        if (sorted) {
          /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local be sorted; it can have duplicate indices */
          PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,iscol_local,MAT_INITIAL_MATRIX,newmat));
          PetscFunctionReturn(0);
        }
        /* unsorted iscol_local falls through to the general (nonscalable) path below */
      } else { /* call == MAT_REUSE_MATRIX */
        IS iscol_sub;
        PetscCall(PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub));
        if (iscol_sub) {
          PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,NULL,call,newmat));
          PetscFunctionReturn(0);
        }
      }
    }
  }

  /* General case: iscol -> iscol_local which has global size of iscol */
  if (call == MAT_REUSE_MATRIX) {
    PetscCall(PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local));
    PetscCheck(iscol_local,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
  } else {
    if (!iscol_local) {
      PetscCall(ISGetSeqIS_Private(mat,iscol,&iscol_local));
    }
  }

  PetscCall(ISGetLocalSize(iscol,&csize));
  PetscCall(MatCreateSubMatrix_MPIAIJ_nonscalable(mat,isrow,iscol_local,csize,call,newmat));

  if (call == MAT_INITIAL_MATRIX) {
    /* keep the gathered IS on the submatrix so a later MAT_REUSE_MATRIX call can find it */
    PetscCall(PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local));
    PetscCall(ISDestroy(&iscol_local));
  }
  PetscFunctionReturn(0);
}

/*@C
     MatCreateMPIAIJWithSeqAIJ - creates a MPIAIJ matrix using SeqAIJ matrices that contain the "diagonal"
     and "off-diagonal" part of the matrix in CSR format.

   Collective

   Input Parameters:
+  comm - MPI communicator
.  A - "diagonal" portion of matrix
.  B - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine
-  garray - global index of B columns

   Output Parameter:
.   mat - the matrix, with input A as its local diagonal matrix
   Level: advanced

   Notes:
       See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix.
       A becomes part of output mat, B is destroyed by this routine. The user cannot use A and B anymore.

.seealso: MatCreateMPIAIJWithSplitArrays()
@*/
PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm,Mat A,Mat B,const PetscInt garray[],Mat *mat)
{
  Mat_MPIAIJ        *maij;
  Mat_SeqAIJ        *b=(Mat_SeqAIJ*)B->data,*bnew;
  PetscInt          *oi=b->i,*oj=b->j,i,nz,col;
  const PetscScalar *oa;
  Mat               Bnew;
  PetscInt          m,n,N;

  PetscFunctionBegin;
  PetscCall(MatCreate(comm,mat));
  PetscCall(MatGetSize(A,&m,&n));
  PetscCheck(m == B->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Am %" PetscInt_FMT " != Bm %" PetscInt_FMT,m,B->rmap->N);
  PetscCheck(A->rmap->bs == B->rmap->bs,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A row bs %" PetscInt_FMT " != B row bs %" PetscInt_FMT,A->rmap->bs,B->rmap->bs);
  /* remove check below; When B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be same as A */
  /* PetscCheck(A->cmap->bs == B->cmap->bs,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %" PetscInt_FMT " != B column bs %" PetscInt_FMT,A->cmap->bs,B->cmap->bs); */

  /* Get global columns of mat */
  PetscCall(MPIU_Allreduce(&n,&N,1,MPIU_INT,MPI_SUM,comm));

  PetscCall(MatSetSizes(*mat,m,n,PETSC_DECIDE,N));
  PetscCall(MatSetType(*mat,MATMPIAIJ));
  PetscCall(MatSetBlockSizes(*mat,A->rmap->bs,A->cmap->bs));
  maij = (Mat_MPIAIJ*)(*mat)->data;

  (*mat)->preallocated = PETSC_TRUE;

  PetscCall(PetscLayoutSetUp((*mat)->rmap));
  PetscCall(PetscLayoutSetUp((*mat)->cmap));

  /* Set A as diagonal portion of *mat */
  maij->A = A;

  /* translate B's (compressed, local) column indices to global indices in place,
     as required for the off-diagonal part of an unassembled MPIAIJ matrix */
  nz = oi[m];
  for (i=0; i<nz; i++) {
    col   = oj[i];
    oj[i] = garray[col];
  }

  /* Set Bnew as off-diagonal portion of *mat; Bnew shares B's i/j/a arrays */
  PetscCall(MatSeqAIJGetArrayRead(B,&oa));
  PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,N,oi,oj,(PetscScalar*)oa,&Bnew));
  PetscCall(MatSeqAIJRestoreArrayRead(B,&oa));
  bnew        = (Mat_SeqAIJ*)Bnew->data;
  bnew->maxnz = b->maxnz;
/* allocated nonzeros of B */ 3442 maij->B = Bnew; 3443 3444 PetscCheck(B->rmap->N == Bnew->rmap->N,PETSC_COMM_SELF,PETSC_ERR_PLIB,"BN %" PetscInt_FMT " != BnewN %" PetscInt_FMT,B->rmap->N,Bnew->rmap->N); 3445 3446 b->singlemalloc = PETSC_FALSE; /* B arrays are shared by Bnew */ 3447 b->free_a = PETSC_FALSE; 3448 b->free_ij = PETSC_FALSE; 3449 PetscCall(MatDestroy(&B)); 3450 3451 bnew->singlemalloc = PETSC_TRUE; /* arrays will be freed by MatDestroy(&Bnew) */ 3452 bnew->free_a = PETSC_TRUE; 3453 bnew->free_ij = PETSC_TRUE; 3454 3455 /* condense columns of maij->B */ 3456 PetscCall(MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE)); 3457 PetscCall(MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY)); 3458 PetscCall(MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY)); 3459 PetscCall(MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE)); 3460 PetscCall(MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE)); 3461 PetscFunctionReturn(0); 3462 } 3463 3464 extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool,Mat*); 3465 3466 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat,IS isrow,IS iscol,IS iscol_local,MatReuse call,Mat *newmat) 3467 { 3468 PetscInt i,m,n,rstart,row,rend,nz,j,bs,cbs; 3469 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 3470 Mat_MPIAIJ *a=(Mat_MPIAIJ*)mat->data; 3471 Mat M,Msub,B=a->B; 3472 MatScalar *aa; 3473 Mat_SeqAIJ *aij; 3474 PetscInt *garray = a->garray,*colsub,Ncols; 3475 PetscInt count,Bn=B->cmap->N,cstart=mat->cmap->rstart,cend=mat->cmap->rend; 3476 IS iscol_sub,iscmap; 3477 const PetscInt *is_idx,*cmap; 3478 PetscBool allcolumns=PETSC_FALSE; 3479 MPI_Comm comm; 3480 3481 PetscFunctionBegin; 3482 PetscCall(PetscObjectGetComm((PetscObject)mat,&comm)); 3483 if (call == MAT_REUSE_MATRIX) { 3484 PetscCall(PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub)); 3485 PetscCheck(iscol_sub,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"SubIScol passed in 
was not used before, cannot reuse"); 3486 PetscCall(ISGetLocalSize(iscol_sub,&count)); 3487 3488 PetscCall(PetscObjectQuery((PetscObject)*newmat,"Subcmap",(PetscObject*)&iscmap)); 3489 PetscCheck(iscmap,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Subcmap passed in was not used before, cannot reuse"); 3490 3491 PetscCall(PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Msub)); 3492 PetscCheck(Msub,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3493 3494 PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_REUSE_MATRIX,PETSC_FALSE,&Msub)); 3495 3496 } else { /* call == MAT_INITIAL_MATRIX) */ 3497 PetscBool flg; 3498 3499 PetscCall(ISGetLocalSize(iscol,&n)); 3500 PetscCall(ISGetSize(iscol,&Ncols)); 3501 3502 /* (1) iscol -> nonscalable iscol_local */ 3503 /* Check for special case: each processor gets entire matrix columns */ 3504 PetscCall(ISIdentity(iscol_local,&flg)); 3505 if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE; 3506 PetscCall(MPIU_Allreduce(MPI_IN_PLACE,&allcolumns,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)mat))); 3507 if (allcolumns) { 3508 iscol_sub = iscol_local; 3509 PetscCall(PetscObjectReference((PetscObject)iscol_local)); 3510 PetscCall(ISCreateStride(PETSC_COMM_SELF,n,0,1,&iscmap)); 3511 3512 } else { 3513 /* (2) iscol_local -> iscol_sub and iscmap. 
Implementation below requires iscol_local be sorted, it can have duplicate indices */ 3514 PetscInt *idx,*cmap1,k; 3515 PetscCall(PetscMalloc1(Ncols,&idx)); 3516 PetscCall(PetscMalloc1(Ncols,&cmap1)); 3517 PetscCall(ISGetIndices(iscol_local,&is_idx)); 3518 count = 0; 3519 k = 0; 3520 for (i=0; i<Ncols; i++) { 3521 j = is_idx[i]; 3522 if (j >= cstart && j < cend) { 3523 /* diagonal part of mat */ 3524 idx[count] = j; 3525 cmap1[count++] = i; /* column index in submat */ 3526 } else if (Bn) { 3527 /* off-diagonal part of mat */ 3528 if (j == garray[k]) { 3529 idx[count] = j; 3530 cmap1[count++] = i; /* column index in submat */ 3531 } else if (j > garray[k]) { 3532 while (j > garray[k] && k < Bn-1) k++; 3533 if (j == garray[k]) { 3534 idx[count] = j; 3535 cmap1[count++] = i; /* column index in submat */ 3536 } 3537 } 3538 } 3539 } 3540 PetscCall(ISRestoreIndices(iscol_local,&is_idx)); 3541 3542 PetscCall(ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_OWN_POINTER,&iscol_sub)); 3543 PetscCall(ISGetBlockSize(iscol,&cbs)); 3544 PetscCall(ISSetBlockSize(iscol_sub,cbs)); 3545 3546 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local),count,cmap1,PETSC_OWN_POINTER,&iscmap)); 3547 } 3548 3549 /* (3) Create sequential Msub */ 3550 PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_INITIAL_MATRIX,allcolumns,&Msub)); 3551 } 3552 3553 PetscCall(ISGetLocalSize(iscol_sub,&count)); 3554 aij = (Mat_SeqAIJ*)(Msub)->data; 3555 ii = aij->i; 3556 PetscCall(ISGetIndices(iscmap,&cmap)); 3557 3558 /* 3559 m - number of local rows 3560 Ncols - number of columns (same on all processors) 3561 rstart - first row in new global matrix generated 3562 */ 3563 PetscCall(MatGetSize(Msub,&m,NULL)); 3564 3565 if (call == MAT_INITIAL_MATRIX) { 3566 /* (4) Create parallel newmat */ 3567 PetscMPIInt rank,size; 3568 PetscInt csize; 3569 3570 PetscCallMPI(MPI_Comm_size(comm,&size)); 3571 PetscCallMPI(MPI_Comm_rank(comm,&rank)); 3572 3573 /* 3574 Determine the 
number of non-zeros in the diagonal and off-diagonal 3575 portions of the matrix in order to do correct preallocation 3576 */ 3577 3578 /* first get start and end of "diagonal" columns */ 3579 PetscCall(ISGetLocalSize(iscol,&csize)); 3580 if (csize == PETSC_DECIDE) { 3581 PetscCall(ISGetSize(isrow,&mglobal)); 3582 if (mglobal == Ncols) { /* square matrix */ 3583 nlocal = m; 3584 } else { 3585 nlocal = Ncols/size + ((Ncols % size) > rank); 3586 } 3587 } else { 3588 nlocal = csize; 3589 } 3590 PetscCallMPI(MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm)); 3591 rstart = rend - nlocal; 3592 PetscCheckFalse(rank == size - 1 && rend != Ncols,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT,rend,Ncols); 3593 3594 /* next, compute all the lengths */ 3595 jj = aij->j; 3596 PetscCall(PetscMalloc1(2*m+1,&dlens)); 3597 olens = dlens + m; 3598 for (i=0; i<m; i++) { 3599 jend = ii[i+1] - ii[i]; 3600 olen = 0; 3601 dlen = 0; 3602 for (j=0; j<jend; j++) { 3603 if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++; 3604 else dlen++; 3605 jj++; 3606 } 3607 olens[i] = olen; 3608 dlens[i] = dlen; 3609 } 3610 3611 PetscCall(ISGetBlockSize(isrow,&bs)); 3612 PetscCall(ISGetBlockSize(iscol,&cbs)); 3613 3614 PetscCall(MatCreate(comm,&M)); 3615 PetscCall(MatSetSizes(M,m,nlocal,PETSC_DECIDE,Ncols)); 3616 PetscCall(MatSetBlockSizes(M,bs,cbs)); 3617 PetscCall(MatSetType(M,((PetscObject)mat)->type_name)); 3618 PetscCall(MatMPIAIJSetPreallocation(M,0,dlens,0,olens)); 3619 PetscCall(PetscFree(dlens)); 3620 3621 } else { /* call == MAT_REUSE_MATRIX */ 3622 M = *newmat; 3623 PetscCall(MatGetLocalSize(M,&i,NULL)); 3624 PetscCheck(i == m,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3625 PetscCall(MatZeroEntries(M)); 3626 /* 3627 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3628 rather than the slower MatSetValues(). 
3629 */ 3630 M->was_assembled = PETSC_TRUE; 3631 M->assembled = PETSC_FALSE; 3632 } 3633 3634 /* (5) Set values of Msub to *newmat */ 3635 PetscCall(PetscMalloc1(count,&colsub)); 3636 PetscCall(MatGetOwnershipRange(M,&rstart,NULL)); 3637 3638 jj = aij->j; 3639 PetscCall(MatSeqAIJGetArrayRead(Msub,(const PetscScalar**)&aa)); 3640 for (i=0; i<m; i++) { 3641 row = rstart + i; 3642 nz = ii[i+1] - ii[i]; 3643 for (j=0; j<nz; j++) colsub[j] = cmap[jj[j]]; 3644 PetscCall(MatSetValues_MPIAIJ(M,1,&row,nz,colsub,aa,INSERT_VALUES)); 3645 jj += nz; aa += nz; 3646 } 3647 PetscCall(MatSeqAIJRestoreArrayRead(Msub,(const PetscScalar**)&aa)); 3648 PetscCall(ISRestoreIndices(iscmap,&cmap)); 3649 3650 PetscCall(MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY)); 3651 PetscCall(MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY)); 3652 3653 PetscCall(PetscFree(colsub)); 3654 3655 /* save Msub, iscol_sub and iscmap used in processor for next request */ 3656 if (call == MAT_INITIAL_MATRIX) { 3657 *newmat = M; 3658 PetscCall(PetscObjectCompose((PetscObject)(*newmat),"SubMatrix",(PetscObject)Msub)); 3659 PetscCall(MatDestroy(&Msub)); 3660 3661 PetscCall(PetscObjectCompose((PetscObject)(*newmat),"SubIScol",(PetscObject)iscol_sub)); 3662 PetscCall(ISDestroy(&iscol_sub)); 3663 3664 PetscCall(PetscObjectCompose((PetscObject)(*newmat),"Subcmap",(PetscObject)iscmap)); 3665 PetscCall(ISDestroy(&iscmap)); 3666 3667 if (iscol_local) { 3668 PetscCall(PetscObjectCompose((PetscObject)(*newmat),"ISAllGather",(PetscObject)iscol_local)); 3669 PetscCall(ISDestroy(&iscol_local)); 3670 } 3671 } 3672 PetscFunctionReturn(0); 3673 } 3674 3675 /* 3676 Not great since it makes two copies of the submatrix, first an SeqAIJ 3677 in local and then by concatenating the local matrices the end result. 3678 Writing it directly would be much like MatCreateSubMatrices_MPIAIJ() 3679 3680 Note: This requires a sequential iscol with all indices. 
3681 */ 3682 PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 3683 { 3684 PetscMPIInt rank,size; 3685 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs; 3686 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 3687 Mat M,Mreuse; 3688 MatScalar *aa,*vwork; 3689 MPI_Comm comm; 3690 Mat_SeqAIJ *aij; 3691 PetscBool colflag,allcolumns=PETSC_FALSE; 3692 3693 PetscFunctionBegin; 3694 PetscCall(PetscObjectGetComm((PetscObject)mat,&comm)); 3695 PetscCallMPI(MPI_Comm_rank(comm,&rank)); 3696 PetscCallMPI(MPI_Comm_size(comm,&size)); 3697 3698 /* Check for special case: each processor gets entire matrix columns */ 3699 PetscCall(ISIdentity(iscol,&colflag)); 3700 PetscCall(ISGetLocalSize(iscol,&n)); 3701 if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE; 3702 PetscCall(MPIU_Allreduce(MPI_IN_PLACE,&allcolumns,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)mat))); 3703 3704 if (call == MAT_REUSE_MATRIX) { 3705 PetscCall(PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse)); 3706 PetscCheck(Mreuse,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3707 PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,allcolumns,&Mreuse)); 3708 } else { 3709 PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,allcolumns,&Mreuse)); 3710 } 3711 3712 /* 3713 m - number of local rows 3714 n - number of columns (same on all processors) 3715 rstart - first row in new global matrix generated 3716 */ 3717 PetscCall(MatGetSize(Mreuse,&m,&n)); 3718 PetscCall(MatGetBlockSizes(Mreuse,&bs,&cbs)); 3719 if (call == MAT_INITIAL_MATRIX) { 3720 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3721 ii = aij->i; 3722 jj = aij->j; 3723 3724 /* 3725 Determine the number of non-zeros in the diagonal and off-diagonal 3726 portions of the matrix in order to do correct preallocation 3727 */ 3728 3729 
/* first get start and end of "diagonal" columns */ 3730 if (csize == PETSC_DECIDE) { 3731 PetscCall(ISGetSize(isrow,&mglobal)); 3732 if (mglobal == n) { /* square matrix */ 3733 nlocal = m; 3734 } else { 3735 nlocal = n/size + ((n % size) > rank); 3736 } 3737 } else { 3738 nlocal = csize; 3739 } 3740 PetscCallMPI(MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm)); 3741 rstart = rend - nlocal; 3742 PetscCheckFalse(rank == size - 1 && rend != n,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT,rend,n); 3743 3744 /* next, compute all the lengths */ 3745 PetscCall(PetscMalloc1(2*m+1,&dlens)); 3746 olens = dlens + m; 3747 for (i=0; i<m; i++) { 3748 jend = ii[i+1] - ii[i]; 3749 olen = 0; 3750 dlen = 0; 3751 for (j=0; j<jend; j++) { 3752 if (*jj < rstart || *jj >= rend) olen++; 3753 else dlen++; 3754 jj++; 3755 } 3756 olens[i] = olen; 3757 dlens[i] = dlen; 3758 } 3759 PetscCall(MatCreate(comm,&M)); 3760 PetscCall(MatSetSizes(M,m,nlocal,PETSC_DECIDE,n)); 3761 PetscCall(MatSetBlockSizes(M,bs,cbs)); 3762 PetscCall(MatSetType(M,((PetscObject)mat)->type_name)); 3763 PetscCall(MatMPIAIJSetPreallocation(M,0,dlens,0,olens)); 3764 PetscCall(PetscFree(dlens)); 3765 } else { 3766 PetscInt ml,nl; 3767 3768 M = *newmat; 3769 PetscCall(MatGetLocalSize(M,&ml,&nl)); 3770 PetscCheck(ml == m,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3771 PetscCall(MatZeroEntries(M)); 3772 /* 3773 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3774 rather than the slower MatSetValues(). 
3775 */ 3776 M->was_assembled = PETSC_TRUE; 3777 M->assembled = PETSC_FALSE; 3778 } 3779 PetscCall(MatGetOwnershipRange(M,&rstart,&rend)); 3780 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3781 ii = aij->i; 3782 jj = aij->j; 3783 3784 /* trigger copy to CPU if needed */ 3785 PetscCall(MatSeqAIJGetArrayRead(Mreuse,(const PetscScalar**)&aa)); 3786 for (i=0; i<m; i++) { 3787 row = rstart + i; 3788 nz = ii[i+1] - ii[i]; 3789 cwork = jj; jj += nz; 3790 vwork = aa; aa += nz; 3791 PetscCall(MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES)); 3792 } 3793 PetscCall(MatSeqAIJRestoreArrayRead(Mreuse,(const PetscScalar**)&aa)); 3794 3795 PetscCall(MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY)); 3796 PetscCall(MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY)); 3797 *newmat = M; 3798 3799 /* save submatrix used in processor for next request */ 3800 if (call == MAT_INITIAL_MATRIX) { 3801 PetscCall(PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse)); 3802 PetscCall(MatDestroy(&Mreuse)); 3803 } 3804 PetscFunctionReturn(0); 3805 } 3806 3807 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[]) 3808 { 3809 PetscInt m,cstart, cend,j,nnz,i,d; 3810 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart,ii; 3811 const PetscInt *JJ; 3812 PetscBool nooffprocentries; 3813 3814 PetscFunctionBegin; 3815 PetscCheckFalse(Ii[0],PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %" PetscInt_FMT,Ii[0]); 3816 3817 PetscCall(PetscLayoutSetUp(B->rmap)); 3818 PetscCall(PetscLayoutSetUp(B->cmap)); 3819 m = B->rmap->n; 3820 cstart = B->cmap->rstart; 3821 cend = B->cmap->rend; 3822 rstart = B->rmap->rstart; 3823 3824 PetscCall(PetscCalloc2(m,&d_nnz,m,&o_nnz)); 3825 3826 if (PetscDefined(USE_DEBUG)) { 3827 for (i=0; i<m; i++) { 3828 nnz = Ii[i+1]- Ii[i]; 3829 JJ = J + Ii[i]; 3830 PetscCheck(nnz >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %" PetscInt_FMT " has a negative %" PetscInt_FMT " number of columns",i,nnz); 3831 
PetscCheck(!nnz || !(JJ[0] < 0),PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %" PetscInt_FMT " starts with negative column index %" PetscInt_FMT,i,JJ[0]); 3832 PetscCheck(!nnz || !(JJ[nnz-1] >= B->cmap->N),PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %" PetscInt_FMT " ends with too large a column index %" PetscInt_FMT " (max allowed %" PetscInt_FMT ")",i,JJ[nnz-1],B->cmap->N); 3833 } 3834 } 3835 3836 for (i=0; i<m; i++) { 3837 nnz = Ii[i+1]- Ii[i]; 3838 JJ = J + Ii[i]; 3839 nnz_max = PetscMax(nnz_max,nnz); 3840 d = 0; 3841 for (j=0; j<nnz; j++) { 3842 if (cstart <= JJ[j] && JJ[j] < cend) d++; 3843 } 3844 d_nnz[i] = d; 3845 o_nnz[i] = nnz - d; 3846 } 3847 PetscCall(MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz)); 3848 PetscCall(PetscFree2(d_nnz,o_nnz)); 3849 3850 for (i=0; i<m; i++) { 3851 ii = i + rstart; 3852 PetscCall(MatSetValues_MPIAIJ(B,1,&ii,Ii[i+1] - Ii[i],J+Ii[i], v ? v + Ii[i] : NULL,INSERT_VALUES)); 3853 } 3854 nooffprocentries = B->nooffprocentries; 3855 B->nooffprocentries = PETSC_TRUE; 3856 PetscCall(MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY)); 3857 PetscCall(MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY)); 3858 B->nooffprocentries = nooffprocentries; 3859 3860 PetscCall(MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE)); 3861 PetscFunctionReturn(0); 3862 } 3863 3864 /*@ 3865 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 3866 (the default parallel PETSc format). 3867 3868 Collective 3869 3870 Input Parameters: 3871 + B - the matrix 3872 . i - the indices into j for the start of each local row (starts with zero) 3873 . j - the column indices for each local row (starts with zero) 3874 - v - optional values in the matrix 3875 3876 Level: developer 3877 3878 Notes: 3879 The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc; 3880 thus you CANNOT change the matrix entries by changing the values of v[] after you have 3881 called this routine. 
     Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.

       The i and j indices are 0 based, and i indices are indices corresponding to the local j array.

       The format which is used for the sparse matrix input, is equivalent to a
    row-major ordering.. i.e for the following matrix, the input data expected is
    as shown

$        1 0 0
$        2 0 3     P0
$       -------
$        4 5 6     P1
$
$     Process0 [P0]: rows_owned=[0,1]
$        i =  {0,1,3}  [size = nrow+1  = 2+1]
$        j =  {0,0,2}  [size = 3]
$        v =  {1,2,3}  [size = 3]
$
$     Process1 [P1]: rows_owned=[2]
$        i =  {0,3}    [size = nrow+1  = 1+1]
$        j =  {0,1,2}  [size = 3]
$        v =  {4,5,6}  [size = 3]

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MATMPIAIJ,
          MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
@*/
PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
{
  PetscFunctionBegin;
  /* dispatch to the type-specific implementation (MatMPIAIJSetPreallocationCSR_MPIAIJ); a no-op if the type does not provide it */
  PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));
  PetscFunctionReturn(0);
}

/*@C
   MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
   (the default parallel PETSc format).  For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective

   Input Parameters:
+  B - the matrix
.  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
           (same value is used for all local rows)
.  d_nnz - array containing the number of nonzeros in the various rows of the
           DIAGONAL portion of the local submatrix (possibly different for each row)
           or NULL (PETSC_NULL_INTEGER in Fortran), if d_nz is used to specify the nonzero structure.
           The size of this array is equal to the number of local rows, i.e 'm'.
           For matrices that will be factored, you must leave room for (and set)
           the diagonal entry even if it is zero.
.  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
           submatrix (same value is used for all local rows).
-  o_nnz - array containing the number of nonzeros in the various rows of the
           OFF-DIAGONAL portion of the local submatrix (possibly different for
           each row) or NULL (PETSC_NULL_INTEGER in Fortran), if o_nz is used to specify the nonzero
           structure. The size of this array is equal to the number
           of local rows, i.e 'm'.

   If the *_nnz parameter is given then the *_nz parameter is ignored

   The AIJ format (also called the Yale sparse matrix format or
   compressed row storage (CSR)), is fully compatible with standard Fortran 77
   storage.  The stored row and column indices begin with zero.
   See Users-Manual: ch_mat for details.

   The parallel matrix is partitioned such that the first m0 rows belong to
   process 0, the next m1 rows belong to process 1, the next m2 rows belong
   to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.

   The DIAGONAL portion of the local submatrix of a processor can be defined
   as the submatrix which is obtained by extracting the part corresponding to
   the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
   first row that belongs to the processor, r2 is the last row belonging to
   this processor, and c1-c2 is range of indices of the local part of a
   vector suitable for applying the matrix to. This is an mxn matrix. In the
   common case of a square matrix, the row and column ranges are the same and
   the DIAGONAL part is also square. The remaining portion of the local
   submatrix (mxN) constitute the OFF-DIAGONAL portion.

   If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.

   You can call MatGetInfo() to get information on how effective the preallocation was;
   for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
   You can also run with the option -info and look for messages with the string
   malloc in them to see if additional memory allocation was needed.

   Example usage:

   Consider the following 8x8 matrix with 34 non-zero values, that is
   assembled across 3 processors. Lets assume that proc0 owns 3 rows,
   proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
   as follows:

.vb
            1  2  0  |  0  3  0  |  0  4
    Proc0   0  5  6  |  7  0  0  |  8  0
            9  0 10  | 11  0  0  | 12  0
    -------------------------------------
           13  0 14  | 15 16 17  |  0  0
    Proc1   0 18  0  | 19 20 21  |  0  0
            0  0  0  | 22 23  0  | 24  0
    -------------------------------------
    Proc2  25 26 27  |  0  0 28  | 29  0
           30  0  0  | 31 32 33  |  0 34
.ve

   This can be represented as a collection of submatrices as:

.vb
      A B C
      D E F
      G H I
.ve

   Where the submatrices A,B,C are owned by proc0, D,E,F are
   owned by proc1, G,H,I are owned by proc2.

   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'M','N' parameters are 8,8, and have the same values on all procs.

   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
   Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
   part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.

   When d_nz, o_nz parameters are specified, d_nz storage elements are
   allocated for every row of the local diagonal submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submat.
   One way to choose d_nz and o_nz is to use the max nonzeros per local
   rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
   In this case, the values of d_nz,o_nz are:
.vb
     proc0 : dnz = 2, o_nz = 2
     proc1 : dnz = 3, o_nz = 2
     proc2 : dnz = 1, o_nz = 4
.ve
   We are allocating m*(d_nz+o_nz) storage locations for every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc2. i.e we are using 12+15+10=37 storage locations to store
   34 values.

   When d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
   In the above case the values for d_nnz,o_nnz are:
.vb
     proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2: d_nnz = [1,1]   and o_nnz = [4,4]
.ve
   Here the space allocated is sum of all the above values i.e 34, and
   hence pre-allocation is perfect.

   Level: intermediate

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
          MATMPIAIJ, MatGetInfo(), PetscSplitOwnership()
@*/
PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(B,MAT_CLASSID,1);
  PetscValidType(B,1);
  /* dispatch to the type-specific implementation; a no-op if the type does not provide it */
  PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));
  PetscFunctionReturn(0);
}

/*@
   MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard
   CSR format for the local rows.

   Collective

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (Cannot be PETSC_DECIDE)
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
       calculated if N is given) For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.  i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
.  j - column indices
-  a - matrix values

   Output Parameter:
.  mat - the matrix

   Level: intermediate

   Notes:
       The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
     thus you CANNOT change the matrix entries by changing the values of a[] after you have
     called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.

       The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
4081 4082 The format which is used for the sparse matrix input, is equivalent to a 4083 row-major ordering.. i.e for the following matrix, the input data expected is 4084 as shown 4085 4086 Once you have created the matrix you can update it with new numerical values using MatUpdateMPIAIJWithArrays 4087 4088 $ 1 0 0 4089 $ 2 0 3 P0 4090 $ ------- 4091 $ 4 5 6 P1 4092 $ 4093 $ Process0 [P0]: rows_owned=[0,1] 4094 $ i = {0,1,3} [size = nrow+1 = 2+1] 4095 $ j = {0,0,2} [size = 3] 4096 $ v = {1,2,3} [size = 3] 4097 $ 4098 $ Process1 [P1]: rows_owned=[2] 4099 $ i = {0,3} [size = nrow+1 = 1+1] 4100 $ j = {0,1,2} [size = 3] 4101 $ v = {4,5,6} [size = 3] 4102 4103 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4104 MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays(), MatUpdateMPIAIJWithArrays() 4105 @*/ 4106 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 4107 { 4108 PetscFunctionBegin; 4109 PetscCheck(!i || !i[0],PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 4110 PetscCheck(m >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4111 PetscCall(MatCreate(comm,mat)); 4112 PetscCall(MatSetSizes(*mat,m,n,M,N)); 4113 /* PetscCall(MatSetBlockSizes(M,bs,cbs)); */ 4114 PetscCall(MatSetType(*mat,MATMPIAIJ)); 4115 PetscCall(MatMPIAIJSetPreallocationCSR(*mat,i,j,a)); 4116 PetscFunctionReturn(0); 4117 } 4118 4119 /*@ 4120 MatUpdateMPIAIJWithArrays - updates a MPI AIJ matrix using arrays that contain in standard 4121 CSR format for the local rows. Only the numerical values are updated the other arrays must be identical 4122 4123 Collective 4124 4125 Input Parameters: 4126 + mat - the matrix 4127 . m - number of local rows (Cannot be PETSC_DECIDE) 4128 . 
n - This value should be the same as the local size used in creating the 4129 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4130 calculated if N is given) For square matrices n is almost always m. 4131 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4132 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4133 . Ii - row indices; that is Ii[0] = 0, Ii[row] = Ii[row-1] + number of elements in that row of the matrix 4134 . J - column indices 4135 - v - matrix values 4136 4137 Level: intermediate 4138 4139 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4140 MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays(), MatUpdateMPIAIJWithArrays() 4141 @*/ 4142 PetscErrorCode MatUpdateMPIAIJWithArrays(Mat mat,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[]) 4143 { 4144 PetscInt cstart,nnz,i,j; 4145 PetscInt *ld; 4146 PetscBool nooffprocentries; 4147 Mat_MPIAIJ *Aij = (Mat_MPIAIJ*)mat->data; 4148 Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)Aij->A->data; 4149 PetscScalar *ad,*ao; 4150 const PetscInt *Adi = Ad->i; 4151 PetscInt ldi,Iii,md; 4152 4153 PetscFunctionBegin; 4154 PetscCheckFalse(Ii[0],PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 4155 PetscCheck(m >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4156 PetscCheck(m == mat->rmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Local number of rows cannot change from call to MatUpdateMPIAIJWithArrays()"); 4157 PetscCheck(n == mat->cmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Local number of columns cannot change from call to MatUpdateMPIAIJWithArrays()"); 4158 4159 PetscCall(MatSeqAIJGetArrayWrite(Aij->A,&ad)); 4160 PetscCall(MatSeqAIJGetArrayWrite(Aij->B,&ao)); 4161 cstart = mat->cmap->rstart; 4162 if (!Aij->ld) { 4163 /* count 
number of entries below block diagonal */ 4164 PetscCall(PetscCalloc1(m,&ld)); 4165 Aij->ld = ld; 4166 for (i=0; i<m; i++) { 4167 nnz = Ii[i+1]- Ii[i]; 4168 j = 0; 4169 while (J[j] < cstart && j < nnz) {j++;} 4170 J += nnz; 4171 ld[i] = j; 4172 } 4173 } else { 4174 ld = Aij->ld; 4175 } 4176 4177 for (i=0; i<m; i++) { 4178 nnz = Ii[i+1]- Ii[i]; 4179 Iii = Ii[i]; 4180 ldi = ld[i]; 4181 md = Adi[i+1]-Adi[i]; 4182 PetscCall(PetscArraycpy(ao,v + Iii,ldi)); 4183 PetscCall(PetscArraycpy(ad,v + Iii + ldi,md)); 4184 PetscCall(PetscArraycpy(ao + ldi,v + Iii + ldi + md,nnz - ldi - md)); 4185 ad += md; 4186 ao += nnz - md; 4187 } 4188 nooffprocentries = mat->nooffprocentries; 4189 mat->nooffprocentries = PETSC_TRUE; 4190 PetscCall(MatSeqAIJRestoreArrayWrite(Aij->A,&ad)); 4191 PetscCall(MatSeqAIJRestoreArrayWrite(Aij->B,&ao)); 4192 PetscCall(PetscObjectStateIncrease((PetscObject)Aij->A)); 4193 PetscCall(PetscObjectStateIncrease((PetscObject)Aij->B)); 4194 PetscCall(PetscObjectStateIncrease((PetscObject)mat)); 4195 PetscCall(MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY)); 4196 PetscCall(MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY)); 4197 mat->nooffprocentries = nooffprocentries; 4198 PetscFunctionReturn(0); 4199 } 4200 4201 /*@C 4202 MatCreateAIJ - Creates a sparse parallel matrix in AIJ format 4203 (the default parallel PETSc format). For good matrix assembly performance 4204 the user should preallocate the matrix storage by setting the parameters 4205 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 4206 performance can be increased by more than a factor of 50. 4207 4208 Collective 4209 4210 Input Parameters: 4211 + comm - MPI communicator 4212 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 4213 This value should be the same as the local size used in creating the 4214 y vector for the matrix-vector product y = Ax. 4215 . 
n - This value should be the same as the local size used in creating the 4216 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4217 calculated if N is given) For square matrices n is almost always m. 4218 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4219 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4220 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 4221 (same value is used for all local rows) 4222 . d_nnz - array containing the number of nonzeros in the various rows of the 4223 DIAGONAL portion of the local submatrix (possibly different for each row) 4224 or NULL, if d_nz is used to specify the nonzero structure. 4225 The size of this array is equal to the number of local rows, i.e 'm'. 4226 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 4227 submatrix (same value is used for all local rows). 4228 - o_nnz - array containing the number of nonzeros in the various rows of the 4229 OFF-DIAGONAL portion of the local submatrix (possibly different for 4230 each row) or NULL, if o_nz is used to specify the nonzero 4231 structure. The size of this array is equal to the number 4232 of local rows, i.e 'm'. 4233 4234 Output Parameter: 4235 . A - the matrix 4236 4237 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 4238 MatXXXXSetPreallocation() paradigm instead of this routine directly. 4239 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 4240 4241 Notes: 4242 If the *_nnz parameter is given then the *_nz parameter is ignored 4243 4244 m,n,M,N parameters specify the size of the matrix, and its partitioning across 4245 processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 4246 storage requirements for this matrix. 

   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
   processor then it must be used on all processors that share the object for
   that argument.

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   The parallel matrix is partitioned across processors such that the
   first m0 rows belong to process 0, the next m1 rows belong to
   process 1, the next m2 rows belong to process 2 etc.. where
   m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores
   values corresponding to [m x N] submatrix.

   The columns are logically partitioned with the n0 columns belonging
   to 0th partition, the next n1 columns belonging to the next
   partition etc.. where n0,n1,n2... are the input parameter 'n'.

   The DIAGONAL portion of the local submatrix on any given processor
   is the submatrix corresponding to the rows and columns m,n
   corresponding to the given processor. i.e diagonal matrix on
   process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1]
   etc. The remaining portion of the local submatrix [m x (N-n)]
   constitute the OFF-DIAGONAL portion. The example below better
   illustrates this concept.

   For a square global matrix we define each processor's diagonal portion
   to be its local rows and the corresponding columns (a square submatrix);
   each processor's off-diagonal portion encompasses the remainder of the
   local matrix (a rectangular submatrix).

   If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.

   When calling this routine with a single process communicator, a matrix of
   type SEQAIJ is returned.
If a matrix of type MPIAIJ is desired for this 4282 type of communicator, use the construction mechanism 4283 .vb 4284 MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...); 4285 .ve 4286 4287 $ MatCreate(...,&A); 4288 $ MatSetType(A,MATMPIAIJ); 4289 $ MatSetSizes(A, m,n,M,N); 4290 $ MatMPIAIJSetPreallocation(A,...); 4291 4292 By default, this format uses inodes (identical nodes) when possible. 4293 We search for consecutive rows with the same nonzero structure, thereby 4294 reusing matrix information to achieve increased efficiency. 4295 4296 Options Database Keys: 4297 + -mat_no_inode - Do not use inodes 4298 . -mat_inode_limit <limit> - Sets inode limit (max limit=5) 4299 - -matmult_vecscatter_view <viewer> - View the vecscatter (i.e., communication pattern) used in MatMult() of sparse parallel matrices. 4300 See viewer types in manual of MatView(). Of them, ascii_matlab, draw or binary cause the vecscatter be viewed as a matrix. 4301 Entry (i,j) is the size of message (in bytes) rank i sends to rank j in one MatMult() call. 4302 4303 Example usage: 4304 4305 Consider the following 8x8 matrix with 34 non-zero values, that is 4306 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 4307 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 4308 as follows 4309 4310 .vb 4311 1 2 0 | 0 3 0 | 0 4 4312 Proc0 0 5 6 | 7 0 0 | 8 0 4313 9 0 10 | 11 0 0 | 12 0 4314 ------------------------------------- 4315 13 0 14 | 15 16 17 | 0 0 4316 Proc1 0 18 0 | 19 20 21 | 0 0 4317 0 0 0 | 22 23 0 | 24 0 4318 ------------------------------------- 4319 Proc2 25 26 27 | 0 0 28 | 29 0 4320 30 0 0 | 31 32 33 | 0 34 4321 .ve 4322 4323 This can be represented as a collection of submatrices as 4324 4325 .vb 4326 A B C 4327 D E F 4328 G H I 4329 .ve 4330 4331 Where the submatrices A,B,C are owned by proc0, D,E,F are 4332 owned by proc1, G,H,I are owned by proc2. 

   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'M','N' parameters are 8,8, and have the same values on all procs.

   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
   Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
   part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.

   When d_nz, o_nz parameters are specified, d_nz storage elements are
   allocated for every row of the local diagonal submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submat.
   One way to choose d_nz and o_nz is to use the max nonzeros per local
   rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
   In this case, the values of d_nz,o_nz are
.vb
     proc0 : dnz = 2, o_nz = 2
     proc1 : dnz = 3, o_nz = 2
     proc2 : dnz = 1, o_nz = 4
.ve
   We are allocating m*(d_nz+o_nz) storage locations for every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc2. i.e we are using 12+15+10=37 storage locations to store
   34 values.

   When d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
   In the above case the values for d_nnz,o_nnz are
.vb
     proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2: d_nnz = [1,1]   and o_nnz = [4,4]
.ve
   Here the space allocated is sum of all the above values i.e 34, and
   hence pre-allocation is perfect.

   Level: intermediate

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
          MATMPIAIJ, MatCreateMPIAIJWithArrays()
@*/
PetscErrorCode MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
{
  PetscMPIInt size;

  PetscFunctionBegin;
  PetscCall(MatCreate(comm,A));
  PetscCall(MatSetSizes(*A,m,n,M,N));
  PetscCallMPI(MPI_Comm_size(comm,&size));
  if (size > 1) {
    /* multi-rank communicator: parallel AIJ with diagonal/off-diagonal split */
    PetscCall(MatSetType(*A,MATMPIAIJ));
    PetscCall(MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz));
  } else {
    /* single rank: plain sequential AIJ; o_nz/o_nnz are not used */
    PetscCall(MatSetType(*A,MATSEQAIJ));
    PetscCall(MatSeqAIJSetPreallocation(*A,d_nz,d_nnz));
  }
  PetscFunctionReturn(0);
}

/*@C
     MatMPIAIJGetSeqAIJ - Returns the local piece of this distributed matrix

   Not collective

   Input Parameter:
.  A - The MPIAIJ matrix

   Output Parameters:
+  Ad - The local diagonal block as a SeqAIJ matrix
.  Ao - The local off-diagonal block as a SeqAIJ matrix
-  colmap - An array mapping local column numbers of Ao to global column numbers of the parallel matrix

   Note: The rows in Ad and Ao are in [0, Nr), where Nr is the number of local rows on this process. The columns
   in Ad are in [0, Nc) where Nc is the number of local columns. The columns of Ao are in [0, Nco), where Nco is
   the number of nonzero columns in the local off-diagonal piece of the matrix A. The array colmap maps these
   local column numbers to global column numbers in the original matrix.

   Level: intermediate

.seealso: MatMPIAIJGetLocalMat(), MatMPIAIJGetLocalMatCondensed(), MatCreateAIJ(), MATMPIAIJ, MATSEQAIJ
@*/
PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
  PetscBool  flg;

  PetscFunctionBegin;
  /* accept any type whose name begins with "mpiaij" (covers subclasses such as mpiaijcusparse) */
  PetscCall(PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&flg));
  PetscCheck(flg,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MATMPIAIJ matrix as input");
  if (Ad) *Ad = a->A;
  if (Ao) *Ao = a->B;
  if (colmap) *colmap = a->garray;   /* local-to-global column map for the off-diagonal block */
  PetscFunctionReturn(0);
}

/* Concatenates the rows of the per-rank sequential matrices inmat (each m x N) into one
   parallel matrix outmat; n is the local column count of outmat (or PETSC_DECIDE).
   Collective on comm. With MAT_INITIAL_MATRIX a symbolic (preallocation) pass is done first;
   with MAT_REUSE_MATRIX only the numeric phase runs. */
PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
{
  PetscInt    m,N,i,rstart,nnz,Ii;
  PetscInt    *indx;
  PetscScalar *values;
  MatType     rootType;

  PetscFunctionBegin;
  PetscCall(MatGetSize(inmat,&m,&N));
  if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
    PetscInt *dnz,*onz,sum,bs,cbs;

    if (n == PETSC_DECIDE) {
      PetscCall(PetscSplitOwnership(comm,&n,&N));
    }
    /* Check sum(n) = N */
    PetscCall(MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm));
    PetscCheck(sum == N,PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %" PetscInt_FMT " != global columns %" PetscInt_FMT,sum,N);

    /* prefix-sum of local row counts gives this rank's first global row */
    PetscCallMPI(MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm));
    rstart -= m;

    MatPreallocateBegin(comm,m,n,dnz,onz);
    for (i=0; i<m; i++) {
      PetscCall(MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL));
      PetscCall(MatPreallocateSet(i+rstart,nnz,indx,dnz,onz));
      PetscCall(MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL));
    }

    PetscCall(MatCreate(comm,outmat));
    PetscCall(MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE));
    PetscCall(MatGetBlockSizes(inmat,&bs,&cbs));
    PetscCall(MatSetBlockSizes(*outmat,bs,cbs));
    PetscCall(MatGetRootType_Private(inmat,&rootType));
    PetscCall(MatSetType(*outmat,rootType));
    /* call both preallocation routines; only the one matching the actual type takes effect */
    PetscCall(MatSeqAIJSetPreallocation(*outmat,0,dnz));
    PetscCall(MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz));
    MatPreallocateEnd(dnz,onz);
    PetscCall(MatSetOption(*outmat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE));
  }

  /* numeric phase: copy each local row of inmat into the owned rows of outmat */
  PetscCall(MatGetOwnershipRange(*outmat,&rstart,NULL));
  for (i=0; i<m; i++) {
    PetscCall(MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values));
    Ii   = i + rstart;
    PetscCall(MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES));
    PetscCall(MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values));
  }
  PetscCall(MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY));
  PetscFunctionReturn(0);
}

/* Writes each rank's local rows of A as a sequential matrix to the binary file
   <outfile>.<rank>. Collective over A's communicator. */
PetscErrorCode MatFileSplit(Mat A,char *outfile)
{
  PetscMPIInt       rank;
  PetscInt          m,N,i,rstart,nnz;
  size_t            len;
  const PetscInt    *indx;
  PetscViewer       out;
  char              *name;
  Mat               B;
  const PetscScalar *values;

  PetscFunctionBegin;
  PetscCall(MatGetLocalSize(A,&m,NULL));
  PetscCall(MatGetSize(A,NULL,&N));
  /* Should this be the type of the diagonal block of A?
*/
  PetscCall(MatCreate(PETSC_COMM_SELF,&B));
  PetscCall(MatSetSizes(B,m,N,m,N));
  PetscCall(MatSetBlockSizesFromMats(B,A,A));
  PetscCall(MatSetType(B,MATSEQAIJ));
  PetscCall(MatSeqAIJSetPreallocation(B,0,NULL));
  PetscCall(MatGetOwnershipRange(A,&rstart,NULL));
  for (i=0; i<m; i++) {
    /* global row i+rstart of A becomes local row i of B */
    PetscCall(MatGetRow(A,i+rstart,&nnz,&indx,&values));
    PetscCall(MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES));
    PetscCall(MatRestoreRow(A,i+rstart,&nnz,&indx,&values));
  }
  PetscCall(MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY));

  PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank));
  PetscCall(PetscStrlen(outfile,&len));
  /* len+6: room for '.', up to 4 rank digits, and the terminating NUL */
  PetscCall(PetscMalloc1(len+6,&name));
  PetscCall(PetscSNPrintf(name,len+6,"%s.%d",outfile,rank));
  PetscCall(PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out));
  PetscCall(PetscFree(name));
  PetscCall(MatView(B,out));
  PetscCall(PetscViewerDestroy(&out));
  PetscCall(MatDestroy(&B));
  PetscFunctionReturn(0);
}

/* Container destructor: frees the Mat_Merge_SeqsToMPI support structure attached
   to a matrix created by MatCreateMPIAIJSumSeqAIJSymbolic(). Safe on NULL data. */
static PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(void *data)
{
  Mat_Merge_SeqsToMPI *merge = (Mat_Merge_SeqsToMPI *)data;

  PetscFunctionBegin;
  if (!merge) PetscFunctionReturn(0);
  PetscCall(PetscFree(merge->id_r));
  PetscCall(PetscFree(merge->len_s));
  PetscCall(PetscFree(merge->len_r));
  PetscCall(PetscFree(merge->bi));
  PetscCall(PetscFree(merge->bj));
  /* buf_ri/buf_rj are arrays of pointers into one contiguous allocation anchored at [0] */
  PetscCall(PetscFree(merge->buf_ri[0]));
  PetscCall(PetscFree(merge->buf_ri));
  PetscCall(PetscFree(merge->buf_rj[0]));
  PetscCall(PetscFree(merge->buf_rj));
  PetscCall(PetscFree(merge->coi));
  PetscCall(PetscFree(merge->coj));
  PetscCall(PetscFree(merge->owners_co));
  PetscCall(PetscLayoutDestroy(&merge->rowmap));
  PetscCall(PetscFree(merge));
  PetscFunctionReturn(0);
}

#include <../src/mat/utils/freespace.h>
#include
<petscbt.h>

/* Numeric phase of MatCreateMPIAIJSumSeqAIJ(): fills the values of mpimat (previously
   created by MatCreateMPIAIJSumSeqAIJSymbolic()) by summing the per-rank seqmat
   contributions. Each rank sends the values of the rows owned by other ranks and
   accumulates its own rows plus whatever it receives. Collective over mpimat. */
PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat)
{
  MPI_Comm            comm;
  Mat_SeqAIJ          *a =(Mat_SeqAIJ*)seqmat->data;
  PetscMPIInt         size,rank,taga,*len_s;
  PetscInt            N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj;
  PetscInt            proc,m;
  PetscInt            **buf_ri,**buf_rj;
  PetscInt            k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
  PetscInt            nrows,**buf_ri_k,**nextrow,**nextai;
  MPI_Request         *s_waits,*r_waits;
  MPI_Status          *status;
  const MatScalar     *aa,*a_a;
  MatScalar           **abuf_r,*ba_i;
  Mat_Merge_SeqsToMPI *merge;
  PetscContainer      container;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)mpimat,&comm));
  PetscCall(PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0));

  PetscCallMPI(MPI_Comm_size(comm,&size));
  PetscCallMPI(MPI_Comm_rank(comm,&rank));

  /* retrieve the merge structure stashed on mpimat by the symbolic phase */
  PetscCall(PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container));
  PetscCheck(container,PetscObjectComm((PetscObject)mpimat),PETSC_ERR_PLIB,"Mat not created from MatCreateMPIAIJSumSeqAIJSymbolic");
  PetscCall(PetscContainerGetPointer(container,(void**)&merge));
  PetscCall(MatSeqAIJGetArrayRead(seqmat,&a_a));
  aa   = a_a;

  bi     = merge->bi;
  bj     = merge->bj;
  buf_ri = merge->buf_ri;
  buf_rj = merge->buf_rj;

  PetscCall(PetscMalloc1(size,&status));
  owners = merge->rowmap->range;
  len_s  = merge->len_s;

  /* send and recv matrix values */
  /*-----------------------------*/
  PetscCall(PetscObjectGetNewTag((PetscObject)mpimat,&taga));
  PetscCall(PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits));

  PetscCall(PetscMalloc1(merge->nsend+1,&s_waits));
  for (proc=0,k=0; proc<size; proc++) {
    if (!len_s[proc]) continue;
    i = owners[proc];
    /* rows of seqmat owned by [proc] are stored contiguously starting at ai[owners[proc]] */
    PetscCallMPI(MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k));
    k++;
  }

  if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv,r_waits,status));
  if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend,s_waits,status));
  PetscCall(PetscFree(status));

  PetscCall(PetscFree(s_waits));
  PetscCall(PetscFree(r_waits));

  /* insert mat values of mpimat */
  /*----------------------------*/
  PetscCall(PetscMalloc1(N,&ba_i));
  PetscCall(PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai));

  for (k=0; k<merge->nrecv; k++) {
    buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
    nrows       = *(buf_ri_k[k]);
    nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th recved i-structure */
    nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure */
  }

  /* set values of ba */
  m = merge->rowmap->n;
  for (i=0; i<m; i++) {
    arow = owners[rank] + i;
    bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
    bnzi = bi[i+1] - bi[i];
    PetscCall(PetscArrayzero(ba_i,bnzi));

    /* add local non-zero vals of this proc's seqmat into ba */
    anzi   = ai[arow+1] - ai[arow];
    aj     = a->j + ai[arow];
    aa     = a_a + ai[arow];
    nextaj = 0;
    /* merge the (sorted) local row into the (sorted) superset row of mpimat */
    for (j=0; nextaj<anzi; j++) {
      if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
        ba_i[j] += aa[nextaj++];
      }
    }

    /* add received vals into ba */
    for (k=0; k<merge->nrecv; k++) { /* k-th received message */
      /* i-th row */
      if (i == *nextrow[k]) {
        anzi   = *(nextai[k]+1) - *nextai[k];
        aj     = buf_rj[k] + *(nextai[k]);
        aa     = abuf_r[k] + *(nextai[k]);
        nextaj = 0;
        for (j=0; nextaj<anzi; j++) {
          if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
            ba_i[j] += aa[nextaj++];
          }
        }
        nextrow[k]++; nextai[k]++;
      }
    }
    PetscCall(MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES));
  }
  PetscCall(MatSeqAIJRestoreArrayRead(seqmat,&a_a));
  PetscCall(MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY));

  PetscCall(PetscFree(abuf_r[0]));
  PetscCall(PetscFree(abuf_r));
  PetscCall(PetscFree(ba_i));
  PetscCall(PetscFree3(buf_ri_k,nextrow,nextai));
  PetscCall(PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0));
  PetscFunctionReturn(0);
}

/* Symbolic phase of MatCreateMPIAIJSumSeqAIJ(): determines the nonzero structure of the
   sum of the per-rank seqmat matrices, creates the (unassembled) parallel matrix, and
   attaches a Mat_Merge_SeqsToMPI container so the numeric phase can be repeated.
   Collective on comm. */
PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
{
  Mat                 B_mpi;
  Mat_SeqAIJ          *a=(Mat_SeqAIJ*)seqmat->data;
  PetscMPIInt         size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
  PetscInt            **buf_rj,**buf_ri,**buf_ri_k;
  PetscInt            M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
  PetscInt            len,proc,*dnz,*onz,bs,cbs;
  PetscInt            k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
  PetscInt            nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
  MPI_Request         *si_waits,*sj_waits,*ri_waits,*rj_waits;
  MPI_Status          *status;
  PetscFreeSpaceList  free_space=NULL,current_space=NULL;
  PetscBT             lnkbt;
  Mat_Merge_SeqsToMPI *merge;
  PetscContainer      container;

  PetscFunctionBegin;
  PetscCall(PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0));

  /* make sure it is a PETSc comm */
  PetscCall(PetscCommDuplicate(comm,&comm,NULL));
  PetscCallMPI(MPI_Comm_size(comm,&size));
  PetscCallMPI(MPI_Comm_rank(comm,&rank));

  PetscCall(PetscNew(&merge));
  PetscCall(PetscMalloc1(size,&status));

  /* determine row ownership */
  /*---------------------------------------------------------*/
  PetscCall(PetscLayoutCreate(comm,&merge->rowmap));
  PetscCall(PetscLayoutSetLocalSize(merge->rowmap,m));
  PetscCall(PetscLayoutSetSize(merge->rowmap,M));
  PetscCall(PetscLayoutSetBlockSize(merge->rowmap,1));
  PetscCall(PetscLayoutSetUp(merge->rowmap));
  PetscCall(PetscMalloc1(size,&len_si));
  PetscCall(PetscMalloc1(size,&merge->len_s));

  m      = merge->rowmap->n;
  owners = merge->rowmap->range;

  /* determine the number of messages to send, their lengths */
  /*---------------------------------------------------------*/
  len_s = merge->len_s;

  len          = 0; /* length of buf_si[] */
  merge->nsend = 0;
  for (proc=0; proc<size; proc++) {
    len_si[proc] = 0;
    if (proc == rank) {
      len_s[proc] = 0;   /* nothing is sent to self; own rows are merged locally */
    } else {
      len_si[proc] = owners[proc+1] - owners[proc] + 1;
      len_s[proc]  = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
    }
    if (len_s[proc]) {
      merge->nsend++;
      nrows = 0;
      /* count only nonempty rows; empty rows are omitted from the i-structure message */
      for (i=owners[proc]; i<owners[proc+1]; i++) {
        if (ai[i+1] > ai[i]) nrows++;
      }
      len_si[proc] = 2*(nrows+1);
      len         += len_si[proc];
    }
  }

  /* determine the number and length of messages to receive for ij-structure */
  /*-------------------------------------------------------------------------*/
  PetscCall(PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv));
  PetscCall(PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri));

  /* post the Irecv of j-structure */
  /*-------------------------------*/
  PetscCall(PetscCommGetNewTag(comm,&tagj));
  PetscCall(PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits));

  /* post the Isend of j-structure */
  /*--------------------------------*/
  PetscCall(PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits));

  for (proc=0, k=0; proc<size; proc++) {
    if (!len_s[proc]) continue;
    i = owners[proc];
    PetscCallMPI(MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k));
    k++;
  }

  /* receives and sends of j-structure are complete */
  /*------------------------------------------------*/
  if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv,rj_waits,status));
  if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend,sj_waits,status));

  /* send and recv i-structure */
  /*---------------------------*/
  PetscCall(PetscCommGetNewTag(comm,&tagi));
  PetscCall(PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits));

  PetscCall(PetscMalloc1(len+1,&buf_s));
  buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
  for (proc=0,k=0; proc<size; proc++) {
    if (!len_s[proc]) continue;
    /* form outgoing message for i-structure:
         buf_si[0]:                 nrows to be sent
               [1:nrows]:           row index (global)
               [nrows+1:2*nrows+1]: i-structure index
    */
    /*-------------------------------------------*/
    nrows       = len_si[proc]/2 - 1;
    buf_si_i    = buf_si + nrows+1;
    buf_si[0]   = nrows;
    buf_si_i[0] = 0;
    nrows       = 0;
    for (i=owners[proc]; i<owners[proc+1]; i++) {
      anzi = ai[i+1] - ai[i];
      if (anzi) {
        buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
        buf_si[nrows+1]   = i-owners[proc]; /* local row index */
        nrows++;
      }
    }
    PetscCallMPI(MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k));
    k++;
    buf_si += len_si[proc];
  }

  if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv,ri_waits,status));
  if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend,si_waits,status));

  PetscCall(PetscInfo(seqmat,"nsend: %d, nrecv: %d\n",merge->nsend,merge->nrecv));
  for (i=0; i<merge->nrecv; i++) {
    PetscCall(PetscInfo(seqmat,"recv len_ri=%d, len_rj=%d from [%d]\n",len_ri[i],merge->len_r[i],merge->id_r[i]));
  }

  PetscCall(PetscFree(len_si));
  PetscCall(PetscFree(len_ri));
  PetscCall(PetscFree(rj_waits));
  PetscCall(PetscFree2(si_waits,sj_waits));
  PetscCall(PetscFree(ri_waits));
  PetscCall(PetscFree(buf_s));
  PetscCall(PetscFree(status));

  /* compute a local seq matrix in each processor */
  /*----------------------------------------------*/
  /* allocate bi array and free space for accumulating nonzero column info */
  PetscCall(PetscMalloc1(m+1,&bi));
  bi[0] = 0;

  /* create and initialize a linked list */
  nlnk = N+1;
  PetscCall(PetscLLCreate(N,N,nlnk,lnk,lnkbt));

  /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
  len = ai[owners[rank+1]] - ai[owners[rank]];
  PetscCall(PetscFreeSpaceGet(PetscIntMultTruncate(2,len)+1,&free_space));

  current_space = free_space;

  /* determine symbolic info for each local row */
  PetscCall(PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai));

  for (k=0; k<merge->nrecv; k++) {
    buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
    nrows       = *buf_ri_k[k];
    nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th recved i-structure */
    nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure */
  }

  MatPreallocateBegin(comm,m,n,dnz,onz);
  len = 0;
  for (i=0; i<m; i++) {
    bnzi = 0;
    /* add local non-zero cols of this proc's seqmat into lnk */
    arow = owners[rank] + i;
    anzi = ai[arow+1] - ai[arow];
    aj   = a->j + ai[arow];
    PetscCall(PetscLLAddSorted(anzi,aj,N,&nlnk,lnk,lnkbt));
    bnzi += nlnk;
    /* add received col data into lnk */
    for (k=0; k<merge->nrecv; k++) { /* k-th received message */
      if (i == *nextrow[k]) { /* i-th row */
        anzi = *(nextai[k]+1) - *nextai[k];
        aj   = buf_rj[k] + *nextai[k];
        PetscCall(PetscLLAddSorted(anzi,aj,N,&nlnk,lnk,lnkbt));
        bnzi += nlnk;
        nextrow[k]++; nextai[k]++;
      }
    }
    if (len < bnzi) len = bnzi;  /* =max(bnzi) */

    /* if free space is not available, make more free space */
    if (current_space->local_remaining<bnzi) {
      PetscCall(PetscFreeSpaceGet(PetscIntSumTruncate(bnzi,current_space->total_array_size),&current_space));
      nspacedouble++;
    }
    /* copy data into free space, then initialize lnk */
    PetscCall(PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt));
    PetscCall(MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz));

    current_space->array           += bnzi;
    current_space->local_used      += bnzi;
    current_space->local_remaining -= bnzi;

    bi[i+1] = bi[i] + bnzi;
  }

  PetscCall(PetscFree3(buf_ri_k,nextrow,nextai));

  PetscCall(PetscMalloc1(bi[m]+1,&bj));
  PetscCall(PetscFreeSpaceContiguous(&free_space,bj));
  PetscCall(PetscLLDestroy(lnk,lnkbt));

  /* create symbolic parallel matrix B_mpi */
  /*---------------------------------------*/
  PetscCall(MatGetBlockSizes(seqmat,&bs,&cbs));
  PetscCall(MatCreate(comm,&B_mpi));
  if (n==PETSC_DECIDE) {
    PetscCall(MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N));
  } else {
    PetscCall(MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE));
  }
  PetscCall(MatSetBlockSizes(B_mpi,bs,cbs));
  PetscCall(MatSetType(B_mpi,MATMPIAIJ));
  PetscCall(MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz));
  MatPreallocateEnd(dnz,onz);
  PetscCall(MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE));

  /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
  B_mpi->assembled = PETSC_FALSE;
  merge->bi        = bi;
  merge->bj        = bj;
  merge->buf_ri    = buf_ri;
  merge->buf_rj    = buf_rj;
  merge->coi       = NULL;
  merge->coj       = NULL;
  merge->owners_co = NULL;

  PetscCall(PetscCommDestroy(&comm));

  /* attach the supporting struct to B_mpi for reuse */
  PetscCall(PetscContainerCreate(PETSC_COMM_SELF,&container));
  PetscCall(PetscContainerSetPointer(container,merge));
  PetscCall(PetscContainerSetUserDestroy(container,MatDestroy_MPIAIJ_SeqsToMPI));
  PetscCall(PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container));
  PetscCall(PetscContainerDestroy(&container));
  *mpimat = B_mpi;

  PetscCall(PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0));
  PetscFunctionReturn(0);
}

/*@C
      MatCreateMPIAIJSumSeqAIJ - Creates a MATMPIAIJ matrix by adding sequential
      matrices from each processor

   Collective

   Input Parameters:
+  comm - the communicators the parallel matrix will live on
.  seqmat - the input sequential matrices
.  m - number of local rows (or PETSC_DECIDE)
.  n - number of local columns (or PETSC_DECIDE)
-  scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX

   Output Parameter:
.  mpimat - the parallel matrix generated

    Level: advanced

   Notes:
     The dimensions of the sequential matrix in each processor MUST be the same.
     The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be
     destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
@*/
PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
{
  PetscMPIInt size;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_size(comm,&size));
  if (size == 1) {
    /* uniprocessor: the "sum" is just seqmat itself; duplicate or copy */
    PetscCall(PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0));
    if (scall == MAT_INITIAL_MATRIX) {
      PetscCall(MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat));
    } else {
      PetscCall(MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN));
    }
    PetscCall(PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0));
    PetscFunctionReturn(0);
  }
  PetscCall(PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0));
  /* symbolic phase only on first call; numeric phase runs every time */
  if (scall == MAT_INITIAL_MATRIX) {
    PetscCall(MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat));
  }
  PetscCall(MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat));
  PetscCall(PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0));
  PetscFunctionReturn(0);
}

/*@
     MatMPIAIJGetLocalMat - Creates a SeqAIJ from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
       mlocal rows and n columns. Where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
       with MatGetSize()

    Not Collective

   Input Parameters:
+    A - the matrix
-    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX

   Output Parameter:
.    A_loc - the local sequential matrix generated

    Level: developer

   Notes:
     When the communicator associated with A has size 1 and MAT_INITIAL_MATRIX is requested, the matrix returned is the diagonal part of A.
     If MAT_REUSE_MATRIX is requested with comm size 1, MatCopy(Adiag,*A_loc,SAME_NONZERO_PATTERN) is called.
     This means that one can preallocate the proper sequential matrix first and then call this routine with MAT_REUSE_MATRIX to safely
     modify the values of the returned A_loc.

.seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed(), MatMPIAIJGetLocalMatMerge()
@*/
PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
{
  Mat_MPIAIJ        *mpimat=(Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ        *mat,*a,*b;
  PetscInt          *ai,*aj,*bi,*bj,*cmap=mpimat->garray;
  const PetscScalar *aa,*ba,*aav,*bav;
  PetscScalar       *ca,*cam;
  PetscMPIInt       size;
  PetscInt          am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
  PetscInt          *ci,*cj,col,ncols_d,ncols_o,jo;
  PetscBool         match;

  PetscFunctionBegin;
  PetscCall(PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&match));
  PetscCheck(match,PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input");
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A),&size));
  if (size == 1) {
    /* single rank: the diagonal block already is the whole local matrix */
    if (scall == MAT_INITIAL_MATRIX) {
      PetscCall(PetscObjectReference((PetscObject)mpimat->A));
      *A_loc = mpimat->A;
    } else if (scall == MAT_REUSE_MATRIX) {
      PetscCall(MatCopy(mpimat->A,*A_loc,SAME_NONZERO_PATTERN));
    }
    PetscFunctionReturn(0);
  }

  PetscCall(PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0));
  a = (Mat_SeqAIJ*)(mpimat->A)->data;
  b = (Mat_SeqAIJ*)(mpimat->B)->data;
  ai = a->i; aj = a->j; bi = b->i; bj = b->j;
  PetscCall(MatSeqAIJGetArrayRead(mpimat->A,&aav));
  PetscCall(MatSeqAIJGetArrayRead(mpimat->B,&bav));
  /* aa/ba act as moving cursors; aav/bav keep the original pointers for the Restore calls */
  aa = aav;
  ba = bav;
  if (scall == MAT_INITIAL_MATRIX) {
    /* row i of the result has all diagonal-block entries plus all off-diagonal entries */
    PetscCall(PetscMalloc1(1+am,&ci));
    ci[0] = 0;
    for (i=0; i<am; i++) {
      ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
    }
    PetscCall(PetscMalloc1(1+ci[am],&cj));
    PetscCall(PetscMalloc1(1+ci[am],&ca));
    k = 0;
    /* merge so the global column indices of each row come out sorted:
       off-diagonal entries left of cstart, then the diagonal block, then the rest */
    for (i=0; i<am; i++) {
      ncols_o = bi[i+1] - bi[i];
      ncols_d = ai[i+1] - ai[i];
      /* off-diagonal portion of A */
      for (jo=0; jo<ncols_o; jo++) {
        col = cmap[*bj];
        if (col >= cstart) break;
        cj[k]   = col; bj++;
        ca[k++] = *ba++;
      }
      /* diagonal portion of A */
      for (j=0; j<ncols_d; j++) {
        cj[k]   = cstart + *aj++;
        ca[k++] = *aa++;
      }
      /* off-diagonal portion of A */
      for (j=jo; j<ncols_o; j++) {
        cj[k]   = cmap[*bj++];
        ca[k++] = *ba++;
      }
    }
    /* put together the new matrix */
    PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc));
    /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
    /* Since these are PETSc arrays, change flags to free them as necessary. */
    mat          = (Mat_SeqAIJ*)(*A_loc)->data;
    mat->free_a  = PETSC_TRUE;
    mat->free_ij = PETSC_TRUE;
    mat->nonew   = 0;
  } else if (scall == MAT_REUSE_MATRIX) {
    /* structure already set; only refresh the numerical values in the same order */
    mat =(Mat_SeqAIJ*)(*A_loc)->data;
    ci = mat->i;
    cj = mat->j;
    PetscCall(MatSeqAIJGetArrayWrite(*A_loc,&cam));
    for (i=0; i<am; i++) {
      /* off-diagonal portion of A */
      ncols_o = bi[i+1] - bi[i];
      for (jo=0; jo<ncols_o; jo++) {
        col = cmap[*bj];
        if (col >= cstart) break;
        *cam++ = *ba++; bj++;
      }
      /* diagonal portion of A */
      ncols_d = ai[i+1] - ai[i];
      for (j=0; j<ncols_d; j++) *cam++ = *aa++;
      /* off-diagonal portion of A */
      for (j=jo; j<ncols_o; j++) {
        *cam++ = *ba++; bj++;
      }
    }
    PetscCall(MatSeqAIJRestoreArrayWrite(*A_loc,&cam));
  } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
  PetscCall(MatSeqAIJRestoreArrayRead(mpimat->A,&aav));
  PetscCall(MatSeqAIJRestoreArrayRead(mpimat->B,&bav));
  PetscCall(PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0));
  PetscFunctionReturn(0);
}

/*@
     MatMPIAIJGetLocalMatMerge - Creates a SeqAIJ from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
       mlocal rows and n columns.
Where n is the sum of the number of columns of the diagonal and offdiagonal part 5098 5099 Not Collective 5100 5101 Input Parameters: 5102 + A - the matrix 5103 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5104 5105 Output Parameters: 5106 + glob - sequential IS with global indices associated with the columns of the local sequential matrix generated (can be NULL) 5107 - A_loc - the local sequential matrix generated 5108 5109 Level: developer 5110 5111 Notes: 5112 This is different from MatMPIAIJGetLocalMat() since the first columns in the returning matrix are those associated with the diagonal part, then those associated with the offdiagonal part (in its local ordering) 5113 5114 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat(), MatMPIAIJGetLocalMatCondensed() 5115 5116 @*/ 5117 PetscErrorCode MatMPIAIJGetLocalMatMerge(Mat A,MatReuse scall,IS *glob,Mat *A_loc) 5118 { 5119 Mat Ao,Ad; 5120 const PetscInt *cmap; 5121 PetscMPIInt size; 5122 PetscErrorCode (*f)(Mat,MatReuse,IS*,Mat*); 5123 5124 PetscFunctionBegin; 5125 PetscCall(MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&cmap)); 5126 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A),&size)); 5127 if (size == 1) { 5128 if (scall == MAT_INITIAL_MATRIX) { 5129 PetscCall(PetscObjectReference((PetscObject)Ad)); 5130 *A_loc = Ad; 5131 } else if (scall == MAT_REUSE_MATRIX) { 5132 PetscCall(MatCopy(Ad,*A_loc,SAME_NONZERO_PATTERN)); 5133 } 5134 if (glob) PetscCall(ISCreateStride(PetscObjectComm((PetscObject)Ad),Ad->cmap->n,Ad->cmap->rstart,1,glob)); 5135 PetscFunctionReturn(0); 5136 } 5137 PetscCall(PetscObjectQueryFunction((PetscObject)A,"MatMPIAIJGetLocalMatMerge_C",&f)); 5138 PetscCall(PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0)); 5139 if (f) { 5140 PetscCall((*f)(A,scall,glob,A_loc)); 5141 } else { 5142 Mat_SeqAIJ *a = (Mat_SeqAIJ*)Ad->data; 5143 Mat_SeqAIJ *b = (Mat_SeqAIJ*)Ao->data; 5144 Mat_SeqAIJ *c; 5145 PetscInt *ai = a->i, *aj = a->j; 5146 PetscInt *bi = b->i, *bj = b->j; 5147 PetscInt *ci,*cj; 5148 
const PetscScalar *aa,*ba; 5149 PetscScalar *ca; 5150 PetscInt i,j,am,dn,on; 5151 5152 PetscCall(MatGetLocalSize(Ad,&am,&dn)); 5153 PetscCall(MatGetLocalSize(Ao,NULL,&on)); 5154 PetscCall(MatSeqAIJGetArrayRead(Ad,&aa)); 5155 PetscCall(MatSeqAIJGetArrayRead(Ao,&ba)); 5156 if (scall == MAT_INITIAL_MATRIX) { 5157 PetscInt k; 5158 PetscCall(PetscMalloc1(1+am,&ci)); 5159 PetscCall(PetscMalloc1(ai[am]+bi[am],&cj)); 5160 PetscCall(PetscMalloc1(ai[am]+bi[am],&ca)); 5161 ci[0] = 0; 5162 for (i=0,k=0; i<am; i++) { 5163 const PetscInt ncols_o = bi[i+1] - bi[i]; 5164 const PetscInt ncols_d = ai[i+1] - ai[i]; 5165 ci[i+1] = ci[i] + ncols_o + ncols_d; 5166 /* diagonal portion of A */ 5167 for (j=0; j<ncols_d; j++,k++) { 5168 cj[k] = *aj++; 5169 ca[k] = *aa++; 5170 } 5171 /* off-diagonal portion of A */ 5172 for (j=0; j<ncols_o; j++,k++) { 5173 cj[k] = dn + *bj++; 5174 ca[k] = *ba++; 5175 } 5176 } 5177 /* put together the new matrix */ 5178 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,dn+on,ci,cj,ca,A_loc)); 5179 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5180 /* Since these are PETSc arrays, change flags to free them as necessary. 
*/ 5181 c = (Mat_SeqAIJ*)(*A_loc)->data; 5182 c->free_a = PETSC_TRUE; 5183 c->free_ij = PETSC_TRUE; 5184 c->nonew = 0; 5185 PetscCall(MatSetType(*A_loc,((PetscObject)Ad)->type_name)); 5186 } else if (scall == MAT_REUSE_MATRIX) { 5187 PetscCall(MatSeqAIJGetArrayWrite(*A_loc,&ca)); 5188 for (i=0; i<am; i++) { 5189 const PetscInt ncols_d = ai[i+1] - ai[i]; 5190 const PetscInt ncols_o = bi[i+1] - bi[i]; 5191 /* diagonal portion of A */ 5192 for (j=0; j<ncols_d; j++) *ca++ = *aa++; 5193 /* off-diagonal portion of A */ 5194 for (j=0; j<ncols_o; j++) *ca++ = *ba++; 5195 } 5196 PetscCall(MatSeqAIJRestoreArrayWrite(*A_loc,&ca)); 5197 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 5198 PetscCall(MatSeqAIJRestoreArrayRead(Ad,&aa)); 5199 PetscCall(MatSeqAIJRestoreArrayRead(Ao,&aa)); 5200 if (glob) { 5201 PetscInt cst, *gidx; 5202 5203 PetscCall(MatGetOwnershipRangeColumn(A,&cst,NULL)); 5204 PetscCall(PetscMalloc1(dn+on,&gidx)); 5205 for (i=0; i<dn; i++) gidx[i] = cst + i; 5206 for (i=0; i<on; i++) gidx[i+dn] = cmap[i]; 5207 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)Ad),dn+on,gidx,PETSC_OWN_POINTER,glob)); 5208 } 5209 } 5210 PetscCall(PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0)); 5211 PetscFunctionReturn(0); 5212 } 5213 5214 /*@C 5215 MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MATMPIAIJ matrix by taking all its local rows and NON-ZERO columns 5216 5217 Not Collective 5218 5219 Input Parameters: 5220 + A - the matrix 5221 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5222 - row, col - index sets of rows and columns to extract (or NULL) 5223 5224 Output Parameter: 5225 . 
.  A_loc - the local sequential matrix generated

   Level: developer

.seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()

@*/
PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
{
  Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data;
  PetscInt   i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
  IS         isrowa,iscola;
  Mat        *aloc;
  PetscBool  match;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match));
  PetscCheck(match,PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input");
  PetscCall(PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0));
  if (!row) {
    /* no row IS supplied: take all locally owned rows */
    start = A->rmap->rstart; end = A->rmap->rend;
    PetscCall(ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa));
  } else {
    isrowa = *row;
  }
  if (!col) {
    /* no column IS supplied: take the columns with at least one local nonzero, i.e. all
       diagonal-block columns plus the ghost columns in garray, merged in ascending global
       order (garray is sorted, so ghosts split into a "before rstart" and "after" run) */
    start = A->cmap->rstart;
    cmap  = a->garray;
    nzA   = a->A->cmap->n;
    nzB   = a->B->cmap->n;
    PetscCall(PetscMalloc1(nzA+nzB, &idx));
    ncols = 0;
    for (i=0; i<nzB; i++) {
      /* ghost columns with global index below the diagonal block */
      if (cmap[i] < start) idx[ncols++] = cmap[i];
      else break;
    }
    imark = i;
    for (i=0; i<nzA; i++) idx[ncols++] = start + i;   /* the owned (diagonal-block) columns */
    for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* remaining ghost columns */
    PetscCall(ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola));
  } else {
    iscola = *col;
  }
  if (scall != MAT_INITIAL_MATRIX) {
    /* MatCreateSubMatrices() expects an array of matrices when reusing */
    PetscCall(PetscMalloc1(1,&aloc));
    aloc[0] = *A_loc;
  }
  PetscCall(MatCreateSubMatrices(A,1,&isrowa,&iscola,scall,&aloc));
  if (!col) { /* attach global id of condensed columns */
    PetscCall(PetscObjectCompose((PetscObject)aloc[0],"_petsc_GetLocalMatCondensed_iscol",(PetscObject)iscola));
  }
  *A_loc = aloc[0];
  PetscCall(PetscFree(aloc));
  if (!row) {
    PetscCall(ISDestroy(&isrowa));
  }
  if (!col) {
    PetscCall(ISDestroy(&iscola));
  }
  PetscCall(PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0));
  PetscFunctionReturn(0);
}

/*
 * Create a sequential AIJ matrix based on row indices: a whole row is extracted once it is matched.
 * Rows could be local or remote. The routine is designed to be scalable in memory so that nothing is based
 * on a global size.
 * */
PetscErrorCode MatCreateSeqSubMatrixWithRows_Private(Mat P,IS rows,Mat *P_oth)
{
  Mat_MPIAIJ             *p=(Mat_MPIAIJ*)P->data;
  Mat_SeqAIJ             *pd=(Mat_SeqAIJ*)(p->A)->data,*po=(Mat_SeqAIJ*)(p->B)->data,*p_oth;
  PetscInt               plocalsize,nrows,*ilocal,*oilocal,i,lidx,*nrcols,*nlcols,ncol;
  PetscMPIInt            owner;
  PetscSFNode            *iremote,*oiremote;
  const PetscInt         *lrowindices;
  PetscSF                sf,osf;
  PetscInt               pcstart,*roffsets,*loffsets,*pnnz,j;
  PetscInt               ontotalcols,dntotalcols,ntotalcols,nout;
  MPI_Comm               comm;
  ISLocalToGlobalMapping mapping;
  const PetscScalar      *pd_a,*po_a;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)P,&comm));
  /* plocalsize is the number of roots
   * nrows is the number of leaves
   * */
  PetscCall(MatGetLocalSize(P,&plocalsize,NULL));
  PetscCall(ISGetLocalSize(rows,&nrows));
  PetscCall(PetscCalloc1(nrows,&iremote));
  PetscCall(ISGetIndices(rows,&lrowindices));
  for (i=0;i<nrows;i++) {
    /* Find a remote index and an owner for a row
     * The row could be local or remote
     * */
    owner = 0;
    lidx  = 0;
    PetscCall(PetscLayoutFindOwnerIndex(P->rmap,lrowindices[i],&owner,&lidx));
    iremote[i].index = lidx;
    iremote[i].rank  = owner;
  }
  /* Create SF to communicate how many nonzero columns for each row */
  PetscCall(PetscSFCreate(comm,&sf));
  /* SF will figure out the number of nonzero columns for each row, and their
   * offsets
   * */
  PetscCall(PetscSFSetGraph(sf,plocalsize,nrows,NULL,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER));
  PetscCall(PetscSFSetFromOptions(sf));
  PetscCall(PetscSFSetUp(sf));

  /* Per root row we ship a pair (diag count, offdiag count) and a pair of running offsets,
     hence the 2x sizing and the MPIU_2INT datatype below */
  PetscCall(PetscCalloc1(2*(plocalsize+1),&roffsets));
  PetscCall(PetscCalloc1(2*plocalsize,&nrcols));
  PetscCall(PetscCalloc1(nrows,&pnnz));
  roffsets[0] = 0;
  roffsets[1] = 0;
  for (i=0;i<plocalsize;i++) {
    /* diag */
    nrcols[i*2+0] = pd->i[i+1] - pd->i[i];
    /* off diag */
    nrcols[i*2+1] = po->i[i+1] - po->i[i];
    /* compute offsets so that we have the relative location of each row */
    roffsets[(i+1)*2+0] = roffsets[i*2+0] + nrcols[i*2+0];
    roffsets[(i+1)*2+1] = roffsets[i*2+1] + nrcols[i*2+1];
  }
  PetscCall(PetscCalloc1(2*nrows,&nlcols));
  PetscCall(PetscCalloc1(2*nrows,&loffsets));
  /* 'r' means root, and 'l' means leaf */
  PetscCall(PetscSFBcastBegin(sf,MPIU_2INT,nrcols,nlcols,MPI_REPLACE));
  PetscCall(PetscSFBcastBegin(sf,MPIU_2INT,roffsets,loffsets,MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(sf,MPIU_2INT,nrcols,nlcols,MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(sf,MPIU_2INT,roffsets,loffsets,MPI_REPLACE));
  PetscCall(PetscSFDestroy(&sf));
  PetscCall(PetscFree(roffsets));
  PetscCall(PetscFree(nrcols));
  dntotalcols = 0;
  ontotalcols = 0;
  ncol        = 0;
  for (i=0;i<nrows;i++) {
    pnnz[i] = nlcols[i*2+0] + nlcols[i*2+1];
    ncol    = PetscMax(pnnz[i],ncol);
    /* diag */
    dntotalcols += nlcols[i*2+0];
    /* off diag */
    ontotalcols += nlcols[i*2+1];
  }
  /* We do not need to figure the right number of columns
   * since all the calculations will be done by going through the raw data
   * */
  PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF,nrows,ncol,0,pnnz,P_oth));
  PetscCall(MatSetUp(*P_oth));
  PetscCall(PetscFree(pnnz));
  p_oth = (Mat_SeqAIJ*) (*P_oth)->data;
  /* diag */
  PetscCall(PetscCalloc1(dntotalcols,&iremote));
  /* off diag */
  PetscCall(PetscCalloc1(ontotalcols,&oiremote));
  /* diag */
  PetscCall(PetscCalloc1(dntotalcols,&ilocal));
  /* off diag */
  PetscCall(PetscCalloc1(ontotalcols,&oilocal));
  dntotalcols = 0;
  ontotalcols = 0;
  ntotalcols  = 0;
  /* Build per-nonzero SF graphs: each leaf nonzero of P_oth points at the matching
     root nonzero inside the owner's diagonal (iremote) or off-diagonal (oiremote) block */
  for (i=0;i<nrows;i++) {
    owner = 0;
    PetscCall(PetscLayoutFindOwnerIndex(P->rmap,lrowindices[i],&owner,NULL));
    /* Set iremote for diag matrix */
    for (j=0;j<nlcols[i*2+0];j++) {
      iremote[dntotalcols].index = loffsets[i*2+0] + j;
      iremote[dntotalcols].rank  = owner;
      /* P_oth is seqAIJ so that ilocal need to point to the first part of memory */
      ilocal[dntotalcols++] = ntotalcols++;
    }
    /* off diag */
    for (j=0;j<nlcols[i*2+1];j++) {
      oiremote[ontotalcols].index = loffsets[i*2+1] + j;
      oiremote[ontotalcols].rank  = owner;
      oilocal[ontotalcols++] = ntotalcols++;
    }
  }
  PetscCall(ISRestoreIndices(rows,&lrowindices));
  PetscCall(PetscFree(loffsets));
  PetscCall(PetscFree(nlcols));
  PetscCall(PetscSFCreate(comm,&sf));
  /* P serves as roots and P_oth is leaves
   * Diag matrix
   * */
  PetscCall(PetscSFSetGraph(sf,pd->i[plocalsize],dntotalcols,ilocal,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER));
  PetscCall(PetscSFSetFromOptions(sf));
  PetscCall(PetscSFSetUp(sf));

  PetscCall(PetscSFCreate(comm,&osf));
  /* Off diag */
  PetscCall(PetscSFSetGraph(osf,po->i[plocalsize],ontotalcols,oilocal,PETSC_OWN_POINTER,oiremote,PETSC_OWN_POINTER));
  PetscCall(PetscSFSetFromOptions(osf));
  PetscCall(PetscSFSetUp(osf));
  PetscCall(MatSeqAIJGetArrayRead(p->A,&pd_a));
  PetscCall(MatSeqAIJGetArrayRead(p->B,&po_a));
  /* We operate on the matrix internal data to save memory */
  PetscCall(PetscSFBcastBegin(sf,MPIU_SCALAR,pd_a,p_oth->a,MPI_REPLACE));
  PetscCall(PetscSFBcastBegin(osf,MPIU_SCALAR,po_a,p_oth->a,MPI_REPLACE));
  PetscCall(MatGetOwnershipRangeColumn(P,&pcstart,NULL));
  /* Convert to global indices for diag matrix (shifted back below, after the Bcast completes) */
  for (i=0;i<pd->i[plocalsize];i++) pd->j[i] += pcstart;
  PetscCall(PetscSFBcastBegin(sf,MPIU_INT,pd->j,p_oth->j,MPI_REPLACE));
  /* We want P_oth to store global indices */
  PetscCall(ISLocalToGlobalMappingCreate(comm,1,p->B->cmap->n,p->garray,PETSC_COPY_VALUES,&mapping));
  /* Use memory scalable approach */
  PetscCall(ISLocalToGlobalMappingSetType(mapping,ISLOCALTOGLOBALMAPPINGHASH));
  /* Translate po->j in place to global indices; undone via ISGlobalToLocalMappingApply() below */
  PetscCall(ISLocalToGlobalMappingApply(mapping,po->i[plocalsize],po->j,po->j));
  PetscCall(PetscSFBcastBegin(osf,MPIU_INT,po->j,p_oth->j,MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(sf,MPIU_INT,pd->j,p_oth->j,MPI_REPLACE));
  /* Convert back to local indices */
  for (i=0;i<pd->i[plocalsize];i++) pd->j[i] -= pcstart;
  PetscCall(PetscSFBcastEnd(osf,MPIU_INT,po->j,p_oth->j,MPI_REPLACE));
  nout = 0;
  PetscCall(ISGlobalToLocalMappingApply(mapping,IS_GTOLM_DROP,po->i[plocalsize],po->j,&nout,po->j));
  PetscCheck(nout == po->i[plocalsize],comm,PETSC_ERR_ARG_INCOMP,"n %" PetscInt_FMT " does not equal to nout %" PetscInt_FMT " ",po->i[plocalsize],nout);
  PetscCall(ISLocalToGlobalMappingDestroy(&mapping));
  /* Exchange values */
  PetscCall(PetscSFBcastEnd(sf,MPIU_SCALAR,pd_a,p_oth->a,MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(osf,MPIU_SCALAR,po_a,p_oth->a,MPI_REPLACE));
  PetscCall(MatSeqAIJRestoreArrayRead(p->A,&pd_a));
  PetscCall(MatSeqAIJRestoreArrayRead(p->B,&po_a));
  /* Stop PETSc from shrinking memory */
  for (i=0;i<nrows;i++) p_oth->ilen[i] = p_oth->imax[i];
  PetscCall(MatAssemblyBegin(*P_oth,MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(*P_oth,MAT_FINAL_ASSEMBLY));
  /* Attach PetscSF objects to P_oth so that we can reuse it later */
  PetscCall(PetscObjectCompose((PetscObject)*P_oth,"diagsf",(PetscObject)sf));
  PetscCall(PetscObjectCompose((PetscObject)*P_oth,"offdiagsf",(PetscObject)osf));
  PetscCall(PetscSFDestroy(&sf));
  PetscCall(PetscSFDestroy(&osf));
  PetscFunctionReturn(0);
}

/*
 * Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A
 * This supports MPIAIJ and MAIJ
 * */
PetscErrorCode MatGetBrowsOfAcols_MPIXAIJ(Mat A,Mat P,PetscInt dof,MatReuse reuse,Mat *P_oth)
{
  Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data,*p=(Mat_MPIAIJ*)P->data;
  Mat_SeqAIJ *p_oth;
  IS         rows,map;
  PetscHMapI hamp;
  PetscInt   i,htsize,*rowindices,off,*mapping,key,count;
  MPI_Comm   comm;
  PetscSF    sf,osf;
  PetscBool  has;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)A,&comm));
  PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAocols,A,P,0,0));
  /* If it is the first time, create an index set of off-diag nonzero columns of A,
   * and then create a submatrix (that often is an overlapping matrix)
   * */
  if (reuse == MAT_INITIAL_MATRIX) {
    /* Use a hash table to figure out unique keys */
    PetscCall(PetscHMapICreate(&hamp));
    PetscCall(PetscHMapIResize(hamp,a->B->cmap->n));
    PetscCall(PetscCalloc1(a->B->cmap->n,&mapping));
    count = 0;
    /* Assume that a->garray is sorted, otherwise the following does not make sense */
    for (i=0;i<a->B->cmap->n;i++) {
      key = a->garray[i]/dof;  /* dof columns of A collapse onto one row of P */
      PetscCall(PetscHMapIHas(hamp,key,&has));
      if (!has) {
        mapping[i] = count;
        PetscCall(PetscHMapISet(hamp,key,count++));
      } else {
        /* Current 'i' has the same value as the previous step */
        mapping[i] = count-1;
      }
    }
    PetscCall(ISCreateGeneral(comm,a->B->cmap->n,mapping,PETSC_OWN_POINTER,&map));
    PetscCall(PetscHMapIGetSize(hamp,&htsize));
    PetscCheck(htsize==count,comm,PETSC_ERR_ARG_INCOMP," Size of hash map %" PetscInt_FMT " is inconsistent with count %" PetscInt_FMT " ",htsize,count);
    PetscCall(PetscCalloc1(htsize,&rowindices));
    off = 0;
    PetscCall(PetscHMapIGetKeys(hamp,&off,rowindices));
    PetscCall(PetscHMapIDestroy(&hamp));
    PetscCall(PetscSortInt(htsize,rowindices));
    PetscCall(ISCreateGeneral(comm,htsize,rowindices,PETSC_OWN_POINTER,&rows));
    /* In case the matrix was already created but users want to recreate the matrix */
    PetscCall(MatDestroy(P_oth));
    PetscCall(MatCreateSeqSubMatrixWithRows_Private(P,rows,P_oth));
    PetscCall(PetscObjectCompose((PetscObject)*P_oth,"aoffdiagtopothmapping",(PetscObject)map));
    PetscCall(ISDestroy(&map));
    PetscCall(ISDestroy(&rows));
  } else if (reuse == MAT_REUSE_MATRIX) {
    /* If matrix was already created, we simply update values using SF objects
     * that were attached to the matrix earlier.
     */
    const PetscScalar *pd_a,*po_a;

    PetscCall(PetscObjectQuery((PetscObject)*P_oth,"diagsf",(PetscObject*)&sf));
    PetscCall(PetscObjectQuery((PetscObject)*P_oth,"offdiagsf",(PetscObject*)&osf));
    PetscCheck(sf && osf,comm,PETSC_ERR_ARG_NULL,"Matrix is not initialized yet");
    p_oth = (Mat_SeqAIJ*) (*P_oth)->data;
    /* Update values in place */
    PetscCall(MatSeqAIJGetArrayRead(p->A,&pd_a));
    PetscCall(MatSeqAIJGetArrayRead(p->B,&po_a));
    PetscCall(PetscSFBcastBegin(sf,MPIU_SCALAR,pd_a,p_oth->a,MPI_REPLACE));
    PetscCall(PetscSFBcastBegin(osf,MPIU_SCALAR,po_a,p_oth->a,MPI_REPLACE));
    PetscCall(PetscSFBcastEnd(sf,MPIU_SCALAR,pd_a,p_oth->a,MPI_REPLACE));
    PetscCall(PetscSFBcastEnd(osf,MPIU_SCALAR,po_a,p_oth->a,MPI_REPLACE));
    PetscCall(MatSeqAIJRestoreArrayRead(p->A,&pd_a));
    PetscCall(MatSeqAIJRestoreArrayRead(p->B,&po_a));
  } else SETERRQ(comm,PETSC_ERR_ARG_UNKNOWN_TYPE,"Unknown reuse type");
  PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAocols,A,P,0,0));
  PetscFunctionReturn(0);
}

/*@C
   MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A

   Collective on Mat

   Input Parameters:
+  A - the first matrix in mpiaij format
.  B - the second matrix in mpiaij format
-  scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX

   Output Parameters:
+  rowb - On input index sets of rows of B to extract (or NULL), modified on output
.  colb - On input index sets of columns of B to extract (or NULL), modified on output
-  B_seq - the sequential matrix generated

   Level: developer

@*/
PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
{
  Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data;
  PetscInt   *idx,i,start,ncols,nzA,nzB,*cmap,imark;
  IS         isrowb,iscolb;
  Mat        *bseq=NULL;

  PetscFunctionBegin;
  /* Rows of B must align with the columns of A for A*B to make sense locally */
  if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
    SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
  }
  PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0));

  if (scall == MAT_INITIAL_MATRIX) {
    /* Build the sorted list of B's rows needed locally: ghost columns of A below rstart,
       then the owned columns, then the remaining ghost columns (garray is sorted) */
    start = A->cmap->rstart;
    cmap  = a->garray;
    nzA   = a->A->cmap->n;
    nzB   = a->B->cmap->n;
    PetscCall(PetscMalloc1(nzA+nzB, &idx));
    ncols = 0;
    for (i=0; i<nzB; i++) {  /* row < local row index */
      if (cmap[i] < start) idx[ncols++] = cmap[i];
      else break;
    }
    imark = i;
    for (i=0; i<nzA; i++) idx[ncols++] = start + i;   /* local rows */
    for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
    PetscCall(ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb));
    PetscCall(ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb));
  } else {
    PetscCheck(rowb && colb,PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
    isrowb = *rowb; iscolb = *colb;
    PetscCall(PetscMalloc1(1,&bseq));
    bseq[0] = *B_seq;
  }
  PetscCall(MatCreateSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq));
  *B_seq = bseq[0];
  PetscCall(PetscFree(bseq));
  if (!rowb) {
    PetscCall(ISDestroy(&isrowb));
  } else {
    *rowb = isrowb;
  }
  if (!colb) {
    PetscCall(ISDestroy(&iscolb));
  } else {
    *colb = iscolb;
  }
  PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0));
  PetscFunctionReturn(0);
}

/*
    MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns
    of the OFF-DIAGONAL portion of local A

    Collective on Mat

   Input Parameters:
+    A,B - the matrices in mpiaij format
-    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX

   Output Parameter:
+    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
.    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
.    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
-    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N

   Developer Notes: This directly accesses information inside the VecScatter associated with the matrix-vector product
     for this matrix. This is not desirable..
5628 5629 Level: developer 5630 5631 */ 5632 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth) 5633 { 5634 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5635 Mat_SeqAIJ *b_oth; 5636 VecScatter ctx; 5637 MPI_Comm comm; 5638 const PetscMPIInt *rprocs,*sprocs; 5639 const PetscInt *srow,*rstarts,*sstarts; 5640 PetscInt *rowlen,*bufj,*bufJ,ncols = 0,aBn=a->B->cmap->n,row,*b_othi,*b_othj,*rvalues=NULL,*svalues=NULL,*cols,sbs,rbs; 5641 PetscInt i,j,k=0,l,ll,nrecvs,nsends,nrows,*rstartsj = NULL,*sstartsj,len; 5642 PetscScalar *b_otha,*bufa,*bufA,*vals = NULL; 5643 MPI_Request *reqs = NULL,*rwaits = NULL,*swaits = NULL; 5644 PetscMPIInt size,tag,rank,nreqs; 5645 5646 PetscFunctionBegin; 5647 PetscCall(PetscObjectGetComm((PetscObject)A,&comm)); 5648 PetscCallMPI(MPI_Comm_size(comm,&size)); 5649 5650 if (PetscUnlikely(A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend)) { 5651 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5652 } 5653 PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0)); 5654 PetscCallMPI(MPI_Comm_rank(comm,&rank)); 5655 5656 if (size == 1) { 5657 startsj_s = NULL; 5658 bufa_ptr = NULL; 5659 *B_oth = NULL; 5660 PetscFunctionReturn(0); 5661 } 5662 5663 ctx = a->Mvctx; 5664 tag = ((PetscObject)ctx)->tag; 5665 5666 PetscCall(VecScatterGetRemote_Private(ctx,PETSC_TRUE/*send*/,&nsends,&sstarts,&srow,&sprocs,&sbs)); 5667 /* rprocs[] must be ordered so that indices received from them are ordered in rvalues[], which is key to algorithms used in this subroutine */ 5668 PetscCall(VecScatterGetRemoteOrdered_Private(ctx,PETSC_FALSE/*recv*/,&nrecvs,&rstarts,NULL/*indices not needed*/,&rprocs,&rbs)); 5669 PetscCall(PetscMPIIntCast(nsends+nrecvs,&nreqs)); 5670 
PetscCall(PetscMalloc1(nreqs,&reqs)); 5671 rwaits = reqs; 5672 swaits = reqs + nrecvs; 5673 5674 if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 5675 if (scall == MAT_INITIAL_MATRIX) { 5676 /* i-array */ 5677 /*---------*/ 5678 /* post receives */ 5679 if (nrecvs) PetscCall(PetscMalloc1(rbs*(rstarts[nrecvs] - rstarts[0]),&rvalues)); /* rstarts can be NULL when nrecvs=0 */ 5680 for (i=0; i<nrecvs; i++) { 5681 rowlen = rvalues + rstarts[i]*rbs; 5682 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */ 5683 PetscCallMPI(MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i)); 5684 } 5685 5686 /* pack the outgoing message */ 5687 PetscCall(PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj)); 5688 5689 sstartsj[0] = 0; 5690 rstartsj[0] = 0; 5691 len = 0; /* total length of j or a array to be sent */ 5692 if (nsends) { 5693 k = sstarts[0]; /* ATTENTION: sstarts[0] and rstarts[0] are not necessarily zero */ 5694 PetscCall(PetscMalloc1(sbs*(sstarts[nsends]-sstarts[0]),&svalues)); 5695 } 5696 for (i=0; i<nsends; i++) { 5697 rowlen = svalues + (sstarts[i]-sstarts[0])*sbs; 5698 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5699 for (j=0; j<nrows; j++) { 5700 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 5701 for (l=0; l<sbs; l++) { 5702 PetscCall(MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL)); /* rowlength */ 5703 5704 rowlen[j*sbs+l] = ncols; 5705 5706 len += ncols; 5707 PetscCall(MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL)); 5708 } 5709 k++; 5710 } 5711 PetscCallMPI(MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i)); 5712 5713 sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */ 5714 } 5715 /* recvs and sends of i-array are completed */ 5716 if (nreqs) PetscCallMPI(MPI_Waitall(nreqs,reqs,MPI_STATUSES_IGNORE)); 5717 PetscCall(PetscFree(svalues)); 5718 5719 /* allocate buffers for sending j and a arrays */ 5720 PetscCall(PetscMalloc1(len+1,&bufj)); 5721 
PetscCall(PetscMalloc1(len+1,&bufa)); 5722 5723 /* create i-array of B_oth */ 5724 PetscCall(PetscMalloc1(aBn+2,&b_othi)); 5725 5726 b_othi[0] = 0; 5727 len = 0; /* total length of j or a array to be received */ 5728 k = 0; 5729 for (i=0; i<nrecvs; i++) { 5730 rowlen = rvalues + (rstarts[i]-rstarts[0])*rbs; 5731 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of rows to be received */ 5732 for (j=0; j<nrows; j++) { 5733 b_othi[k+1] = b_othi[k] + rowlen[j]; 5734 PetscCall(PetscIntSumError(rowlen[j],len,&len)); 5735 k++; 5736 } 5737 rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 5738 } 5739 PetscCall(PetscFree(rvalues)); 5740 5741 /* allocate space for j and a arrrays of B_oth */ 5742 PetscCall(PetscMalloc1(b_othi[aBn]+1,&b_othj)); 5743 PetscCall(PetscMalloc1(b_othi[aBn]+1,&b_otha)); 5744 5745 /* j-array */ 5746 /*---------*/ 5747 /* post receives of j-array */ 5748 for (i=0; i<nrecvs; i++) { 5749 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5750 PetscCallMPI(MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i)); 5751 } 5752 5753 /* pack the outgoing message j-array */ 5754 if (nsends) k = sstarts[0]; 5755 for (i=0; i<nsends; i++) { 5756 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5757 bufJ = bufj+sstartsj[i]; 5758 for (j=0; j<nrows; j++) { 5759 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5760 for (ll=0; ll<sbs; ll++) { 5761 PetscCall(MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL)); 5762 for (l=0; l<ncols; l++) { 5763 *bufJ++ = cols[l]; 5764 } 5765 PetscCall(MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL)); 5766 } 5767 } 5768 PetscCallMPI(MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i)); 5769 } 5770 5771 /* recvs and sends of j-array are completed */ 5772 if (nreqs) PetscCallMPI(MPI_Waitall(nreqs,reqs,MPI_STATUSES_IGNORE)); 5773 } else if (scall == MAT_REUSE_MATRIX) { 5774 sstartsj = *startsj_s; 5775 rstartsj = 
*startsj_r; 5776 bufa = *bufa_ptr; 5777 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5778 PetscCall(MatSeqAIJGetArrayWrite(*B_oth,&b_otha)); 5779 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not possess an object container"); 5780 5781 /* a-array */ 5782 /*---------*/ 5783 /* post receives of a-array */ 5784 for (i=0; i<nrecvs; i++) { 5785 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5786 PetscCallMPI(MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i)); 5787 } 5788 5789 /* pack the outgoing message a-array */ 5790 if (nsends) k = sstarts[0]; 5791 for (i=0; i<nsends; i++) { 5792 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5793 bufA = bufa+sstartsj[i]; 5794 for (j=0; j<nrows; j++) { 5795 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5796 for (ll=0; ll<sbs; ll++) { 5797 PetscCall(MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals)); 5798 for (l=0; l<ncols; l++) { 5799 *bufA++ = vals[l]; 5800 } 5801 PetscCall(MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals)); 5802 } 5803 } 5804 PetscCallMPI(MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i)); 5805 } 5806 /* recvs and sends of a-array are completed */ 5807 if (nreqs) PetscCallMPI(MPI_Waitall(nreqs,reqs,MPI_STATUSES_IGNORE)); 5808 PetscCall(PetscFree(reqs)); 5809 5810 if (scall == MAT_INITIAL_MATRIX) { 5811 /* put together the new matrix */ 5812 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth)); 5813 5814 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5815 /* Since these are PETSc arrays, change flags to free them as necessary. 
*/ 5816 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5817 b_oth->free_a = PETSC_TRUE; 5818 b_oth->free_ij = PETSC_TRUE; 5819 b_oth->nonew = 0; 5820 5821 PetscCall(PetscFree(bufj)); 5822 if (!startsj_s || !bufa_ptr) { 5823 PetscCall(PetscFree2(sstartsj,rstartsj)); 5824 PetscCall(PetscFree(bufa_ptr)); 5825 } else { 5826 *startsj_s = sstartsj; 5827 *startsj_r = rstartsj; 5828 *bufa_ptr = bufa; 5829 } 5830 } else if (scall == MAT_REUSE_MATRIX) { 5831 PetscCall(MatSeqAIJRestoreArrayWrite(*B_oth,&b_otha)); 5832 } 5833 5834 PetscCall(VecScatterRestoreRemote_Private(ctx,PETSC_TRUE,&nsends,&sstarts,&srow,&sprocs,&sbs)); 5835 PetscCall(VecScatterRestoreRemoteOrdered_Private(ctx,PETSC_FALSE,&nrecvs,&rstarts,NULL,&rprocs,&rbs)); 5836 PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0)); 5837 PetscFunctionReturn(0); 5838 } 5839 5840 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*); 5841 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*); 5842 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJSELL(Mat,MatType,MatReuse,Mat*); 5843 #if defined(PETSC_HAVE_MKL_SPARSE) 5844 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat,MatType,MatReuse,Mat*); 5845 #endif 5846 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIBAIJ(Mat,MatType,MatReuse,Mat*); 5847 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*); 5848 #if defined(PETSC_HAVE_ELEMENTAL) 5849 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat,MatType,MatReuse,Mat*); 5850 #endif 5851 #if defined(PETSC_HAVE_SCALAPACK) 5852 PETSC_INTERN PetscErrorCode MatConvert_AIJ_ScaLAPACK(Mat,MatType,MatReuse,Mat*); 5853 #endif 5854 #if defined(PETSC_HAVE_HYPRE) 5855 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*); 5856 #endif 5857 #if defined(PETSC_HAVE_CUDA) 5858 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCUSPARSE(Mat,MatType,MatReuse,Mat*); 5859 #endif 5860 #if defined(PETSC_HAVE_KOKKOS_KERNELS) 
PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJKokkos(Mat,MatType,MatReuse,Mat*);
#endif
PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat,MatType,MatReuse,Mat*);
PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat,MatType,MatReuse,Mat*);
PETSC_INTERN PetscErrorCode MatProductSetFromOptions_IS_XAIJ(Mat);

/*
    Computes (B'*A')' since computing B*A directly is untenable

               n                       p                          p
        [             ]       [             ]         [                 ]
      m [      A      ]  *  n [      B      ]   =   m [        C        ]
        [             ]       [             ]         [                 ]

*/
static PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
{
  Mat At,Bt,Ct;

  PetscFunctionBegin;
  /* C = (B' * A')' ; the transposes are temporaries destroyed before returning */
  PetscCall(MatTranspose(A,MAT_INITIAL_MATRIX,&At));
  PetscCall(MatTranspose(B,MAT_INITIAL_MATRIX,&Bt));
  PetscCall(MatMatMult(Bt,At,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&Ct));
  PetscCall(MatDestroy(&At));
  PetscCall(MatDestroy(&Bt));
  /* transpose Ct into the caller-provided C (reuse: C's layout was set up symbolically) */
  PetscCall(MatTranspose(Ct,MAT_REUSE_MATRIX,&C));
  PetscCall(MatDestroy(&Ct));
  PetscFunctionReturn(0);
}

/* Symbolic phase: sets C's sizes/type from A and B and installs the numeric routine above */
static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat C)
{
  PetscBool cisdense;

  PetscFunctionBegin;
  PetscCheck(A->cmap->n == B->rmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %" PetscInt_FMT " != B->rmap->n %" PetscInt_FMT,A->cmap->n,B->rmap->n);
  PetscCall(MatSetSizes(C,A->rmap->n,B->cmap->n,A->rmap->N,B->cmap->N));
  PetscCall(MatSetBlockSizesFromMats(C,A,B));
  /* keep C dense if it already is (possibly a CUDA dense subtype); otherwise inherit A's type */
  PetscCall(PetscObjectTypeCompareAny((PetscObject)C,&cisdense,MATMPIDENSE,MATMPIDENSECUDA,""));
  if (!cisdense) {
    PetscCall(MatSetType(C,((PetscObject)A)->type_name));
  }
  PetscCall(MatSetUp(C));

  C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
  PetscFunctionReturn(0);
}

/* ----------------------------------------------------------------*/
/* AB product driver: validates layouts and installs symbolic routines on C */
static PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ_AB(Mat C)
{
  Mat_Product *product = C->product;
  Mat         A = product->A,B=product->B;

  PetscFunctionBegin;
  if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend)
    SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);

  C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIAIJ;
  C->ops->productsymbolic = MatProductSymbolic_AB;
  PetscFunctionReturn(0);
}

/* Dispatch on the product type; only MATPRODUCT_AB is supported for MPIDense*MPIAIJ */
PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ(Mat C)
{
  Mat_Product *product = C->product;

  PetscFunctionBegin;
  if (product->type == MATPRODUCT_AB) {
    PetscCall(MatProductSetFromOptions_MPIDense_MPIAIJ_AB(C));
  }
  PetscFunctionReturn(0);
}

/* std::upper_bound(): Given a sorted array, return index of the first element in range [first,last) whose value
   is greater than value, or last if there is no such element.
5937 */ 5938 static inline PetscErrorCode PetscSortedIntUpperBound(PetscInt *array,PetscCount first,PetscCount last,PetscInt value,PetscCount *upper) 5939 { 5940 PetscCount it,step,count = last - first; 5941 5942 PetscFunctionBegin; 5943 while (count > 0) { 5944 it = first; 5945 step = count / 2; 5946 it += step; 5947 if (!(value < array[it])) { 5948 first = ++it; 5949 count -= step + 1; 5950 } else count = step; 5951 } 5952 *upper = first; 5953 PetscFunctionReturn(0); 5954 } 5955 5956 /* Merge two sets of sorted nonzeros and return a CSR for the merged (sequential) matrix 5957 5958 Input Parameters: 5959 5960 j1,rowBegin1,rowEnd1,perm1,jmap1: describe the first set of nonzeros (Set1) 5961 j2,rowBegin2,rowEnd2,perm2,jmap2: describe the second set of nonzeros (Set2) 5962 5963 mat: both sets' nonzeros are on m rows, where m is the number of local rows of the matrix mat 5964 5965 For Set1, j1[] contains column indices of the nonzeros. 5966 For the k-th row (0<=k<m), [rowBegin1[k],rowEnd1[k]) index into j1[] and point to the begin/end nonzero in row k 5967 respectively (note rowEnd1[k] is not necessarily equal to rwoBegin1[k+1]). Indices in this range of j1[] are sorted, 5968 but might have repeats. jmap1[t+1] - jmap1[t] is the number of repeats for the t-th unique nonzero in Set1. 5969 5970 Similar for Set2. 5971 5972 This routine merges the two sets of nonzeros row by row and removes repeats. 5973 5974 Output Parameters: (memory is allocated by the caller) 5975 5976 i[],j[]: the CSR of the merged matrix, which has m rows. 5977 imap1[]: the k-th unique nonzero in Set1 (k=0,1,...) corresponds to imap1[k]-th unique nonzero in the merged matrix. 5978 imap2[]: similar to imap1[], but for Set2. 5979 Note we order nonzeros row-by-row and from left to right. 
5980 */ 5981 static PetscErrorCode MatMergeEntries_Internal(Mat mat,const PetscInt j1[],const PetscInt j2[],const PetscCount rowBegin1[],const PetscCount rowEnd1[], 5982 const PetscCount rowBegin2[],const PetscCount rowEnd2[],const PetscCount jmap1[],const PetscCount jmap2[], 5983 PetscCount imap1[],PetscCount imap2[],PetscInt i[],PetscInt j[]) 5984 { 5985 PetscInt r,m; /* Row index of mat */ 5986 PetscCount t,t1,t2,b1,e1,b2,e2; 5987 5988 PetscFunctionBegin; 5989 PetscCall(MatGetLocalSize(mat,&m,NULL)); 5990 t1 = t2 = t = 0; /* Count unique nonzeros of in Set1, Set1 and the merged respectively */ 5991 i[0] = 0; 5992 for (r=0; r<m; r++) { /* Do row by row merging */ 5993 b1 = rowBegin1[r]; 5994 e1 = rowEnd1[r]; 5995 b2 = rowBegin2[r]; 5996 e2 = rowEnd2[r]; 5997 while (b1 < e1 && b2 < e2) { 5998 if (j1[b1] == j2[b2]) { /* Same column index and hence same nonzero */ 5999 j[t] = j1[b1]; 6000 imap1[t1] = t; 6001 imap2[t2] = t; 6002 b1 += jmap1[t1+1] - jmap1[t1]; /* Jump to next unique local nonzero */ 6003 b2 += jmap2[t2+1] - jmap2[t2]; /* Jump to next unique remote nonzero */ 6004 t1++; t2++; t++; 6005 } else if (j1[b1] < j2[b2]) { 6006 j[t] = j1[b1]; 6007 imap1[t1] = t; 6008 b1 += jmap1[t1+1] - jmap1[t1]; 6009 t1++; t++; 6010 } else { 6011 j[t] = j2[b2]; 6012 imap2[t2] = t; 6013 b2 += jmap2[t2+1] - jmap2[t2]; 6014 t2++; t++; 6015 } 6016 } 6017 /* Merge the remaining in either j1[] or j2[] */ 6018 while (b1 < e1) { 6019 j[t] = j1[b1]; 6020 imap1[t1] = t; 6021 b1 += jmap1[t1+1] - jmap1[t1]; 6022 t1++; t++; 6023 } 6024 while (b2 < e2) { 6025 j[t] = j2[b2]; 6026 imap2[t2] = t; 6027 b2 += jmap2[t2+1] - jmap2[t2]; 6028 t2++; t++; 6029 } 6030 i[r+1] = t; 6031 } 6032 PetscFunctionReturn(0); 6033 } 6034 6035 /* Split nonzeros in a block of local rows into two subsets: those in the diagonal block and those in the off-diagonal block 6036 6037 Input Parameters: 6038 mat: an MPI matrix that provides row and column layout information for splitting. 
Let's say its number of local rows is m. 6039 n,i[],j[],perm[]: there are n input entries, belonging to m rows. Row/col indices of the entries are stored in i[] and j[] 6040 respectively, along with a permutation array perm[]. Length of the i[],j[],perm[] arrays is n. 6041 6042 i[] is already sorted, but within a row, j[] is not sorted and might have repeats. 6043 i[] might contain negative indices at the beginning, which means the corresponding entries should be ignored in the splitting. 6044 6045 Output Parameters: 6046 j[],perm[]: the routine needs to sort j[] within each row along with perm[]. 6047 rowBegin[],rowMid[],rowEnd[]: of length m, and the memory is preallocated and zeroed by the caller. 6048 They contain indices pointing to j[]. For 0<=r<m, [rowBegin[r],rowMid[r]) point to begin/end entries of row r of the diagonal block, 6049 and [rowMid[r],rowEnd[r]) point to begin/end entries of row r of the off-diagonal block. 6050 6051 Aperm[],Ajmap[],Atot,Annz: Arrays are allocated by this routine. 6052 Atot: number of entries belonging to the diagonal block. 6053 Annz: number of unique nonzeros belonging to the diagonal block. 6054 Aperm[Atot] stores values from perm[] for entries belonging to the diagonal block. Length of Aperm[] is Atot, though it may also count 6055 repeats (i.e., same 'i,j' pair). 6056 Ajmap[Annz+1] stores the number of repeats of each unique entry belonging to the diagonal block. More precisely, Ajmap[t+1] - Ajmap[t] 6057 is the number of repeats for the t-th unique entry in the diagonal block. Ajmap[0] is always 0. 6058 6059 Atot: number of entries belonging to the diagonal block 6060 Annz: number of unique nonzeros belonging to the diagonal block. 6061 6062 Bperm[], Bjmap[], Btot, Bnnz are similar but for the off-diagonal block. 6063 6064 Aperm[],Bperm[],Ajmap[] and Bjmap[] are allocated separately by this routine with PetscMalloc1(). 
6065 */ 6066 static PetscErrorCode MatSplitEntries_Internal(Mat mat,PetscCount n,const PetscInt i[],PetscInt j[], 6067 PetscCount perm[],PetscCount rowBegin[],PetscCount rowMid[],PetscCount rowEnd[], 6068 PetscCount *Atot_,PetscCount **Aperm_,PetscCount *Annz_,PetscCount **Ajmap_, 6069 PetscCount *Btot_,PetscCount **Bperm_,PetscCount *Bnnz_,PetscCount **Bjmap_) 6070 { 6071 PetscInt cstart,cend,rstart,rend,row,col; 6072 PetscCount Atot=0,Btot=0; /* Total number of nonzeros in the diagonal and off-diagonal blocks */ 6073 PetscCount Annz=0,Bnnz=0; /* Number of unique nonzeros in the diagonal and off-diagonal blocks */ 6074 PetscCount k,m,p,q,r,s,mid; 6075 PetscCount *Aperm,*Bperm,*Ajmap,*Bjmap; 6076 6077 PetscFunctionBegin; 6078 PetscCall(PetscLayoutGetRange(mat->rmap,&rstart,&rend)); 6079 PetscCall(PetscLayoutGetRange(mat->cmap,&cstart,&cend)); 6080 m = rend - rstart; 6081 6082 for (k=0; k<n; k++) {if (i[k]>=0) break;} /* Skip negative rows */ 6083 6084 /* Process [k,n): sort and partition each local row into diag and offdiag portions, 6085 fill rowBegin[], rowMid[], rowEnd[], and count Atot, Btot, Annz, Bnnz. 6086 */ 6087 while (k<n) { 6088 row = i[k]; 6089 /* Entries in [k,s) are in one row. 
Shift diagonal block col indices so that diag is ahead of offdiag after sorting the row */ 6090 for (s=k; s<n; s++) if (i[s] != row) break; 6091 for (p=k; p<s; p++) { 6092 if (j[p] >= cstart && j[p] < cend) j[p] -= PETSC_MAX_INT; /* Shift diag columns to range of [-PETSC_MAX_INT, -1] */ 6093 else PetscAssert((j[p] >= 0) && (j[p] <= mat->cmap->N),PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column index %" PetscInt_FMT " is out of range",j[p]); 6094 } 6095 PetscCall(PetscSortIntWithCountArray(s-k,j+k,perm+k)); 6096 PetscCall(PetscSortedIntUpperBound(j,k,s,-1,&mid)); /* Separate [k,s) into [k,mid) for diag and [mid,s) for offdiag */ 6097 rowBegin[row-rstart] = k; 6098 rowMid[row-rstart] = mid; 6099 rowEnd[row-rstart] = s; 6100 6101 /* Count nonzeros of this diag/offdiag row, which might have repeats */ 6102 Atot += mid - k; 6103 Btot += s - mid; 6104 6105 /* Count unique nonzeros of this diag/offdiag row */ 6106 for (p=k; p<mid;) { 6107 col = j[p]; 6108 do {j[p] += PETSC_MAX_INT; p++;} while (p<mid && j[p] == col); /* Revert the modified diagonal indices */ 6109 Annz++; 6110 } 6111 6112 for (p=mid; p<s;) { 6113 col = j[p]; 6114 do {p++;} while (p<s && j[p] == col); 6115 Bnnz++; 6116 } 6117 k = s; 6118 } 6119 6120 /* Allocation according to Atot, Btot, Annz, Bnnz */ 6121 PetscCall(PetscMalloc1(Atot,&Aperm)); 6122 PetscCall(PetscMalloc1(Btot,&Bperm)); 6123 PetscCall(PetscMalloc1(Annz+1,&Ajmap)); 6124 PetscCall(PetscMalloc1(Bnnz+1,&Bjmap)); 6125 6126 /* Re-scan indices and copy diag/offdiag permuation indices to Aperm, Bperm and also fill Ajmap and Bjmap */ 6127 Ajmap[0] = Bjmap[0] = Atot = Btot = Annz = Bnnz = 0; 6128 for (r=0; r<m; r++) { 6129 k = rowBegin[r]; 6130 mid = rowMid[r]; 6131 s = rowEnd[r]; 6132 PetscCall(PetscArraycpy(Aperm+Atot,perm+k, mid-k)); 6133 PetscCall(PetscArraycpy(Bperm+Btot,perm+mid,s-mid)); 6134 Atot += mid - k; 6135 Btot += s - mid; 6136 6137 /* Scan column indices in this row and find out how many repeats each unique nonzero has */ 6138 for 
(p=k; p<mid;) { 6139 col = j[p]; 6140 q = p; 6141 do {p++;} while (p<mid && j[p] == col); 6142 Ajmap[Annz+1] = Ajmap[Annz] + (p - q); 6143 Annz++; 6144 } 6145 6146 for (p=mid; p<s;) { 6147 col = j[p]; 6148 q = p; 6149 do {p++;} while (p<s && j[p] == col); 6150 Bjmap[Bnnz+1] = Bjmap[Bnnz] + (p - q); 6151 Bnnz++; 6152 } 6153 } 6154 /* Output */ 6155 *Aperm_ = Aperm; 6156 *Annz_ = Annz; 6157 *Atot_ = Atot; 6158 *Ajmap_ = Ajmap; 6159 *Bperm_ = Bperm; 6160 *Bnnz_ = Bnnz; 6161 *Btot_ = Btot; 6162 *Bjmap_ = Bjmap; 6163 PetscFunctionReturn(0); 6164 } 6165 6166 /* Expand the jmap[] array to make a new one in view of nonzeros in the merged matrix 6167 6168 Input Parameters: 6169 nnz1: number of unique nonzeros in a set that was used to produce imap[], jmap[] 6170 nnz: number of unique nonzeros in the merged matrix 6171 imap[nnz1]: i-th nonzero in the set is the imap[i]-th nonzero in the merged matrix 6172 jmap[nnz1+1]: i-th nonzeron in the set has jmap[i+1] - jmap[i] repeats in the set 6173 6174 Output Parameter: (memory is allocated by the caller) 6175 jmap_new[nnz+1]: i-th nonzero in the merged matrix has jmap_new[i+1] - jmap_new[i] repeats in the set 6176 6177 Example: 6178 nnz1 = 4 6179 nnz = 6 6180 imap = [1,3,4,5] 6181 jmap = [0,3,5,6,7] 6182 then, 6183 jmap_new = [0,0,3,3,5,6,7] 6184 */ 6185 static PetscErrorCode ExpandJmap_Internal(PetscCount nnz1,PetscCount nnz,const PetscCount imap[],const PetscCount jmap[],PetscCount jmap_new[]) 6186 { 6187 PetscCount k,p; 6188 6189 PetscFunctionBegin; 6190 jmap_new[0] = 0; 6191 p = nnz; /* p loops over jmap_new[] backwards */ 6192 for (k=nnz1-1; k>=0; k--) { /* k loops over imap[] */ 6193 for (; p > imap[k]; p--) jmap_new[p] = jmap[k+1]; 6194 } 6195 for (; p >= 0; p--) jmap_new[p] = jmap[0]; 6196 PetscFunctionReturn(0); 6197 } 6198 6199 PetscErrorCode MatSetPreallocationCOO_MPIAIJ(Mat mat, PetscCount coo_n, const PetscInt coo_i[], const PetscInt coo_j[]) 6200 { 6201 MPI_Comm comm; 6202 PetscMPIInt rank,size; 6203 PetscInt 
m,n,M,N,rstart,rend,cstart,cend; /* Sizes, indices of row/col, therefore with type PetscInt */ 6204 PetscCount k,p,q,rem; /* Loop variables over coo arrays */ 6205 Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ*)mat->data; 6206 6207 PetscFunctionBegin; 6208 PetscCall(PetscFree(mpiaij->garray)); 6209 PetscCall(VecDestroy(&mpiaij->lvec)); 6210 #if defined(PETSC_USE_CTABLE) 6211 PetscCall(PetscTableDestroy(&mpiaij->colmap)); 6212 #else 6213 PetscCall(PetscFree(mpiaij->colmap)); 6214 #endif 6215 PetscCall(VecScatterDestroy(&mpiaij->Mvctx)); 6216 mat->assembled = PETSC_FALSE; 6217 mat->was_assembled = PETSC_FALSE; 6218 PetscCall(MatResetPreallocationCOO_MPIAIJ(mat)); 6219 6220 PetscCall(PetscObjectGetComm((PetscObject)mat,&comm)); 6221 PetscCallMPI(MPI_Comm_size(comm,&size)); 6222 PetscCallMPI(MPI_Comm_rank(comm,&rank)); 6223 PetscCall(PetscLayoutSetUp(mat->rmap)); 6224 PetscCall(PetscLayoutSetUp(mat->cmap)); 6225 PetscCall(PetscLayoutGetRange(mat->rmap,&rstart,&rend)); 6226 PetscCall(PetscLayoutGetRange(mat->cmap,&cstart,&cend)); 6227 PetscCall(MatGetLocalSize(mat,&m,&n)); 6228 PetscCall(MatGetSize(mat,&M,&N)); 6229 6230 /* ---------------------------------------------------------------------------*/ 6231 /* Sort (i,j) by row along with a permuation array, so that the to-be-ignored */ 6232 /* entries come first, then local rows, then remote rows. 
*/ 6233 /* ---------------------------------------------------------------------------*/ 6234 PetscCount n1 = coo_n,*perm1; 6235 PetscInt *i1,*j1; /* Copies of input COOs along with a permutation array */ 6236 PetscCall(PetscMalloc3(n1,&i1,n1,&j1,n1,&perm1)); 6237 PetscCall(PetscArraycpy(i1,coo_i,n1)); /* Make a copy since we'll modify it */ 6238 PetscCall(PetscArraycpy(j1,coo_j,n1)); 6239 for (k=0; k<n1; k++) perm1[k] = k; 6240 6241 /* Manipulate indices so that entries with negative row or col indices will have smallest 6242 row indices, local entries will have greater but negative row indices, and remote entries 6243 will have positive row indices. 6244 */ 6245 for (k=0; k<n1; k++) { 6246 if (i1[k] < 0 || j1[k] < 0) i1[k] = PETSC_MIN_INT; /* e.g., -2^31, minimal to move them ahead */ 6247 else if (i1[k] >= rstart && i1[k] < rend) i1[k] -= PETSC_MAX_INT; /* e.g., minus 2^31-1 to shift local rows to range of [-PETSC_MAX_INT, -1] */ 6248 else PetscCheck(!mat->nooffprocentries,PETSC_COMM_SELF,PETSC_ERR_USER_INPUT,"MAT_NO_OFF_PROC_ENTRIES is set but insert to remote rows"); 6249 else if (mpiaij->donotstash) i1[k] = PETSC_MIN_INT; /* Ignore offproc entries as if they had negative indices */ 6250 } 6251 6252 /* Sort by row; after that, [0,k) have ignored entires, [k,rem) have local rows and [rem,n1) have remote rows */ 6253 PetscCall(PetscSortIntWithIntCountArrayPair(n1,i1,j1,perm1)); 6254 for (k=0; k<n1; k++) {if (i1[k] > PETSC_MIN_INT) break;} /* Advance k to the first entry we need to take care of */ 6255 PetscCall(PetscSortedIntUpperBound(i1,k,n1,rend-1-PETSC_MAX_INT,&rem)); /* rem is upper bound of the last local row */ 6256 for (; k<rem; k++) i1[k] += PETSC_MAX_INT; /* Revert row indices of local rows*/ 6257 6258 /* ---------------------------------------------------------------------------*/ 6259 /* Split local rows into diag/offdiag portions */ 6260 /* ---------------------------------------------------------------------------*/ 6261 PetscCount 
*rowBegin1,*rowMid1,*rowEnd1; 6262 PetscCount *Ajmap1,*Aperm1,*Bjmap1,*Bperm1,*Cperm1; 6263 PetscCount Annz1,Bnnz1,Atot1,Btot1; 6264 6265 PetscCall(PetscCalloc3(m,&rowBegin1,m,&rowMid1,m,&rowEnd1)); 6266 PetscCall(PetscMalloc1(n1-rem,&Cperm1)); 6267 PetscCall(MatSplitEntries_Internal(mat,rem,i1,j1,perm1,rowBegin1,rowMid1,rowEnd1,&Atot1,&Aperm1,&Annz1,&Ajmap1,&Btot1,&Bperm1,&Bnnz1,&Bjmap1)); 6268 6269 /* ---------------------------------------------------------------------------*/ 6270 /* Send remote rows to their owner */ 6271 /* ---------------------------------------------------------------------------*/ 6272 /* Find which rows should be sent to which remote ranks*/ 6273 PetscInt nsend = 0; /* Number of MPI ranks to send data to */ 6274 PetscMPIInt *sendto; /* [nsend], storing remote ranks */ 6275 PetscInt *nentries; /* [nsend], storing number of entries sent to remote ranks; Assume PetscInt is big enough for this count, and error if not */ 6276 const PetscInt *ranges; 6277 PetscInt maxNsend = size >= 128? 128 : size; /* Assume max 128 neighbors; realloc when needed */ 6278 6279 PetscCall(PetscLayoutGetRanges(mat->rmap,&ranges)); 6280 PetscCall(PetscMalloc2(maxNsend,&sendto,maxNsend,&nentries)); 6281 for (k=rem; k<n1;) { 6282 PetscMPIInt owner; 6283 PetscInt firstRow,lastRow; 6284 6285 /* Locate a row range */ 6286 firstRow = i1[k]; /* first row of this owner */ 6287 PetscCall(PetscLayoutFindOwner(mat->rmap,firstRow,&owner)); 6288 lastRow = ranges[owner+1]-1; /* last row of this owner */ 6289 6290 /* Find the first index 'p' in [k,n) with i[p] belonging to next owner */ 6291 PetscCall(PetscSortedIntUpperBound(i1,k,n1,lastRow,&p)); 6292 6293 /* All entries in [k,p) belong to this remote owner */ 6294 if (nsend >= maxNsend) { /* Double the remote ranks arrays if not long enough */ 6295 PetscMPIInt *sendto2; 6296 PetscInt *nentries2; 6297 PetscInt maxNsend2 = (maxNsend <= size/2) ? 
maxNsend*2 : size; 6298 6299 PetscCall(PetscMalloc2(maxNsend2,&sendto2,maxNsend2,&nentries2)); 6300 PetscCall(PetscArraycpy(sendto2,sendto,maxNsend)); 6301 PetscCall(PetscArraycpy(nentries2,nentries2,maxNsend+1)); 6302 PetscCall(PetscFree2(sendto,nentries2)); 6303 sendto = sendto2; 6304 nentries = nentries2; 6305 maxNsend = maxNsend2; 6306 } 6307 sendto[nsend] = owner; 6308 nentries[nsend] = p - k; 6309 PetscCall(PetscCountCast(p-k,&nentries[nsend])); 6310 nsend++; 6311 k = p; 6312 } 6313 6314 /* Build 1st SF to know offsets on remote to send data */ 6315 PetscSF sf1; 6316 PetscInt nroots = 1,nroots2 = 0; 6317 PetscInt nleaves = nsend,nleaves2 = 0; 6318 PetscInt *offsets; 6319 PetscSFNode *iremote; 6320 6321 PetscCall(PetscSFCreate(comm,&sf1)); 6322 PetscCall(PetscMalloc1(nsend,&iremote)); 6323 PetscCall(PetscMalloc1(nsend,&offsets)); 6324 for (k=0; k<nsend; k++) { 6325 iremote[k].rank = sendto[k]; 6326 iremote[k].index = 0; 6327 nleaves2 += nentries[k]; 6328 PetscCheck(nleaves2 >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Number of SF leaves is too large for PetscInt"); 6329 } 6330 PetscCall(PetscSFSetGraph(sf1,nroots,nleaves,NULL,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER)); 6331 PetscCall(PetscSFFetchAndOpWithMemTypeBegin(sf1,MPIU_INT,PETSC_MEMTYPE_HOST,&nroots2/*rootdata*/,PETSC_MEMTYPE_HOST,nentries/*leafdata*/,PETSC_MEMTYPE_HOST,offsets/*leafupdate*/,MPI_SUM)); 6332 PetscCall(PetscSFFetchAndOpEnd(sf1,MPIU_INT,&nroots2,nentries,offsets,MPI_SUM)); /* Would nroots2 overflow, we check offsets[] below */ 6333 PetscCall(PetscSFDestroy(&sf1)); 6334 PetscAssert(nleaves2 == n1-rem,PETSC_COMM_SELF,PETSC_ERR_PLIB,"nleaves2 %" PetscInt_FMT " != number of remote entries %" PetscCount_FMT "",nleaves2,n1-rem); 6335 6336 /* Build 2nd SF to send remote COOs to their owner */ 6337 PetscSF sf2; 6338 nroots = nroots2; 6339 nleaves = nleaves2; 6340 PetscCall(PetscSFCreate(comm,&sf2)); 6341 PetscCall(PetscSFSetFromOptions(sf2)); 6342 PetscCall(PetscMalloc1(nleaves,&iremote)); 
6343 p = 0; 6344 for (k=0; k<nsend; k++) { 6345 PetscCheck(offsets[k] >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Number of SF roots is too large for PetscInt"); 6346 for (q=0; q<nentries[k]; q++,p++) { 6347 iremote[p].rank = sendto[k]; 6348 iremote[p].index = offsets[k] + q; 6349 } 6350 } 6351 PetscCall(PetscSFSetGraph(sf2,nroots,nleaves,NULL,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER)); 6352 6353 /* sf2 only sends contiguous leafdata to contiguous rootdata. We record the permuation which will be used to fill leafdata */ 6354 PetscCall(PetscArraycpy(Cperm1,perm1+rem,n1-rem)); 6355 6356 /* Send the remote COOs to their owner */ 6357 PetscInt n2 = nroots,*i2,*j2; /* Buffers for received COOs from other ranks, along with a permutation array */ 6358 PetscCount *perm2; /* Though PetscInt is enough for remote entries, we use PetscCount here as we want to reuse MatSplitEntries_Internal() */ 6359 PetscCall(PetscMalloc3(n2,&i2,n2,&j2,n2,&perm2)); 6360 PetscCall(PetscSFReduceWithMemTypeBegin(sf2,MPIU_INT,PETSC_MEMTYPE_HOST,i1+rem,PETSC_MEMTYPE_HOST,i2,MPI_REPLACE)); 6361 PetscCall(PetscSFReduceEnd(sf2,MPIU_INT,i1+rem,i2,MPI_REPLACE)); 6362 PetscCall(PetscSFReduceWithMemTypeBegin(sf2,MPIU_INT,PETSC_MEMTYPE_HOST,j1+rem,PETSC_MEMTYPE_HOST,j2,MPI_REPLACE)); 6363 PetscCall(PetscSFReduceEnd(sf2,MPIU_INT,j1+rem,j2,MPI_REPLACE)); 6364 6365 PetscCall(PetscFree(offsets)); 6366 PetscCall(PetscFree2(sendto,nentries)); 6367 6368 /* ---------------------------------------------------------------*/ 6369 /* Sort received COOs by row along with the permutation array */ 6370 /* ---------------------------------------------------------------*/ 6371 for (k=0; k<n2; k++) perm2[k] = k; 6372 PetscCall(PetscSortIntWithIntCountArrayPair(n2,i2,j2,perm2)); 6373 6374 /* ---------------------------------------------------------------*/ 6375 /* Split received COOs into diag/offdiag portions */ 6376 /* ---------------------------------------------------------------*/ 6377 PetscCount 
*rowBegin2,*rowMid2,*rowEnd2; 6378 PetscCount *Ajmap2,*Aperm2,*Bjmap2,*Bperm2; 6379 PetscCount Annz2,Bnnz2,Atot2,Btot2; 6380 6381 PetscCall(PetscCalloc3(m,&rowBegin2,m,&rowMid2,m,&rowEnd2)); 6382 PetscCall(MatSplitEntries_Internal(mat,n2,i2,j2,perm2,rowBegin2,rowMid2,rowEnd2,&Atot2,&Aperm2,&Annz2,&Ajmap2,&Btot2,&Bperm2,&Bnnz2,&Bjmap2)); 6383 6384 /* --------------------------------------------------------------------------*/ 6385 /* Merge local COOs with received COOs: diag with diag, offdiag with offdiag */ 6386 /* --------------------------------------------------------------------------*/ 6387 PetscInt *Ai,*Bi; 6388 PetscInt *Aj,*Bj; 6389 6390 PetscCall(PetscMalloc1(m+1,&Ai)); 6391 PetscCall(PetscMalloc1(m+1,&Bi)); 6392 PetscCall(PetscMalloc1(Annz1+Annz2,&Aj)); /* Since local and remote entries might have dups, we might allocate excess memory */ 6393 PetscCall(PetscMalloc1(Bnnz1+Bnnz2,&Bj)); 6394 6395 PetscCount *Aimap1,*Bimap1,*Aimap2,*Bimap2; 6396 PetscCall(PetscMalloc1(Annz1,&Aimap1)); 6397 PetscCall(PetscMalloc1(Bnnz1,&Bimap1)); 6398 PetscCall(PetscMalloc1(Annz2,&Aimap2)); 6399 PetscCall(PetscMalloc1(Bnnz2,&Bimap2)); 6400 6401 PetscCall(MatMergeEntries_Internal(mat,j1,j2,rowBegin1,rowMid1,rowBegin2,rowMid2,Ajmap1,Ajmap2,Aimap1,Aimap2,Ai,Aj)); 6402 PetscCall(MatMergeEntries_Internal(mat,j1,j2,rowMid1, rowEnd1,rowMid2, rowEnd2,Bjmap1,Bjmap2,Bimap1,Bimap2,Bi,Bj)); 6403 6404 /* --------------------------------------------------------------------------*/ 6405 /* Expand Ajmap1/Bjmap1 to make them based off nonzeros in A/B, since we */ 6406 /* expect nonzeros in A/B most likely have local contributing entries */ 6407 /* --------------------------------------------------------------------------*/ 6408 PetscInt Annz = Ai[m]; 6409 PetscInt Bnnz = Bi[m]; 6410 PetscCount *Ajmap1_new,*Bjmap1_new; 6411 6412 PetscCall(PetscMalloc1(Annz+1,&Ajmap1_new)); 6413 PetscCall(PetscMalloc1(Bnnz+1,&Bjmap1_new)); 6414 6415 
PetscCall(ExpandJmap_Internal(Annz1,Annz,Aimap1,Ajmap1,Ajmap1_new)); 6416 PetscCall(ExpandJmap_Internal(Bnnz1,Bnnz,Bimap1,Bjmap1,Bjmap1_new)); 6417 6418 PetscCall(PetscFree(Aimap1)); 6419 PetscCall(PetscFree(Ajmap1)); 6420 PetscCall(PetscFree(Bimap1)); 6421 PetscCall(PetscFree(Bjmap1)); 6422 PetscCall(PetscFree3(rowBegin1,rowMid1,rowEnd1)); 6423 PetscCall(PetscFree3(rowBegin2,rowMid2,rowEnd2)); 6424 PetscCall(PetscFree3(i1,j1,perm1)); 6425 PetscCall(PetscFree3(i2,j2,perm2)); 6426 6427 Ajmap1 = Ajmap1_new; 6428 Bjmap1 = Bjmap1_new; 6429 6430 /* Reallocate Aj, Bj once we know actual numbers of unique nonzeros in A and B */ 6431 if (Annz < Annz1 + Annz2) { 6432 PetscInt *Aj_new; 6433 PetscCall(PetscMalloc1(Annz,&Aj_new)); 6434 PetscCall(PetscArraycpy(Aj_new,Aj,Annz)); 6435 PetscCall(PetscFree(Aj)); 6436 Aj = Aj_new; 6437 } 6438 6439 if (Bnnz < Bnnz1 + Bnnz2) { 6440 PetscInt *Bj_new; 6441 PetscCall(PetscMalloc1(Bnnz,&Bj_new)); 6442 PetscCall(PetscArraycpy(Bj_new,Bj,Bnnz)); 6443 PetscCall(PetscFree(Bj)); 6444 Bj = Bj_new; 6445 } 6446 6447 /* --------------------------------------------------------------------------------*/ 6448 /* Create new submatrices for on-process and off-process coupling */ 6449 /* --------------------------------------------------------------------------------*/ 6450 PetscScalar *Aa,*Ba; 6451 MatType rtype; 6452 Mat_SeqAIJ *a,*b; 6453 PetscCall(PetscCalloc1(Annz,&Aa)); /* Zero matrix on device */ 6454 PetscCall(PetscCalloc1(Bnnz,&Ba)); 6455 /* make Aj[] local, i.e, based off the start column of the diagonal portion */ 6456 if (cstart) {for (k=0; k<Annz; k++) Aj[k] -= cstart;} 6457 PetscCall(MatDestroy(&mpiaij->A)); 6458 PetscCall(MatDestroy(&mpiaij->B)); 6459 PetscCall(MatGetRootType_Private(mat,&rtype)); 6460 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,Ai,Aj,Aa,&mpiaij->A)); 6461 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,mat->cmap->N,Bi,Bj,Ba,&mpiaij->B)); 6462 PetscCall(MatSetUpMultiply_MPIAIJ(mat)); 6463 6464 a = 
(Mat_SeqAIJ*)mpiaij->A->data; 6465 b = (Mat_SeqAIJ*)mpiaij->B->data; 6466 a->singlemalloc = b->singlemalloc = PETSC_FALSE; /* Let newmat own Ai,Aj,Aa,Bi,Bj,Ba */ 6467 a->free_a = b->free_a = PETSC_TRUE; 6468 a->free_ij = b->free_ij = PETSC_TRUE; 6469 6470 /* conversion must happen AFTER multiply setup */ 6471 PetscCall(MatConvert(mpiaij->A,rtype,MAT_INPLACE_MATRIX,&mpiaij->A)); 6472 PetscCall(MatConvert(mpiaij->B,rtype,MAT_INPLACE_MATRIX,&mpiaij->B)); 6473 PetscCall(VecDestroy(&mpiaij->lvec)); 6474 PetscCall(MatCreateVecs(mpiaij->B,&mpiaij->lvec,NULL)); 6475 PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)mpiaij->lvec)); 6476 6477 mpiaij->coo_n = coo_n; 6478 mpiaij->coo_sf = sf2; 6479 mpiaij->sendlen = nleaves; 6480 mpiaij->recvlen = nroots; 6481 6482 mpiaij->Annz = Annz; 6483 mpiaij->Bnnz = Bnnz; 6484 6485 mpiaij->Annz2 = Annz2; 6486 mpiaij->Bnnz2 = Bnnz2; 6487 6488 mpiaij->Atot1 = Atot1; 6489 mpiaij->Atot2 = Atot2; 6490 mpiaij->Btot1 = Btot1; 6491 mpiaij->Btot2 = Btot2; 6492 6493 mpiaij->Ajmap1 = Ajmap1; 6494 mpiaij->Aperm1 = Aperm1; 6495 6496 mpiaij->Bjmap1 = Bjmap1; 6497 mpiaij->Bperm1 = Bperm1; 6498 6499 mpiaij->Aimap2 = Aimap2; 6500 mpiaij->Ajmap2 = Ajmap2; 6501 mpiaij->Aperm2 = Aperm2; 6502 6503 mpiaij->Bimap2 = Bimap2; 6504 mpiaij->Bjmap2 = Bjmap2; 6505 mpiaij->Bperm2 = Bperm2; 6506 6507 mpiaij->Cperm1 = Cperm1; 6508 6509 /* Allocate in preallocation. 
If not used, it has zero cost on host */ 6510 PetscCall(PetscMalloc2(mpiaij->sendlen,&mpiaij->sendbuf,mpiaij->recvlen,&mpiaij->recvbuf)); 6511 PetscFunctionReturn(0); 6512 } 6513 6514 static PetscErrorCode MatSetValuesCOO_MPIAIJ(Mat mat,const PetscScalar v[],InsertMode imode) 6515 { 6516 Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ*)mat->data; 6517 Mat A = mpiaij->A,B = mpiaij->B; 6518 PetscCount Annz = mpiaij->Annz,Annz2 = mpiaij->Annz2,Bnnz = mpiaij->Bnnz,Bnnz2 = mpiaij->Bnnz2; 6519 PetscScalar *Aa,*Ba; 6520 PetscScalar *sendbuf = mpiaij->sendbuf; 6521 PetscScalar *recvbuf = mpiaij->recvbuf; 6522 const PetscCount *Ajmap1 = mpiaij->Ajmap1,*Ajmap2 = mpiaij->Ajmap2,*Aimap2 = mpiaij->Aimap2; 6523 const PetscCount *Bjmap1 = mpiaij->Bjmap1,*Bjmap2 = mpiaij->Bjmap2,*Bimap2 = mpiaij->Bimap2; 6524 const PetscCount *Aperm1 = mpiaij->Aperm1,*Aperm2 = mpiaij->Aperm2,*Bperm1 = mpiaij->Bperm1,*Bperm2 = mpiaij->Bperm2; 6525 const PetscCount *Cperm1 = mpiaij->Cperm1; 6526 6527 PetscFunctionBegin; 6528 PetscCall(MatSeqAIJGetArray(A,&Aa)); /* Might read and write matrix values */ 6529 PetscCall(MatSeqAIJGetArray(B,&Ba)); 6530 6531 /* Pack entries to be sent to remote */ 6532 for (PetscCount i=0; i<mpiaij->sendlen; i++) sendbuf[i] = v[Cperm1[i]]; 6533 6534 /* Send remote entries to their owner and overlap the communication with local computation */ 6535 PetscCall(PetscSFReduceWithMemTypeBegin(mpiaij->coo_sf,MPIU_SCALAR,PETSC_MEMTYPE_HOST,sendbuf,PETSC_MEMTYPE_HOST,recvbuf,MPI_REPLACE)); 6536 /* Add local entries to A and B */ 6537 for (PetscCount i=0; i<Annz; i++) { /* All nonzeros in A are either zero'ed or added with a value (i.e., initialized) */ 6538 PetscScalar sum = 0.0; /* Do partial summation first to improve numerical stablility */ 6539 for (PetscCount k=Ajmap1[i]; k<Ajmap1[i+1]; k++) sum += v[Aperm1[k]]; 6540 Aa[i] = (imode == INSERT_VALUES? 
0.0 : Aa[i]) + sum; 6541 } 6542 for (PetscCount i=0; i<Bnnz; i++) { 6543 PetscScalar sum = 0.0; 6544 for (PetscCount k=Bjmap1[i]; k<Bjmap1[i+1]; k++) sum += v[Bperm1[k]]; 6545 Ba[i] = (imode == INSERT_VALUES? 0.0 : Ba[i]) + sum; 6546 } 6547 PetscCall(PetscSFReduceEnd(mpiaij->coo_sf,MPIU_SCALAR,sendbuf,recvbuf,MPI_REPLACE)); 6548 6549 /* Add received remote entries to A and B */ 6550 for (PetscCount i=0; i<Annz2; i++) { 6551 for (PetscCount k=Ajmap2[i]; k<Ajmap2[i+1]; k++) Aa[Aimap2[i]] += recvbuf[Aperm2[k]]; 6552 } 6553 for (PetscCount i=0; i<Bnnz2; i++) { 6554 for (PetscCount k=Bjmap2[i]; k<Bjmap2[i+1]; k++) Ba[Bimap2[i]] += recvbuf[Bperm2[k]]; 6555 } 6556 PetscCall(MatSeqAIJRestoreArray(A,&Aa)); 6557 PetscCall(MatSeqAIJRestoreArray(B,&Ba)); 6558 PetscFunctionReturn(0); 6559 } 6560 6561 /* ----------------------------------------------------------------*/ 6562 6563 /*MC 6564 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 6565 6566 Options Database Keys: 6567 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 6568 6569 Level: beginner 6570 6571 Notes: 6572 MatSetValues() may be called for this matrix type with a NULL argument for the numerical values, 6573 in this case the values associated with the rows and columns one passes in are set to zero 6574 in the matrix 6575 6576 MatSetOptions(,MAT_STRUCTURE_ONLY,PETSC_TRUE) may be called for this matrix type. 
In this case no
    space is allocated for the nonzero entries and any entries passed with MatSetValues() are ignored

.seealso: MatCreateAIJ()
M*/

/* Constructor registered for MATMPIAIJ: initializes the Mat_MPIAIJ data structure and
   composes the type-specific function implementations onto the object */
PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
{
  Mat_MPIAIJ  *b;
  PetscMPIInt size;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B),&size));

  PetscCall(PetscNewLog(B,&b));
  B->data       = (void*)b;
  PetscCall(PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps)));
  B->assembled  = PETSC_FALSE;
  B->insertmode = NOT_SET_VALUES;
  b->size       = size;

  PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank));

  /* build cache for off array entries formed */
  PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash));

  b->donotstash  = PETSC_FALSE;
  b->colmap      = NULL;
  b->garray      = NULL;
  b->roworiented = PETSC_TRUE;

  /* stuff used for matrix vector multiply */
  b->lvec  = NULL;
  b->Mvctx = NULL;

  /* stuff for MatGetRow() */
  b->rowindices   = NULL;
  b->rowvalues    = NULL;
  b->getrowactive = PETSC_FALSE;

  /* flexible pointer used in CUSPARSE classes */
  b->spptr = NULL;

  /* Compose type-specific implementations queried by name elsewhere in PETSc */
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetUseScalableIncreaseOverlap_C",MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatResetPreallocation_C",MatResetPreallocation_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijsell_C",MatConvert_MPIAIJ_MPIAIJSELL));
  /* Conversions to device/third-party formats are registered only when the build enables them */
#if defined(PETSC_HAVE_CUDA)
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcusparse_C",MatConvert_MPIAIJ_MPIAIJCUSPARSE));
#endif
#if defined(PETSC_HAVE_KOKKOS_KERNELS)
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijkokkos_C",MatConvert_MPIAIJ_MPIAIJKokkos));
#endif
#if defined(PETSC_HAVE_MKL_SPARSE)
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijmkl_C",MatConvert_MPIAIJ_MPIAIJMKL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpibaij_C",MatConvert_MPIAIJ_MPIBAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpidense_C",MatConvert_MPIAIJ_MPIDense));
#if defined(PETSC_HAVE_ELEMENTAL)
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_elemental_C",MatConvert_MPIAIJ_Elemental));
#endif
#if defined(PETSC_HAVE_SCALAPACK)
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_scalapack_C",MatConvert_AIJ_ScaLAPACK));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_is_C",MatConvert_XAIJ_IS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisell_C",MatConvert_MPIAIJ_MPISELL));
#if defined(PETSC_HAVE_HYPRE)
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_hypre_C",MatConvert_AIJ_HYPRE));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_transpose_mpiaij_mpiaij_C",MatProductSetFromOptions_Transpose_AIJ_AIJ));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_is_mpiaij_C",MatProductSetFromOptions_IS_XAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaij_mpiaij_C",MatProductSetFromOptions_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatSetPreallocationCOO_C",MatSetPreallocationCOO_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatSetValuesCOO_C",MatSetValuesCOO_MPIAIJ));
  PetscCall(PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ));
  PetscFunctionReturn(0);
}

/*@C
     MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal"
         and "off-diagonal" part of the matrix in CSR format.

   Collective

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (Cannot be PETSC_DECIDE)
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
       calculated if N is given) For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.  i - row indices for "diagonal" portion of matrix; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
.  j - column indices, which must be local, i.e., based off the start column of the diagonal portion
.
a - matrix values 6679 . oi - row indices for "off-diagonal" portion of matrix; that is oi[0] = 0, oi[row] = oi[row-1] + number of elements in that row of the matrix 6680 . oj - column indices, which must be global, representing global columns in the MPIAIJ matrix 6681 - oa - matrix values 6682 6683 Output Parameter: 6684 . mat - the matrix 6685 6686 Level: advanced 6687 6688 Notes: 6689 The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user 6690 must free the arrays once the matrix has been destroyed and not before. 6691 6692 The i and j indices are 0 based 6693 6694 See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix 6695 6696 This sets local rows and cannot be used to set off-processor values. 6697 6698 Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a 6699 legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does 6700 not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because 6701 the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to 6702 keep track of the underlying array. Use MatSetOption(A,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all 6703 communication if it is known that only local entries will be set. 
6704 6705 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 6706 MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays() 6707 @*/ 6708 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat) 6709 { 6710 Mat_MPIAIJ *maij; 6711 6712 PetscFunctionBegin; 6713 PetscCheck(m >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 6714 PetscCheckFalse(i[0],PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 6715 PetscCheckFalse(oi[0],PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0"); 6716 PetscCall(MatCreate(comm,mat)); 6717 PetscCall(MatSetSizes(*mat,m,n,M,N)); 6718 PetscCall(MatSetType(*mat,MATMPIAIJ)); 6719 maij = (Mat_MPIAIJ*) (*mat)->data; 6720 6721 (*mat)->preallocated = PETSC_TRUE; 6722 6723 PetscCall(PetscLayoutSetUp((*mat)->rmap)); 6724 PetscCall(PetscLayoutSetUp((*mat)->cmap)); 6725 6726 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A)); 6727 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B)); 6728 6729 PetscCall(MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE)); 6730 PetscCall(MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY)); 6731 PetscCall(MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY)); 6732 PetscCall(MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE)); 6733 PetscCall(MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE)); 6734 PetscFunctionReturn(0); 6735 } 6736 6737 typedef struct { 6738 Mat *mp; /* intermediate products */ 6739 PetscBool *mptmp; /* is the intermediate product temporary ? 
*/ 6740 PetscInt cp; /* number of intermediate products */ 6741 6742 /* support for MatGetBrowsOfAoCols_MPIAIJ for P_oth */ 6743 PetscInt *startsj_s,*startsj_r; 6744 PetscScalar *bufa; 6745 Mat P_oth; 6746 6747 /* may take advantage of merging product->B */ 6748 Mat Bloc; /* B-local by merging diag and off-diag */ 6749 6750 /* cusparse does not have support to split between symbolic and numeric phases. 6751 When api_user is true, we don't need to update the numerical values 6752 of the temporary storage */ 6753 PetscBool reusesym; 6754 6755 /* support for COO values insertion */ 6756 PetscScalar *coo_v,*coo_w; /* store on-process and off-process COO scalars, and used as MPI recv/send buffers respectively */ 6757 PetscInt **own; /* own[i] points to address of on-process COO indices for Mat mp[i] */ 6758 PetscInt **off; /* off[i] points to address of off-process COO indices for Mat mp[i] */ 6759 PetscBool hasoffproc; /* if true, have off-process values insertion (i.e. AtB or PtAP) */ 6760 PetscSF sf; /* used for non-local values insertion and memory malloc */ 6761 PetscMemType mtype; 6762 6763 /* customization */ 6764 PetscBool abmerge; 6765 PetscBool P_oth_bind; 6766 } MatMatMPIAIJBACKEND; 6767 6768 PetscErrorCode MatDestroy_MatMatMPIAIJBACKEND(void *data) 6769 { 6770 MatMatMPIAIJBACKEND *mmdata = (MatMatMPIAIJBACKEND*)data; 6771 PetscInt i; 6772 6773 PetscFunctionBegin; 6774 PetscCall(PetscFree2(mmdata->startsj_s,mmdata->startsj_r)); 6775 PetscCall(PetscFree(mmdata->bufa)); 6776 PetscCall(PetscSFFree(mmdata->sf,mmdata->mtype,mmdata->coo_v)); 6777 PetscCall(PetscSFFree(mmdata->sf,mmdata->mtype,mmdata->coo_w)); 6778 PetscCall(MatDestroy(&mmdata->P_oth)); 6779 PetscCall(MatDestroy(&mmdata->Bloc)); 6780 PetscCall(PetscSFDestroy(&mmdata->sf)); 6781 for (i = 0; i < mmdata->cp; i++) { 6782 PetscCall(MatDestroy(&mmdata->mp[i])); 6783 } 6784 PetscCall(PetscFree2(mmdata->mp,mmdata->mptmp)); 6785 PetscCall(PetscFree(mmdata->own[0])); 6786 PetscCall(PetscFree(mmdata->own)); 
  PetscCall(PetscFree(mmdata->off[0]));
  PetscCall(PetscFree(mmdata->off));
  PetscCall(PetscFree(mmdata));
  PetscFunctionReturn(0);
}

/* Copy selected n entries with indices in idx[] of A to v[].
   If idx is NULL, copy the whole data array of A to v[]
*/
static PetscErrorCode MatSeqAIJCopySubArray(Mat A, PetscInt n, const PetscInt idx[], PetscScalar v[])
{
  PetscErrorCode (*f)(Mat,PetscInt,const PetscInt[],PetscScalar[]);

  PetscFunctionBegin;
  /* subtypes (e.g. device back-ends) may provide an optimized implementation */
  PetscCall(PetscObjectQueryFunction((PetscObject)A,"MatSeqAIJCopySubArray_C",&f));
  if (f) {
    PetscCall((*f)(A,n,idx,v));
  } else {
    const PetscScalar *vv;

    PetscCall(MatSeqAIJGetArrayRead(A,&vv));
    if (n && idx) {
      PetscScalar    *w  = v;
      const PetscInt *oi = idx;
      PetscInt       j;

      for (j = 0; j < n; j++) *w++ = vv[*oi++];
    } else {
      PetscCall(PetscArraycpy(v,vv,n));
    }
    PetscCall(MatSeqAIJRestoreArrayRead(A,&vv));
  }
  PetscFunctionReturn(0);
}

/* Numeric phase of the backend product: recompute the intermediate local products,
   gather their values into the COO buffers laid out by the symbolic phase,
   exchange the off-process portion through the SF, and insert into C. */
static PetscErrorCode MatProductNumeric_MPIAIJBACKEND(Mat C)
{
  MatMatMPIAIJBACKEND *mmdata;
  PetscInt            i,n_d,n_o;

  PetscFunctionBegin;
  MatCheckProduct(C,1);
  PetscCheck(C->product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty");
  mmdata = (MatMatMPIAIJBACKEND*)C->product->data;
  if (!mmdata->reusesym) { /* update temporary matrices */
    if (mmdata->P_oth) {
      PetscCall(MatGetBrowsOfAoCols_MPIAIJ(C->product->A,C->product->B,MAT_REUSE_MATRIX,&mmdata->startsj_s,&mmdata->startsj_r,&mmdata->bufa,&mmdata->P_oth));
    }
    if (mmdata->Bloc) {
      PetscCall(MatMPIAIJGetLocalMatMerge(C->product->B,MAT_REUSE_MATRIX,NULL,&mmdata->Bloc));
    }
  }
  /* reusesym is only valid for the first numeric call right after symbolic */
  mmdata->reusesym = PETSC_FALSE;

  for (i = 0; i < mmdata->cp; i++) {
    PetscCheck(mmdata->mp[i]->ops->productnumeric,PetscObjectComm((PetscObject)mmdata->mp[i]),PETSC_ERR_PLIB,"Missing numeric op for %s",MatProductTypes[mmdata->mp[i]->product->type]);
    PetscCall((*mmdata->mp[i]->ops->productnumeric)(mmdata->mp[i]));
  }
  /* n_d/n_o track running offsets into the on-process (coo_v) and off-process (coo_w) value buffers */
  for (i = 0, n_d = 0, n_o = 0; i < mmdata->cp; i++) {
    PetscInt noff = mmdata->off[i+1] - mmdata->off[i];

    if (mmdata->mptmp[i]) continue;
    if (noff) {
      PetscInt nown = mmdata->own[i+1] - mmdata->own[i];

      PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i],noff,mmdata->off[i],mmdata->coo_w + n_o));
      PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i],nown,mmdata->own[i],mmdata->coo_v + n_d));
      n_o += noff;
      n_d += nown;
    } else {
      Mat_SeqAIJ *mm = (Mat_SeqAIJ*)mmdata->mp[i]->data;

      PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i],mm->nz,NULL,mmdata->coo_v + n_d));
      n_d += mm->nz;
    }
  }
  if (mmdata->hasoffproc) { /* offprocess insertion */
    /* received values land after the locally-produced entries in coo_v, matching the (i,j) layout from symbolic */
    PetscCall(PetscSFGatherBegin(mmdata->sf,MPIU_SCALAR,mmdata->coo_w,mmdata->coo_v+n_d));
    PetscCall(PetscSFGatherEnd(mmdata->sf,MPIU_SCALAR,mmdata->coo_w,mmdata->coo_v+n_d));
  }
  PetscCall(MatSetValuesCOO(C,mmdata->coo_v,INSERT_VALUES));
  PetscFunctionReturn(0);
}

/* Support for Pt * A, A * P, or Pt * A * P */
#define MAX_NUMBER_INTERMEDIATE 4
/* Symbolic phase of the backend product: decompose the requested MPI product into
   at most MAX_NUMBER_INTERMEDIATE sequential local products, compute each product's
   row/column local-to-global mapping, and preallocate C with the resulting COO pattern
   (building a PetscSF for the entries that must be inserted on other processes). */
PetscErrorCode MatProductSymbolic_MPIAIJBACKEND(Mat C)
{
  Mat_Product            *product = C->product;
  Mat                    A,P,mp[MAX_NUMBER_INTERMEDIATE]; /* A, P and a series of intermediate matrices */
  Mat_MPIAIJ             *a,*p;
  MatMatMPIAIJBACKEND    *mmdata;
  ISLocalToGlobalMapping P_oth_l2g = NULL;
  IS                     glob = NULL;
  const char             *prefix;
  char                   pprefix[256];
  const PetscInt         *globidx,*P_oth_idx;
  PetscInt               i,j,cp,m,n,M,N,*coo_i,*coo_j;
  PetscCount             ncoo,ncoo_d,ncoo_o,ncoo_oown;
  PetscInt               cmapt[MAX_NUMBER_INTERMEDIATE],rmapt[MAX_NUMBER_INTERMEDIATE]; /* col/row map type for each Mat in mp[]. */
                         /* type-0: consecutive, start from 0; type-1: consecutive with */
                         /* a base offset; type-2: sparse with a local to global map table */
  const PetscInt         *cmapa[MAX_NUMBER_INTERMEDIATE],*rmapa[MAX_NUMBER_INTERMEDIATE]; /* col/row local to global map array (table) for type-2 map type */

  MatProductType         ptype;
  PetscBool              mptmp[MAX_NUMBER_INTERMEDIATE],hasoffproc = PETSC_FALSE,iscuda,iskokk;
  PetscMPIInt            size;

  PetscFunctionBegin;
  MatCheckProduct(C,1);
  PetscCheck(!product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data not empty");
  ptype = product->type;
  /* A^t B with symmetric A is handled as A B, which avoids the off-process scatter */
  if (product->A->symmetric && ptype == MATPRODUCT_AtB) {
    ptype                                          = MATPRODUCT_AB;
    product->symbolic_used_the_fact_A_is_symmetric = PETSC_TRUE;
  }
  switch (ptype) {
  case MATPRODUCT_AB:
    A          = product->A;
    P          = product->B;
    m          = A->rmap->n;
    n          = P->cmap->n;
    M          = A->rmap->N;
    N          = P->cmap->N;
    hasoffproc = PETSC_FALSE; /* will not scatter mat product values to other processes */
    break;
  case MATPRODUCT_AtB:
    P          = product->A;
    A          = product->B;
    m          = P->cmap->n;
    n          = A->cmap->n;
    M          = P->cmap->N;
    N          = A->cmap->N;
    hasoffproc = PETSC_TRUE;
    break;
  case MATPRODUCT_PtAP:
    A          = product->A;
    P          = product->B;
    m          = P->cmap->n;
    n          = P->cmap->n;
    M          = P->cmap->N;
    N          = P->cmap->N;
    hasoffproc = PETSC_TRUE;
    break;
  default:
    SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Not for product type %s",MatProductTypes[ptype]);
  }
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)C),&size));
  if (size == 1) hasoffproc = PETSC_FALSE;

  /* defaults */
  for (i=0;i<MAX_NUMBER_INTERMEDIATE;i++) {
    mp[i]    = NULL;
    mptmp[i] = PETSC_FALSE;
    rmapt[i] = -1;
    cmapt[i] = -1;
    rmapa[i] = NULL;
    cmapa[i] = NULL;
  }

  /* customization */
  PetscCall(PetscNew(&mmdata));
  mmdata->reusesym = product->api_user;
  if (ptype == MATPRODUCT_AB) {
    if (product->api_user) {
      PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatMatMult","Mat");
      PetscCall(PetscOptionsBool("-matmatmult_backend_mergeB","Merge product->B local matrices","MatMatMult",mmdata->abmerge,&mmdata->abmerge,NULL));
      PetscCall(PetscOptionsBool("-matmatmult_backend_pothbind","Bind P_oth to CPU","MatBindToCPU",mmdata->P_oth_bind,&mmdata->P_oth_bind,NULL));
      PetscOptionsEnd();
    } else {
      PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_AB","Mat");
      PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_mergeB","Merge product->B local matrices","MatMatMult",mmdata->abmerge,&mmdata->abmerge,NULL));
      PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_pothbind","Bind P_oth to CPU","MatBindToCPU",mmdata->P_oth_bind,&mmdata->P_oth_bind,NULL));
      PetscOptionsEnd();
    }
  } else if (ptype == MATPRODUCT_PtAP) {
    if (product->api_user) {
      PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatPtAP","Mat");
      PetscCall(PetscOptionsBool("-matptap_backend_pothbind","Bind P_oth to CPU","MatBindToCPU",mmdata->P_oth_bind,&mmdata->P_oth_bind,NULL));
      PetscOptionsEnd();
    } else {
      PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_PtAP","Mat");
      PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_pothbind","Bind P_oth to CPU","MatBindToCPU",mmdata->P_oth_bind,&mmdata->P_oth_bind,NULL));
      PetscOptionsEnd();
    }
  }
  a = (Mat_MPIAIJ*)A->data;
  p = (Mat_MPIAIJ*)P->data;
  PetscCall(MatSetSizes(C,m,n,M,N));
  PetscCall(PetscLayoutSetUp(C->rmap));
  PetscCall(PetscLayoutSetUp(C->cmap));
  PetscCall(MatSetType(C,((PetscObject)A)->type_name));
  PetscCall(MatGetOptionsPrefix(C,&prefix));

  cp = 0;
  switch (ptype) {
  case MATPRODUCT_AB: /* A * P */
    PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A,P,MAT_INITIAL_MATRIX,&mmdata->startsj_s,&mmdata->startsj_r,&mmdata->bufa,&mmdata->P_oth));

    /* A_diag * P_local (merged or not) */
    if (mmdata->abmerge) { /* P's diagonal and off-diag blocks are merged to one matrix, then multiplied by A_diag */
      /* P is product->B */
      PetscCall(MatMPIAIJGetLocalMatMerge(P,MAT_INITIAL_MATRIX,&glob,&mmdata->Bloc));
      PetscCall(MatProductCreate(a->A,mmdata->Bloc,NULL,&mp[cp]));
      PetscCall(MatProductSetType(mp[cp],MATPRODUCT_AB));
      PetscCall(MatProductSetFill(mp[cp],product->fill));
      PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp));
      PetscCall(MatSetOptionsPrefix(mp[cp],prefix));
      PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix));
      mp[cp]->product->api_user = product->api_user;
      PetscCall(MatProductSetFromOptions(mp[cp]));
      PetscCheck(mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
      PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
      PetscCall(ISGetIndices(glob,&globidx));
      rmapt[cp] = 1;
      cmapt[cp] = 2;
      cmapa[cp] = globidx;
      mptmp[cp] = PETSC_FALSE;
      cp++;
    } else { /* A_diag * P_diag and A_diag * P_off */
      PetscCall(MatProductCreate(a->A,p->A,NULL,&mp[cp]));
      PetscCall(MatProductSetType(mp[cp],MATPRODUCT_AB));
      PetscCall(MatProductSetFill(mp[cp],product->fill));
      PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp));
      PetscCall(MatSetOptionsPrefix(mp[cp],prefix));
      PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix));
      mp[cp]->product->api_user = product->api_user;
      PetscCall(MatProductSetFromOptions(mp[cp]));
      PetscCheck(mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
      PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
      rmapt[cp] = 1;
      cmapt[cp] = 1;
      mptmp[cp] = PETSC_FALSE;
      cp++;
      PetscCall(MatProductCreate(a->A,p->B,NULL,&mp[cp]));
      PetscCall(MatProductSetType(mp[cp],MATPRODUCT_AB));
      PetscCall(MatProductSetFill(mp[cp],product->fill));
      PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp));
      PetscCall(MatSetOptionsPrefix(mp[cp],prefix));
      PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix));
      mp[cp]->product->api_user = product->api_user;
      PetscCall(MatProductSetFromOptions(mp[cp]));
      PetscCheck(mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
      PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
      rmapt[cp] = 1;
      cmapt[cp] = 2;
      cmapa[cp] = p->garray;
      mptmp[cp] = PETSC_FALSE;
      cp++;
    }

    /* A_off * P_other */
    if (mmdata->P_oth) {
      PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth,&P_oth_l2g)); /* make P_oth use local col ids */
      PetscCall(ISLocalToGlobalMappingGetIndices(P_oth_l2g,&P_oth_idx));
      PetscCall(MatSetType(mmdata->P_oth,((PetscObject)(a->B))->type_name));
      PetscCall(MatBindToCPU(mmdata->P_oth,mmdata->P_oth_bind));
      PetscCall(MatProductCreate(a->B,mmdata->P_oth,NULL,&mp[cp]));
      PetscCall(MatProductSetType(mp[cp],MATPRODUCT_AB));
      PetscCall(MatProductSetFill(mp[cp],product->fill));
      PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp));
      PetscCall(MatSetOptionsPrefix(mp[cp],prefix));
      PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix));
      mp[cp]->product->api_user = product->api_user;
      PetscCall(MatProductSetFromOptions(mp[cp]));
      PetscCheck(mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
      PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
      rmapt[cp] = 1;
      cmapt[cp] = 2;
      cmapa[cp] = P_oth_idx;
      mptmp[cp] = PETSC_FALSE;
      cp++;
    }
    break;

  case MATPRODUCT_AtB: /* (P^t * A): P_diag * A_loc + P_off * A_loc */
    /* A is product->B */
    PetscCall(MatMPIAIJGetLocalMatMerge(A,MAT_INITIAL_MATRIX,&glob,&mmdata->Bloc));
    if (A == P) { /* when A==P, we can take advantage of the already merged mmdata->Bloc */
      PetscCall(MatProductCreate(mmdata->Bloc,mmdata->Bloc,NULL,&mp[cp]));
      PetscCall(MatProductSetType(mp[cp],MATPRODUCT_AtB));
      PetscCall(MatProductSetFill(mp[cp],product->fill));
      PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp));
      PetscCall(MatSetOptionsPrefix(mp[cp],prefix));
      PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix));
      mp[cp]->product->api_user = product->api_user;
      PetscCall(MatProductSetFromOptions(mp[cp]));
      PetscCheck(mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
      PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
      PetscCall(ISGetIndices(glob,&globidx));
      rmapt[cp] = 2;
      rmapa[cp] = globidx;
      cmapt[cp] = 2;
      cmapa[cp] = globidx;
      mptmp[cp] = PETSC_FALSE;
      cp++;
    } else {
      PetscCall(MatProductCreate(p->A,mmdata->Bloc,NULL,&mp[cp]));
      PetscCall(MatProductSetType(mp[cp],MATPRODUCT_AtB));
      PetscCall(MatProductSetFill(mp[cp],product->fill));
      PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp));
      PetscCall(MatSetOptionsPrefix(mp[cp],prefix));
      PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix));
      mp[cp]->product->api_user = product->api_user;
      PetscCall(MatProductSetFromOptions(mp[cp]));
      PetscCheck(mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
      PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
      PetscCall(ISGetIndices(glob,&globidx));
      rmapt[cp] = 1;
      cmapt[cp] = 2;
      cmapa[cp] = globidx;
      mptmp[cp] = PETSC_FALSE;
      cp++;
      PetscCall(MatProductCreate(p->B,mmdata->Bloc,NULL,&mp[cp]));
      PetscCall(MatProductSetType(mp[cp],MATPRODUCT_AtB));
      PetscCall(MatProductSetFill(mp[cp],product->fill));
      PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp));
      PetscCall(MatSetOptionsPrefix(mp[cp],prefix));
      PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix));
      mp[cp]->product->api_user = product->api_user;
      PetscCall(MatProductSetFromOptions(mp[cp]));
      PetscCheck(mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
      PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
      rmapt[cp] = 2;
      rmapa[cp] = p->garray;
      cmapt[cp] = 2;
      cmapa[cp] = globidx;
      mptmp[cp] = PETSC_FALSE;
      cp++;
    }
    break;
  case MATPRODUCT_PtAP:
    PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A,P,MAT_INITIAL_MATRIX,&mmdata->startsj_s,&mmdata->startsj_r,&mmdata->bufa,&mmdata->P_oth));
    /* P is product->B */
    PetscCall(MatMPIAIJGetLocalMatMerge(P,MAT_INITIAL_MATRIX,&glob,&mmdata->Bloc));
    PetscCall(MatProductCreate(a->A,mmdata->Bloc,NULL,&mp[cp]));
    PetscCall(MatProductSetType(mp[cp],MATPRODUCT_PtAP));
    PetscCall(MatProductSetFill(mp[cp],product->fill));
    PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp));
    PetscCall(MatSetOptionsPrefix(mp[cp],prefix));
    PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix));
    mp[cp]->product->api_user = product->api_user;
    PetscCall(MatProductSetFromOptions(mp[cp]));
    PetscCheck(mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
    PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
    PetscCall(ISGetIndices(glob,&globidx));
    rmapt[cp] = 2;
    rmapa[cp] = globidx;
    cmapt[cp] = 2;
    cmapa[cp] = globidx;
    mptmp[cp] = PETSC_FALSE;
    cp++;
    if (mmdata->P_oth) {
      PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth,&P_oth_l2g));
      PetscCall(ISLocalToGlobalMappingGetIndices(P_oth_l2g,&P_oth_idx));
      PetscCall(MatSetType(mmdata->P_oth,((PetscObject)(a->B))->type_name));
      PetscCall(MatBindToCPU(mmdata->P_oth,mmdata->P_oth_bind));
      PetscCall(MatProductCreate(a->B,mmdata->P_oth,NULL,&mp[cp]));
      PetscCall(MatProductSetType(mp[cp],MATPRODUCT_AB));
      PetscCall(MatProductSetFill(mp[cp],product->fill));
      PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp));
      PetscCall(MatSetOptionsPrefix(mp[cp],prefix));
      PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix));
      mp[cp]->product->api_user = product->api_user;
      PetscCall(MatProductSetFromOptions(mp[cp]));
      PetscCheck(mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
      PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
      mptmp[cp] = PETSC_TRUE; /* A_off * P_oth is only an input to the next product, not part of C */
      cp++;
      PetscCall(MatProductCreate(mmdata->Bloc,mp[1],NULL,&mp[cp]));
      PetscCall(MatProductSetType(mp[cp],MATPRODUCT_AtB));
      PetscCall(MatProductSetFill(mp[cp],product->fill));
      PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp));
      PetscCall(MatSetOptionsPrefix(mp[cp],prefix));
      PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix));
      mp[cp]->product->api_user = product->api_user;
      PetscCall(MatProductSetFromOptions(mp[cp]));
      PetscCheck(mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
      PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
      rmapt[cp] = 2;
      rmapa[cp] = globidx;
      cmapt[cp] = 2;
      cmapa[cp] = P_oth_idx;
      mptmp[cp] = PETSC_FALSE;
      cp++;
    }
    break;
  default:
    SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Not for product type %s",MatProductTypes[ptype]);
  }
  /* sanity check */
  if (size > 1) for (i = 0; i < cp; i++) PetscCheckFalse(rmapt[i] == 2 && !hasoffproc,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected offproc map type for product %" PetscInt_FMT,i);

  PetscCall(PetscMalloc2(cp,&mmdata->mp,cp,&mmdata->mptmp));
  for (i = 0; i < cp; i++) {
    mmdata->mp[i]    = mp[i];
    mmdata->mptmp[i] = mptmp[i];
  }
  mmdata->cp             = cp;
  C->product->data       = mmdata;
  C->product->destroy    = MatDestroy_MatMatMPIAIJBACKEND;
  C->ops->productnumeric = MatProductNumeric_MPIAIJBACKEND;

  /* memory type */
  mmdata->mtype = PETSC_MEMTYPE_HOST;
  PetscCall(PetscObjectTypeCompareAny((PetscObject)C,&iscuda,MATSEQAIJCUSPARSE,MATMPIAIJCUSPARSE,""));
  PetscCall(PetscObjectTypeCompareAny((PetscObject)C,&iskokk,MATSEQAIJKOKKOS,MATMPIAIJKOKKOS,""));
  if (iscuda) mmdata->mtype = PETSC_MEMTYPE_CUDA;
  else if (iskokk) mmdata->mtype = PETSC_MEMTYPE_KOKKOS;

  /* prepare coo coordinates for values insertion */

  /* count total nonzeros of those intermediate seqaij Mats
    ncoo_d:    # of nonzeros of matrices that do not have offproc entries
    ncoo_o:    # of nonzeros (of matrices that might have offproc entries) that will be inserted to remote procs
    ncoo_oown: # of nonzeros (of matrices that might have offproc entries) that will be inserted locally
  */
  for (cp = 0, ncoo_d = 0, ncoo_o = 0, ncoo_oown = 0; cp < mmdata->cp; cp++) {
    Mat_SeqAIJ *mm = (Mat_SeqAIJ*)mp[cp]->data;
    if (mptmp[cp]) continue;
    if (rmapt[cp] == 2 && hasoffproc) { /* the rows need to be scatter to all processes (might include self) */
      const PetscInt *rmap = rmapa[cp];
      const PetscInt mr    = mp[cp]->rmap->n;
      const PetscInt rs    = C->rmap->rstart;
      const PetscInt re    = C->rmap->rend;
      const PetscInt *ii   = mm->i;
      for (i = 0; i < mr; i++) {
        const PetscInt gr = rmap[i];
        const PetscInt nz = ii[i+1] - ii[i];
        if (gr < rs || gr >= re) ncoo_o += nz; /* this row is offproc */
        else ncoo_oown += nz;                  /* this row is local */
      }
    } else ncoo_d += mm->nz;
  }

  /*
    ncoo: total number of nonzeros (including those inserted by remote procs) belonging to this proc

    ncoo = ncoo_d + ncoo_oown + ncoo2, which ncoo2 is number of nonzeros inserted to me by other procs.

    off[0] points to a big index array, which is shared by off[1,2,...]. Similarily, for own[0].

    off[p]: points to the segment for matrix mp[p], storing location of nonzeros that mp[p] will insert to others
    own[p]: points to the segment for matrix mp[p], storing location of nonzeros that mp[p] will insert locally
    so, off[p+1]-off[p] is the number of nonzeros that mp[p] will send to others.

    coo_i/j/v[]: [ncoo] row/col/val of nonzeros belonging to this proc.
    Ex. coo_i[]: the beginning part (of size ncoo_d + ncoo_oown) stores i of local nonzeros, and the remaing part stores i of nonzeros I will receive.
  */
  PetscCall(PetscCalloc1(mmdata->cp+1,&mmdata->off)); /* +1 to make a csr-like data structure */
  PetscCall(PetscCalloc1(mmdata->cp+1,&mmdata->own));

  /* gather (i,j) of nonzeros inserted by remote procs */
  if (hasoffproc) {
    PetscSF  msf;
    PetscInt ncoo2,*coo_i2,*coo_j2;

    PetscCall(PetscMalloc1(ncoo_o,&mmdata->off[0]));
    PetscCall(PetscMalloc1(ncoo_oown,&mmdata->own[0]));
    PetscCall(PetscMalloc2(ncoo_o,&coo_i,ncoo_o,&coo_j)); /* to collect (i,j) of entries to be sent to others */

    for (cp = 0, ncoo_o = 0; cp < mmdata->cp; cp++) {
      Mat_SeqAIJ *mm      = (Mat_SeqAIJ*)mp[cp]->data;
      PetscInt   *idxoff  = mmdata->off[cp];
      PetscInt   *idxown  = mmdata->own[cp];
      if (!mptmp[cp] && rmapt[cp] == 2) { /* row map is sparse */
        const PetscInt *rmap = rmapa[cp];
        const PetscInt *cmap = cmapa[cp];
        const PetscInt *ii   = mm->i;
        PetscInt       *coi  = coo_i + ncoo_o;
        PetscInt       *coj  = coo_j + ncoo_o;
        const PetscInt mr    = mp[cp]->rmap->n;
        const PetscInt rs    = C->rmap->rstart;
        const PetscInt re    = C->rmap->rend;
        const PetscInt cs    = C->cmap->rstart;
        for (i = 0; i < mr; i++) {
          const PetscInt *jj = mm->j + ii[i];
          const PetscInt gr  = rmap[i];
          const PetscInt nz  = ii[i+1] - ii[i];
          if (gr < rs || gr >= re) { /* this is an offproc row */
            for (j = ii[i]; j < ii[i+1]; j++) {
              *coi++    = gr;
              *idxoff++ = j;
            }
            if (!cmapt[cp]) { /* already global */
              for (j = 0; j < nz; j++) *coj++ = jj[j];
            } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */
              for (j = 0; j < nz; j++) *coj++ = jj[j] + cs;
            } else { /* offdiag */
              for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]];
            }
            ncoo_o += nz;
          } else { /* this is a local row */
            for (j = ii[i]; j < ii[i+1]; j++) *idxown++ = j;
          }
        }
      }
      mmdata->off[cp + 1] = idxoff;
      mmdata->own[cp + 1] = idxown;
    }

    PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)C),&mmdata->sf));
    PetscCall(PetscSFSetGraphLayout(mmdata->sf,C->rmap,ncoo_o/*nleaves*/,NULL/*ilocal*/,PETSC_OWN_POINTER,coo_i));
    PetscCall(PetscSFGetMultiSF(mmdata->sf,&msf));
    PetscCall(PetscSFGetGraph(msf,&ncoo2/*nroots*/,NULL,NULL,NULL));
    ncoo = ncoo_d + ncoo_oown + ncoo2;
    PetscCall(PetscMalloc2(ncoo,&coo_i2,ncoo,&coo_j2));
    PetscCall(PetscSFGatherBegin(mmdata->sf,MPIU_INT,coo_i,coo_i2 + ncoo_d + ncoo_oown)); /* put (i,j) of remote nonzeros at back */
    PetscCall(PetscSFGatherEnd(mmdata->sf,MPIU_INT,coo_i,coo_i2 + ncoo_d + ncoo_oown));
    PetscCall(PetscSFGatherBegin(mmdata->sf,MPIU_INT,coo_j,coo_j2 + ncoo_d + ncoo_oown));
    PetscCall(PetscSFGatherEnd(mmdata->sf,MPIU_INT,coo_j,coo_j2 + ncoo_d + ncoo_oown));
    PetscCall(PetscFree2(coo_i,coo_j));
    /* allocate MPI send buffer to collect nonzero values to be sent to remote procs */
    PetscCall(PetscSFMalloc(mmdata->sf,mmdata->mtype,ncoo_o*sizeof(PetscScalar),(void**)&mmdata->coo_w));
    coo_i = coo_i2;
    coo_j = coo_j2;
  } else { /* no offproc values insertion */
    ncoo = ncoo_d;
    PetscCall(PetscMalloc2(ncoo,&coo_i,ncoo,&coo_j));

    PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)C),&mmdata->sf));
    PetscCall(PetscSFSetGraph(mmdata->sf,0,0,NULL,PETSC_OWN_POINTER,NULL,PETSC_OWN_POINTER));
    PetscCall(PetscSFSetUp(mmdata->sf));
  }
  mmdata->hasoffproc = hasoffproc;

  /* gather (i,j) of nonzeros inserted locally */
  for (cp = 0, ncoo_d = 0; cp < mmdata->cp; cp++) {
    Mat_SeqAIJ     *mm   = (Mat_SeqAIJ*)mp[cp]->data;
    PetscInt       *coi  = coo_i + ncoo_d;
    PetscInt       *coj  = coo_j + ncoo_d;
    const PetscInt *jj   = mm->j;
    const PetscInt *ii   = mm->i;
    const PetscInt *cmap = cmapa[cp];
    const PetscInt *rmap = rmapa[cp];
    const PetscInt mr    = mp[cp]->rmap->n;
    const PetscInt rs    = C->rmap->rstart;
    const PetscInt re    = C->rmap->rend;
    const PetscInt cs    = C->cmap->rstart;

    if (mptmp[cp]) continue;
    if (rmapt[cp] == 1) { /* consecutive rows */
      /* fill coo_i */
      for (i = 0; i < mr; i++) {
        const PetscInt gr = i + rs;
        for (j = ii[i]; j < ii[i+1]; j++) coi[j] = gr;
      }
      /* fill coo_j */
      if (!cmapt[cp]) { /* type-0, already global */
        PetscCall(PetscArraycpy(coj,jj,mm->nz));
      } else if (cmapt[cp] == 1) { /* type-1, local to global for consecutive columns of C */
        for (j = 0; j < mm->nz; j++) coj[j] = jj[j] + cs; /* lid + col start */
      } else { /* type-2, local to global for sparse columns */
        for (j = 0; j < mm->nz; j++) coj[j] = cmap[jj[j]];
      }
      ncoo_d += mm->nz;
    } else if (rmapt[cp] == 2) { /* sparse rows */
      for (i = 0; i < mr; i++) {
        const PetscInt *jj = mm->j + ii[i];
        const PetscInt gr  = rmap[i];
        const PetscInt nz  = ii[i+1] - ii[i];
        if (gr >= rs && gr < re) { /* local rows */
          for (j = ii[i]; j < ii[i+1]; j++) *coi++ = gr;
          if (!cmapt[cp]) { /* type-0, already global */
            for (j = 0; j < nz; j++) *coj++ = jj[j];
          } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */
            for (j = 0; j < nz; j++) *coj++ = jj[j] + cs;
          } else { /* type-2, local to global for sparse columns */
            for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]];
          }
          ncoo_d += nz;
        }
      }
    }
  }
  if (glob) {
    PetscCall(ISRestoreIndices(glob,&globidx));
  }
  PetscCall(ISDestroy(&glob));
  if (P_oth_l2g) {
    PetscCall(ISLocalToGlobalMappingRestoreIndices(P_oth_l2g,&P_oth_idx));
  }
  PetscCall(ISLocalToGlobalMappingDestroy(&P_oth_l2g));
  /* allocate an array to store all nonzeros (inserted locally or remotely) belonging to this proc */
  PetscCall(PetscSFMalloc(mmdata->sf,mmdata->mtype,ncoo*sizeof(PetscScalar),(void**)&mmdata->coo_v));

  /* preallocate with COO data */
  PetscCall(MatSetPreallocationCOO(C,ncoo,coo_i,coo_j));
PetscCall(PetscFree2(coo_i,coo_j)); 7376 PetscFunctionReturn(0); 7377 } 7378 7379 PetscErrorCode MatProductSetFromOptions_MPIAIJBACKEND(Mat mat) 7380 { 7381 Mat_Product *product = mat->product; 7382 #if defined(PETSC_HAVE_DEVICE) 7383 PetscBool match = PETSC_FALSE; 7384 PetscBool usecpu = PETSC_FALSE; 7385 #else 7386 PetscBool match = PETSC_TRUE; 7387 #endif 7388 7389 PetscFunctionBegin; 7390 MatCheckProduct(mat,1); 7391 #if defined(PETSC_HAVE_DEVICE) 7392 if (!product->A->boundtocpu && !product->B->boundtocpu) { 7393 PetscCall(PetscObjectTypeCompare((PetscObject)product->B,((PetscObject)product->A)->type_name,&match)); 7394 } 7395 if (match) { /* we can always fallback to the CPU if requested */ 7396 switch (product->type) { 7397 case MATPRODUCT_AB: 7398 if (product->api_user) { 7399 PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatMatMult","Mat"); 7400 PetscCall(PetscOptionsBool("-matmatmult_backend_cpu","Use CPU code","MatMatMult",usecpu,&usecpu,NULL)); 7401 PetscOptionsEnd(); 7402 } else { 7403 PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatProduct_AB","Mat"); 7404 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu","Use CPU code","MatMatMult",usecpu,&usecpu,NULL)); 7405 PetscOptionsEnd(); 7406 } 7407 break; 7408 case MATPRODUCT_AtB: 7409 if (product->api_user) { 7410 PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatTransposeMatMult","Mat"); 7411 PetscCall(PetscOptionsBool("-mattransposematmult_backend_cpu","Use CPU code","MatTransposeMatMult",usecpu,&usecpu,NULL)); 7412 PetscOptionsEnd(); 7413 } else { 7414 PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatProduct_AtB","Mat"); 7415 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu","Use CPU code","MatTransposeMatMult",usecpu,&usecpu,NULL)); 7416 PetscOptionsEnd(); 7417 } 7418 break; 7419 case MATPRODUCT_PtAP: 7420 if (product->api_user) { 7421 
PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatPtAP","Mat"); 7422 PetscCall(PetscOptionsBool("-matptap_backend_cpu","Use CPU code","MatPtAP",usecpu,&usecpu,NULL)); 7423 PetscOptionsEnd(); 7424 } else { 7425 PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatProduct_PtAP","Mat"); 7426 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu","Use CPU code","MatPtAP",usecpu,&usecpu,NULL)); 7427 PetscOptionsEnd(); 7428 } 7429 break; 7430 default: 7431 break; 7432 } 7433 match = (PetscBool)!usecpu; 7434 } 7435 #endif 7436 if (match) { 7437 switch (product->type) { 7438 case MATPRODUCT_AB: 7439 case MATPRODUCT_AtB: 7440 case MATPRODUCT_PtAP: 7441 mat->ops->productsymbolic = MatProductSymbolic_MPIAIJBACKEND; 7442 break; 7443 default: 7444 break; 7445 } 7446 } 7447 /* fallback to MPIAIJ ops */ 7448 if (!mat->ops->productsymbolic) PetscCall(MatProductSetFromOptions_MPIAIJ(mat)); 7449 PetscFunctionReturn(0); 7450 } 7451 7452 /* 7453 Special version for direct calls from Fortran 7454 */ 7455 #include <petsc/private/fortranimpl.h> 7456 7457 /* Change these macros so can be used in void function */ 7458 /* Identical to PetscCallVoid, except it assigns to *_ierr */ 7459 #undef PetscCall 7460 #define PetscCall(...) do { \ 7461 PetscErrorCode ierr_msv_mpiaij = __VA_ARGS__; \ 7462 if (PetscUnlikely(ierr_msv_mpiaij)) { \ 7463 *_ierr = PetscError(PETSC_COMM_SELF,__LINE__,PETSC_FUNCTION_NAME,__FILE__,ierr_msv_mpiaij,PETSC_ERROR_REPEAT," "); \ 7464 return; \ 7465 } \ 7466 } while (0) 7467 7468 #undef SETERRQ 7469 #define SETERRQ(comm,ierr,...) 
do { \
    *_ierr = PetscError(comm,__LINE__,PETSC_FUNCTION_NAME,__FILE__,ierr,PETSC_ERROR_INITIAL,__VA_ARGS__); \
    return; \
  } while (0)

/* Fortran name mangling for the binding below: upper case, no underscore, or trailing underscore */
#if defined(PETSC_HAVE_FORTRAN_CAPS)
#define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
#elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
#define matsetvaluesmpiaij_ matsetvaluesmpiaij
#else
#endif
/*
   matsetvaluesmpiaij_ - Fortran binding of MatSetValues() specialized for MATMPIAIJ,
   callable directly from Fortran without going through the generic dispatch.

   Inserts/adds the m x n dense block v into global rows im[] and columns in[] of *mmat.
   Locally owned rows are inserted immediately via the MatSetValues_SeqAIJ_A/B_Private()
   macros (diagonal/off-diagonal parts); rows owned by other processes are stashed for
   communication at assembly time (unless aij->donotstash). Errors are reported through
   *_ierr via the PetscCall/SETERRQ redefinitions above, since this function returns void.
*/
PETSC_EXTERN void matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
{
  Mat         mat  = *mmat;
  PetscInt    m    = *mm, n = *mn;
  InsertMode  addv = *maddv;
  Mat_MPIAIJ  *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar value;

  MatCheckPreallocated(mat,1);
  /* ADD_VALUES and INSERT_VALUES cannot be mixed between assemblies */
  if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
  else PetscCheck(mat->insertmode == addv,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
  {
    PetscInt  i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
    PetscInt  cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
    PetscBool roworiented = aij->roworiented;

    /* Some Variables required in the macro */
    Mat        A = aij->A;
    Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
    PetscInt   *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
    MatScalar  *aa;
    /* zeros are dropped only when adding, and never on the diagonal entry */
    PetscBool  ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
    Mat        B = aij->B;
    Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data;
    PetscInt   *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
    MatScalar  *ba;
    /* This variable below is only for the PETSC_HAVE_VIENNACL or PETSC_HAVE_CUDA cases, but we define it in all cases because we
     * cannot use "#if defined" inside a macro. */
    PETSC_UNUSED PetscBool inserted = PETSC_FALSE;

    /* scratch locals consumed by MatSetValues_SeqAIJ_A/B_Private() */
    PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
    PetscInt  nonew = a->nonew;
    MatScalar *ap1,*ap2;

    PetscFunctionBegin;
    PetscCall(MatSeqAIJGetArray(A,&aa));
    PetscCall(MatSeqAIJGetArray(B,&ba));
    for (i=0; i<m; i++) {
      if (im[i] < 0) continue; /* negative rows are silently ignored */
      PetscCheck(im[i] < mat->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT,im[i],mat->rmap->N-1);
      if (im[i] >= rstart && im[i] < rend) {
        /* locally owned row: set up the binary-search state for the A (diag) and B (offdiag) parts */
        row      = im[i] - rstart;
        lastcol1 = -1;
        rp1      = aj + ai[row];
        ap1      = aa + ai[row];
        rmax1    = aimax[row];
        nrow1    = ailen[row];
        low1     = 0;
        high1    = nrow1;
        lastcol2 = -1;
        rp2      = bj + bi[row];
        ap2      = ba + bi[row];
        rmax2    = bimax[row];
        nrow2    = bilen[row];
        low2     = 0;
        high2    = nrow2;

        for (j=0; j<n; j++) {
          /* v is either row-major (m x n) or column-major (Fortran default) */
          if (roworiented) value = v[i*n+j];
          else value = v[i+j*m];
          if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
          if (in[j] >= cstart && in[j] < cend) {
            /* column in the locally owned (diagonal) block */
            col = in[j] - cstart;
            MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
          } else if (in[j] < 0) continue;
          else if (PetscUnlikelyDebug(in[j] >= mat->cmap->N)) {
            /* extra brace on SETERRQ() is required for --with-errorchecking=0 - due to the next 'else' clause */
            SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT,in[j],mat->cmap->N-1);
          } else {
            /* column in the off-diagonal block: translate global col to local col of B */
            if (mat->was_assembled) {
              if (!aij->colmap) {
                PetscCall(MatCreateColmap_MPIAIJ_Private(mat));
              }
#if defined(PETSC_USE_CTABLE)
              PetscCall(PetscTableFind(aij->colmap,in[j]+1,&col));
              col--;
#else
              col = aij->colmap[in[j]] - 1;
#endif
              if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
                /* column not present in B's nonzero pattern: disassemble so new offdiag entries can be inserted */
                PetscCall(MatDisAssemble_MPIAIJ(mat));
                col = in[j];
                /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
                B     = aij->B;
                b     = (Mat_SeqAIJ*)B->data;
                bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
                rp2   = bj + bi[row];
                /* NOTE(review): ap2 is computed from the pre-disassembly ba; ba is refreshed
                 * from b->a only afterwards — verify this ordering is intentional */
                ap2   = ba + bi[row];
                rmax2 = bimax[row];
                nrow2 = bilen[row];
                low2  = 0;
                high2 = nrow2;
                bm    = aij->B->rmap->n;
                ba    = b->a;
                inserted = PETSC_FALSE;
              }
            } else col = in[j];
            MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
          }
        }
      } else if (!aij->donotstash) {
        /* row owned by another process: stash for communication at assembly */
        if (roworiented) {
          PetscCall(MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
        } else {
          PetscCall(MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
        }
      }
    }
    PetscCall(MatSeqAIJRestoreArray(A,&aa));
    PetscCall(MatSeqAIJRestoreArray(B,&ba));
  }
  PetscFunctionReturnVoid();
}
/* Undefining these here since they were redefined from their original definition above! No
 * other PETSc functions should be defined past this point, as it is impossible to recover the
 * original definitions */
#undef PetscCall
#undef SETERRQ