1 #include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/ 2 #include <petsc/private/vecimpl.h> 3 #include <petsc/private/sfimpl.h> 4 #include <petsc/private/isimpl.h> 5 #include <petscblaslapack.h> 6 #include <petscsf.h> 7 #include <petsc/private/hashmapi.h> 8 9 /*MC 10 MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices. 11 12 This matrix type is identical to MATSEQAIJ when constructed with a single process communicator, 13 and MATMPIAIJ otherwise. As a result, for single process communicators, 14 MatSeqAIJSetPreallocation is supported, and similarly MatMPIAIJSetPreallocation() is supported 15 for communicators controlling multiple processes. It is recommended that you call both of 16 the above preallocation routines for simplicity. 17 18 Options Database Keys: 19 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions() 20 21 Developer Notes: 22 Subclasses include MATAIJCUSPARSE, MATAIJPERM, MATAIJSELL, MATAIJMKL, MATAIJCRL, and also automatically switches over to use inodes when 23 enough exist. 24 25 Level: beginner 26 27 .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ 28 M*/ 29 30 /*MC 31 MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices. 32 33 This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator, 34 and MATMPIAIJCRL otherwise. As a result, for single process communicators, 35 MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported 36 for communicators controlling multiple processes. It is recommended that you call both of 37 the above preallocation routines for simplicity. 38 39 Options Database Keys: 40 . 
-mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions() 41 42 Level: beginner 43 44 .seealso: MatCreateMPIAIJCRL,MATSEQAIJCRL,MATMPIAIJCRL, MATSEQAIJCRL, MATMPIAIJCRL 45 M*/ 46 47 static PetscErrorCode MatBindToCPU_MPIAIJ(Mat A,PetscBool flg) 48 { 49 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 50 51 PetscFunctionBegin; 52 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_VIENNACL) 53 A->boundtocpu = flg; 54 #endif 55 if (a->A) { 56 PetscCall(MatBindToCPU(a->A,flg)); 57 } 58 if (a->B) { 59 PetscCall(MatBindToCPU(a->B,flg)); 60 } 61 62 /* In addition to binding the diagonal and off-diagonal matrices, bind the local vectors used for matrix-vector products. 63 * This maybe seems a little odd for a MatBindToCPU() call to do, but it makes no sense for the binding of these vectors 64 * to differ from the parent matrix. */ 65 if (a->lvec) { 66 PetscCall(VecBindToCPU(a->lvec,flg)); 67 } 68 if (a->diag) { 69 PetscCall(VecBindToCPU(a->diag,flg)); 70 } 71 72 PetscFunctionReturn(0); 73 } 74 75 PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs) 76 { 77 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)M->data; 78 79 PetscFunctionBegin; 80 if (mat->A) { 81 PetscCall(MatSetBlockSizes(mat->A,rbs,cbs)); 82 PetscCall(MatSetBlockSizes(mat->B,rbs,1)); 83 } 84 PetscFunctionReturn(0); 85 } 86 87 PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows) 88 { 89 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)M->data; 90 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data; 91 Mat_SeqAIJ *b = (Mat_SeqAIJ*)mat->B->data; 92 const PetscInt *ia,*ib; 93 const MatScalar *aa,*bb,*aav,*bav; 94 PetscInt na,nb,i,j,*rows,cnt=0,n0rows; 95 PetscInt m = M->rmap->n,rstart = M->rmap->rstart; 96 97 PetscFunctionBegin; 98 *keptrows = NULL; 99 100 ia = a->i; 101 ib = b->i; 102 PetscCall(MatSeqAIJGetArrayRead(mat->A,&aav)); 103 PetscCall(MatSeqAIJGetArrayRead(mat->B,&bav)); 104 for (i=0; i<m; i++) { 105 na = ia[i+1] - ia[i]; 106 nb = ib[i+1] - ib[i]; 107 if (!na && !nb) { 108 cnt++; 109 goto ok1; 110 } 
111 aa = aav + ia[i]; 112 for (j=0; j<na; j++) { 113 if (aa[j] != 0.0) goto ok1; 114 } 115 bb = bav + ib[i]; 116 for (j=0; j <nb; j++) { 117 if (bb[j] != 0.0) goto ok1; 118 } 119 cnt++; 120 ok1:; 121 } 122 PetscCall(MPIU_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M))); 123 if (!n0rows) { 124 PetscCall(MatSeqAIJRestoreArrayRead(mat->A,&aav)); 125 PetscCall(MatSeqAIJRestoreArrayRead(mat->B,&bav)); 126 PetscFunctionReturn(0); 127 } 128 PetscCall(PetscMalloc1(M->rmap->n-cnt,&rows)); 129 cnt = 0; 130 for (i=0; i<m; i++) { 131 na = ia[i+1] - ia[i]; 132 nb = ib[i+1] - ib[i]; 133 if (!na && !nb) continue; 134 aa = aav + ia[i]; 135 for (j=0; j<na;j++) { 136 if (aa[j] != 0.0) { 137 rows[cnt++] = rstart + i; 138 goto ok2; 139 } 140 } 141 bb = bav + ib[i]; 142 for (j=0; j<nb; j++) { 143 if (bb[j] != 0.0) { 144 rows[cnt++] = rstart + i; 145 goto ok2; 146 } 147 } 148 ok2:; 149 } 150 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows)); 151 PetscCall(MatSeqAIJRestoreArrayRead(mat->A,&aav)); 152 PetscCall(MatSeqAIJRestoreArrayRead(mat->B,&bav)); 153 PetscFunctionReturn(0); 154 } 155 156 PetscErrorCode MatDiagonalSet_MPIAIJ(Mat Y,Vec D,InsertMode is) 157 { 158 Mat_MPIAIJ *aij = (Mat_MPIAIJ*) Y->data; 159 PetscBool cong; 160 161 PetscFunctionBegin; 162 PetscCall(MatHasCongruentLayouts(Y,&cong)); 163 if (Y->assembled && cong) { 164 PetscCall(MatDiagonalSet(aij->A,D,is)); 165 } else { 166 PetscCall(MatDiagonalSet_Default(Y,D,is)); 167 } 168 PetscFunctionReturn(0); 169 } 170 171 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows) 172 { 173 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)M->data; 174 PetscInt i,rstart,nrows,*rows; 175 176 PetscFunctionBegin; 177 *zrows = NULL; 178 PetscCall(MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows)); 179 PetscCall(MatGetOwnershipRange(M,&rstart,NULL)); 180 for (i=0; i<nrows; i++) rows[i] += rstart; 181 
PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows)); 182 PetscFunctionReturn(0); 183 } 184 185 PetscErrorCode MatGetColumnReductions_MPIAIJ(Mat A,PetscInt type,PetscReal *reductions) 186 { 187 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)A->data; 188 PetscInt i,m,n,*garray = aij->garray; 189 Mat_SeqAIJ *a_aij = (Mat_SeqAIJ*) aij->A->data; 190 Mat_SeqAIJ *b_aij = (Mat_SeqAIJ*) aij->B->data; 191 PetscReal *work; 192 const PetscScalar *dummy; 193 194 PetscFunctionBegin; 195 PetscCall(MatGetSize(A,&m,&n)); 196 PetscCall(PetscCalloc1(n,&work)); 197 PetscCall(MatSeqAIJGetArrayRead(aij->A,&dummy)); 198 PetscCall(MatSeqAIJRestoreArrayRead(aij->A,&dummy)); 199 PetscCall(MatSeqAIJGetArrayRead(aij->B,&dummy)); 200 PetscCall(MatSeqAIJRestoreArrayRead(aij->B,&dummy)); 201 if (type == NORM_2) { 202 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 203 work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]); 204 } 205 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 206 work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]); 207 } 208 } else if (type == NORM_1) { 209 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 210 work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]); 211 } 212 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 213 work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]); 214 } 215 } else if (type == NORM_INFINITY) { 216 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 217 work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]); 218 } 219 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 220 work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]); 221 } 222 } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) { 223 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 224 work[A->cmap->rstart + a_aij->j[i]] += PetscRealPart(a_aij->a[i]); 225 } 226 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 
227 work[garray[b_aij->j[i]]] += PetscRealPart(b_aij->a[i]); 228 } 229 } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) { 230 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 231 work[A->cmap->rstart + a_aij->j[i]] += PetscImaginaryPart(a_aij->a[i]); 232 } 233 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 234 work[garray[b_aij->j[i]]] += PetscImaginaryPart(b_aij->a[i]); 235 } 236 } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown reduction type"); 237 if (type == NORM_INFINITY) { 238 PetscCall(MPIU_Allreduce(work,reductions,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A))); 239 } else { 240 PetscCall(MPIU_Allreduce(work,reductions,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A))); 241 } 242 PetscCall(PetscFree(work)); 243 if (type == NORM_2) { 244 for (i=0; i<n; i++) reductions[i] = PetscSqrtReal(reductions[i]); 245 } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) { 246 for (i=0; i<n; i++) reductions[i] /= m; 247 } 248 PetscFunctionReturn(0); 249 } 250 251 PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A,IS *is) 252 { 253 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 254 IS sis,gis; 255 const PetscInt *isis,*igis; 256 PetscInt n,*iis,nsis,ngis,rstart,i; 257 258 PetscFunctionBegin; 259 PetscCall(MatFindOffBlockDiagonalEntries(a->A,&sis)); 260 PetscCall(MatFindNonzeroRows(a->B,&gis)); 261 PetscCall(ISGetSize(gis,&ngis)); 262 PetscCall(ISGetSize(sis,&nsis)); 263 PetscCall(ISGetIndices(sis,&isis)); 264 PetscCall(ISGetIndices(gis,&igis)); 265 266 PetscCall(PetscMalloc1(ngis+nsis,&iis)); 267 PetscCall(PetscArraycpy(iis,igis,ngis)); 268 PetscCall(PetscArraycpy(iis+ngis,isis,nsis)); 269 n = ngis + nsis; 270 PetscCall(PetscSortRemoveDupsInt(&n,iis)); 271 PetscCall(MatGetOwnershipRange(A,&rstart,NULL)); 272 for (i=0; i<n; i++) iis[i] += rstart; 273 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)A),n,iis,PETSC_OWN_POINTER,is)); 274 275 
PetscCall(ISRestoreIndices(sis,&isis)); 276 PetscCall(ISRestoreIndices(gis,&igis)); 277 PetscCall(ISDestroy(&sis)); 278 PetscCall(ISDestroy(&gis)); 279 PetscFunctionReturn(0); 280 } 281 282 /* 283 Local utility routine that creates a mapping from the global column 284 number to the local number in the off-diagonal part of the local 285 storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at 286 a slightly higher hash table cost; without it it is not scalable (each processor 287 has an order N integer array but is fast to access. 288 */ 289 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat) 290 { 291 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 292 PetscInt n = aij->B->cmap->n,i; 293 294 PetscFunctionBegin; 295 PetscCheck(!n || aij->garray,PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray"); 296 #if defined(PETSC_USE_CTABLE) 297 PetscCall(PetscTableCreate(n,mat->cmap->N+1,&aij->colmap)); 298 for (i=0; i<n; i++) { 299 PetscCall(PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES)); 300 } 301 #else 302 PetscCall(PetscCalloc1(mat->cmap->N+1,&aij->colmap)); 303 PetscCall(PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt))); 304 for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1; 305 #endif 306 PetscFunctionReturn(0); 307 } 308 309 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv,orow,ocol) \ 310 { \ 311 if (col <= lastcol1) low1 = 0; \ 312 else high1 = nrow1; \ 313 lastcol1 = col;\ 314 while (high1-low1 > 5) { \ 315 t = (low1+high1)/2; \ 316 if (rp1[t] > col) high1 = t; \ 317 else low1 = t; \ 318 } \ 319 for (_i=low1; _i<high1; _i++) { \ 320 if (rp1[_i] > col) break; \ 321 if (rp1[_i] == col) { \ 322 if (addv == ADD_VALUES) { \ 323 ap1[_i] += value; \ 324 /* Not sure LogFlops will slow dow the code or not */ \ 325 (void)PetscLogFlops(1.0); \ 326 } \ 327 else ap1[_i] = value; \ 328 goto a_noinsert; \ 329 } \ 330 } \ 331 if (value == 0.0 && ignorezeroentries && row != col) {low1 = 
0; high1 = nrow1;goto a_noinsert;} \ 332 if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \ 333 PetscCheck(nonew != -1,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \ 334 MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \ 335 N = nrow1++ - 1; a->nz++; high1++; \ 336 /* shift up all the later entries in this row */ \ 337 PetscCall(PetscArraymove(rp1+_i+1,rp1+_i,N-_i+1));\ 338 PetscCall(PetscArraymove(ap1+_i+1,ap1+_i,N-_i+1));\ 339 rp1[_i] = col; \ 340 ap1[_i] = value; \ 341 A->nonzerostate++;\ 342 a_noinsert: ; \ 343 ailen[row] = nrow1; \ 344 } 345 346 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv,orow,ocol) \ 347 { \ 348 if (col <= lastcol2) low2 = 0; \ 349 else high2 = nrow2; \ 350 lastcol2 = col; \ 351 while (high2-low2 > 5) { \ 352 t = (low2+high2)/2; \ 353 if (rp2[t] > col) high2 = t; \ 354 else low2 = t; \ 355 } \ 356 for (_i=low2; _i<high2; _i++) { \ 357 if (rp2[_i] > col) break; \ 358 if (rp2[_i] == col) { \ 359 if (addv == ADD_VALUES) { \ 360 ap2[_i] += value; \ 361 (void)PetscLogFlops(1.0); \ 362 } \ 363 else ap2[_i] = value; \ 364 goto b_noinsert; \ 365 } \ 366 } \ 367 if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 368 if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 369 PetscCheck(nonew != -1,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \ 370 MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \ 371 N = nrow2++ - 1; b->nz++; high2++; \ 372 /* shift up all the later entries in this row */ \ 373 PetscCall(PetscArraymove(rp2+_i+1,rp2+_i,N-_i+1));\ 374 PetscCall(PetscArraymove(ap2+_i+1,ap2+_i,N-_i+1));\ 375 rp2[_i] = col; \ 376 ap2[_i] = value; \ 377 B->nonzerostate++; \ 378 b_noinsert: ; \ 379 
bilen[row] = nrow2; \ 380 } 381 382 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[]) 383 { 384 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 385 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data; 386 PetscInt l,*garray = mat->garray,diag; 387 PetscScalar *aa,*ba; 388 389 PetscFunctionBegin; 390 /* code only works for square matrices A */ 391 392 /* find size of row to the left of the diagonal part */ 393 PetscCall(MatGetOwnershipRange(A,&diag,NULL)); 394 row = row - diag; 395 for (l=0; l<b->i[row+1]-b->i[row]; l++) { 396 if (garray[b->j[b->i[row]+l]] > diag) break; 397 } 398 if (l) { 399 PetscCall(MatSeqAIJGetArray(mat->B,&ba)); 400 PetscCall(PetscArraycpy(ba+b->i[row],v,l)); 401 PetscCall(MatSeqAIJRestoreArray(mat->B,&ba)); 402 } 403 404 /* diagonal part */ 405 if (a->i[row+1]-a->i[row]) { 406 PetscCall(MatSeqAIJGetArray(mat->A,&aa)); 407 PetscCall(PetscArraycpy(aa+a->i[row],v+l,(a->i[row+1]-a->i[row]))); 408 PetscCall(MatSeqAIJRestoreArray(mat->A,&aa)); 409 } 410 411 /* right of diagonal part */ 412 if (b->i[row+1]-b->i[row]-l) { 413 PetscCall(MatSeqAIJGetArray(mat->B,&ba)); 414 PetscCall(PetscArraycpy(ba+b->i[row]+l,v+l+a->i[row+1]-a->i[row],b->i[row+1]-b->i[row]-l)); 415 PetscCall(MatSeqAIJRestoreArray(mat->B,&ba)); 416 } 417 PetscFunctionReturn(0); 418 } 419 420 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 421 { 422 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 423 PetscScalar value = 0.0; 424 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 425 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 426 PetscBool roworiented = aij->roworiented; 427 428 /* Some Variables required in the macro */ 429 Mat A = aij->A; 430 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 431 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 432 PetscBool ignorezeroentries = a->ignorezeroentries; 433 Mat B = 
aij->B; 434 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 435 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 436 MatScalar *aa,*ba; 437 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 438 PetscInt nonew; 439 MatScalar *ap1,*ap2; 440 441 PetscFunctionBegin; 442 PetscCall(MatSeqAIJGetArray(A,&aa)); 443 PetscCall(MatSeqAIJGetArray(B,&ba)); 444 for (i=0; i<m; i++) { 445 if (im[i] < 0) continue; 446 PetscCheck(im[i] < mat->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT,im[i],mat->rmap->N-1); 447 if (im[i] >= rstart && im[i] < rend) { 448 row = im[i] - rstart; 449 lastcol1 = -1; 450 rp1 = aj + ai[row]; 451 ap1 = aa + ai[row]; 452 rmax1 = aimax[row]; 453 nrow1 = ailen[row]; 454 low1 = 0; 455 high1 = nrow1; 456 lastcol2 = -1; 457 rp2 = bj + bi[row]; 458 ap2 = ba + bi[row]; 459 rmax2 = bimax[row]; 460 nrow2 = bilen[row]; 461 low2 = 0; 462 high2 = nrow2; 463 464 for (j=0; j<n; j++) { 465 if (v) value = roworiented ? 
v[i*n+j] : v[i+j*m]; 466 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue; 467 if (in[j] >= cstart && in[j] < cend) { 468 col = in[j] - cstart; 469 nonew = a->nonew; 470 MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]); 471 } else if (in[j] < 0) continue; 472 else PetscCheck(in[j] < mat->cmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT,in[j],mat->cmap->N-1); 473 else { 474 if (mat->was_assembled) { 475 if (!aij->colmap) { 476 PetscCall(MatCreateColmap_MPIAIJ_Private(mat)); 477 } 478 #if defined(PETSC_USE_CTABLE) 479 PetscCall(PetscTableFind(aij->colmap,in[j]+1,&col)); /* map global col ids to local ones */ 480 col--; 481 #else 482 col = aij->colmap[in[j]] - 1; 483 #endif 484 if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) { /* col < 0 means in[j] is a new col for B */ 485 PetscCall(MatDisAssemble_MPIAIJ(mat)); /* Change aij->B from reduced/local format to expanded/global format */ 486 col = in[j]; 487 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 488 B = aij->B; 489 b = (Mat_SeqAIJ*)B->data; 490 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a; 491 rp2 = bj + bi[row]; 492 ap2 = ba + bi[row]; 493 rmax2 = bimax[row]; 494 nrow2 = bilen[row]; 495 low2 = 0; 496 high2 = nrow2; 497 bm = aij->B->rmap->n; 498 ba = b->a; 499 } else if (col < 0 && !(ignorezeroentries && value == 0.0)) { 500 if (1 == ((Mat_SeqAIJ*)(aij->B->data))->nonew) { 501 PetscCall(PetscInfo(mat,"Skipping of insertion of new nonzero location in off-diagonal portion of matrix %g(%" PetscInt_FMT ",%" PetscInt_FMT ")\n",(double)PetscRealPart(value),im[i],in[j])); 502 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", im[i], in[j]); 503 } 504 } else col = in[j]; 505 nonew = b->nonew; 506 
MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]); 507 } 508 } 509 } else { 510 PetscCheck(!mat->nooffprocentries,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 511 if (!aij->donotstash) { 512 mat->assembled = PETSC_FALSE; 513 if (roworiented) { 514 PetscCall(MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)))); 515 } else { 516 PetscCall(MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)))); 517 } 518 } 519 } 520 } 521 PetscCall(MatSeqAIJRestoreArray(A,&aa)); 522 PetscCall(MatSeqAIJRestoreArray(B,&ba)); 523 PetscFunctionReturn(0); 524 } 525 526 /* 527 This function sets the j and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix. 528 The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like). 529 No off-processor parts off the matrix are allowed here and mat->was_assembled has to be PETSC_FALSE. 
530 */ 531 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat_Symbolic(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[]) 532 { 533 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 534 Mat A = aij->A; /* diagonal part of the matrix */ 535 Mat B = aij->B; /* offdiagonal part of the matrix */ 536 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 537 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 538 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,col; 539 PetscInt *ailen = a->ilen,*aj = a->j; 540 PetscInt *bilen = b->ilen,*bj = b->j; 541 PetscInt am = aij->A->rmap->n,j; 542 PetscInt diag_so_far = 0,dnz; 543 PetscInt offd_so_far = 0,onz; 544 545 PetscFunctionBegin; 546 /* Iterate over all rows of the matrix */ 547 for (j=0; j<am; j++) { 548 dnz = onz = 0; 549 /* Iterate over all non-zero columns of the current row */ 550 for (col=mat_i[j]; col<mat_i[j+1]; col++) { 551 /* If column is in the diagonal */ 552 if (mat_j[col] >= cstart && mat_j[col] < cend) { 553 aj[diag_so_far++] = mat_j[col] - cstart; 554 dnz++; 555 } else { /* off-diagonal entries */ 556 bj[offd_so_far++] = mat_j[col]; 557 onz++; 558 } 559 } 560 ailen[j] = dnz; 561 bilen[j] = onz; 562 } 563 PetscFunctionReturn(0); 564 } 565 566 /* 567 This function sets the local j, a and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix. 568 The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like). 569 No off-processor parts off the matrix are allowed here, they are set at a later point by MatSetValues_MPIAIJ. 570 Also, mat->was_assembled has to be false, otherwise the statement aj[rowstart_diag+dnz_row] = mat_j[col] - cstart; 571 would not be true and the more complex MatSetValues_MPIAIJ has to be used. 
*/
/* Copy CSR values (and column ids) directly into the preallocated diagonal/off-diagonal blocks.
   Requires the symbolic pass (or equivalent preallocation) to have fixed full_diag_i/full_offd_i. */
PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[],const PetscScalar mat_a[])
{
  Mat_MPIAIJ  *aij  = (Mat_MPIAIJ*)mat->data;
  Mat         A     = aij->A; /* diagonal part of the matrix */
  Mat         B     = aij->B; /* offdiagonal part of the matrix */
  Mat_SeqAIJ  *aijd = (Mat_SeqAIJ*)(aij->A)->data,*aijo = (Mat_SeqAIJ*)(aij->B)->data;
  Mat_SeqAIJ  *a    = (Mat_SeqAIJ*)A->data;
  Mat_SeqAIJ  *b    = (Mat_SeqAIJ*)B->data;
  PetscInt    cstart = mat->cmap->rstart,cend = mat->cmap->rend;
  PetscInt    *ailen = a->ilen,*aj = a->j;
  PetscInt    *bilen = b->ilen,*bj = b->j;
  PetscInt    am     = aij->A->rmap->n,j;
  PetscInt    *full_diag_i = aijd->i,*full_offd_i = aijo->i; /* These variables can also include non-local elements, which are set at a later point. */
  PetscInt    col,dnz_row,onz_row,rowstart_diag,rowstart_offd;
  PetscScalar *aa = a->a,*ba = b->a;

  PetscFunctionBegin;
  /* Iterate over all rows of the matrix */
  for (j=0; j<am; j++) {
    dnz_row = onz_row = 0;
    rowstart_offd = full_offd_i[j];
    rowstart_diag = full_diag_i[j];
    /* Iterate over all non-zero columns of the current row */
    for (col=mat_i[j]; col<mat_i[j+1]; col++) {
      /* If column is in the diagonal */
      if (mat_j[col] >= cstart && mat_j[col] < cend) {
        aj[rowstart_diag+dnz_row] = mat_j[col] - cstart; /* local column id */
        aa[rowstart_diag+dnz_row] = mat_a[col];
        dnz_row++;
      } else { /* off-diagonal entries */
        bj[rowstart_offd+onz_row] = mat_j[col]; /* global id; compacted during assembly */
        ba[rowstart_offd+onz_row] = mat_a[col];
        onz_row++;
      }
    }
    ailen[j] = dnz_row;
    bilen[j] = onz_row;
  }
  PetscFunctionReturn(0);
}

/* Retrieve an m x n block of values.  Only locally owned rows are supported; off-diagonal
   columns are translated through the colmap, and entries that are not stored return 0.0. */
PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
  PetscInt   i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt   cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* negative row */
    PetscCheck(idxm[i] < mat->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT,idxm[i],mat->rmap->N-1);
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* negative column */
        PetscCheck(idxn[j] < mat->cmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT,idxn[j],mat->cmap->N-1);
        if (idxn[j] >= cstart && idxn[j] < cend) {
          col = idxn[j] - cstart;
          PetscCall(MatGetValues(aij->A,1,&row,1,&col,v+i*n+j));
        } else {
          if (!aij->colmap) {
            PetscCall(MatCreateColmap_MPIAIJ_Private(mat));
          }
#if defined(PETSC_USE_CTABLE)
          PetscCall(PetscTableFind(aij->colmap,idxn[j]+1,&col)); /* colmap is 1-based; 0 means "not present" */
          col--;
#else
          col = aij->colmap[idxn[j]] - 1;
#endif
          /* column not stored in the off-diagonal block -> value is an (unstored) zero */
          if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
          else {
            PetscCall(MatGetValues(aij->B,1,&row,1,&col,v+i*n+j));
          }
        }
      }
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
  }
  PetscFunctionReturn(0);
}

/* Begin assembly: start communicating the stashed off-process entries (no-op when
   stashing is disabled or off-process entries were promised not to occur). */
PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
  PetscInt   nstash,reallocs;

  PetscFunctionBegin;
  if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);

  PetscCall(MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range));
  PetscCall(MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs));
  PetscCall(PetscInfo(aij->A,"Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n",nstash,reallocs));
  PetscFunctionReturn(0);
}

/* Finish assembly: drain the stash into the local blocks, agree collectively on
   (dis)assembly state, set up the scatter for matrix-vector products on first final
   assembly, assemble both blocks, and update the collective nonzero state. */
PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ  *aij = (Mat_MPIAIJ*)mat->data;
  PetscMPIInt n;
  PetscInt    i,j,rstart,ncols,flg;
  PetscInt    *row,*col;
  PetscBool   other_disassembled;
  PetscScalar *val;

  /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */

  PetscFunctionBegin;
  if (!aij->donotstash && !mat->nooffprocentries) {
    while (1) {
      PetscCall(MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg));
      if (!flg) break; /* no more stash messages */

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        PetscCall(MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode));
        i = j;
      }
    }
    PetscCall(MatStashScatterEnd_Private(&mat->stash));
  }
#if defined(PETSC_HAVE_DEVICE)
  if (mat->offloadmask == PETSC_OFFLOAD_CPU) aij->A->offloadmask = PETSC_OFFLOAD_CPU;
  /* We call MatBindToCPU() on aij->A and aij->B here, because if MatBindToCPU_MPIAIJ() is called before assembly, it cannot bind these. */
  if (mat->boundtocpu) {
    PetscCall(MatBindToCPU(aij->A,PETSC_TRUE));
    PetscCall(MatBindToCPU(aij->B,PETSC_TRUE));
  }
#endif
  PetscCall(MatAssemblyBegin(aij->A,mode));
  PetscCall(MatAssemblyEnd(aij->A,mode));

  /* determine if any processor has disassembled, if so we must
     also disassemble ourself, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
    /* MPI_PROD of was_assembled: true only if every rank was assembled */
    PetscCall(MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat)));
    if (mat->was_assembled && !other_disassembled) { /* mat on this rank has reduced off-diag B with local col ids, but globaly it does not */
      PetscCall(MatDisAssemble_MPIAIJ(mat));
    }
  }
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    PetscCall(MatSetUpMultiply_MPIAIJ(mat));
  }
  PetscCall(MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE));
#if defined(PETSC_HAVE_DEVICE)
  if (mat->offloadmask == PETSC_OFFLOAD_CPU && aij->B->offloadmask != PETSC_OFFLOAD_UNALLOCATED) aij->B->offloadmask = PETSC_OFFLOAD_CPU;
#endif
  PetscCall(MatAssemblyBegin(aij->B,mode));
  PetscCall(MatAssemblyEnd(aij->B,mode));

  PetscCall(PetscFree2(aij->rowvalues,aij->rowindices));

  aij->rowvalues = NULL;

  PetscCall(VecDestroy(&aij->diag)); /* cached diagonal is stale after assembly */

  /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
  if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
    PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
    PetscCall(MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat)));
  }
#if defined(PETSC_HAVE_DEVICE)
  mat->offloadmask = PETSC_OFFLOAD_BOTH;
#endif
  PetscFunctionReturn(0);
}

/* Zero all stored values (nonzero pattern is kept) in both blocks. */
PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
{
  Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  PetscCall(MatZeroEntries(l->A));
  PetscCall(MatZeroEntries(l->B));
  PetscFunctionReturn(0);
}

/* Zero the given (global) rows, optionally placing 'diag' on the diagonal and fixing the
   right-hand side b so that the solution keeps the values given in x for those rows.
   Collective; ends with a full assembly and a collective nonzerostate update. */
PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIAIJ       *mat = (Mat_MPIAIJ *) A->data;
  PetscObjectState sA, sB;
  PetscInt         *lrows;
  PetscInt         r, len;
  PetscBool        cong, lch, gch;

  PetscFunctionBegin;
  /* get locally owned rows */
  PetscCall(MatZeroRowsMapLocal_Private(A,N,rows,&len,&lrows));
  PetscCall(MatHasCongruentLayouts(A,&cong));
  /* fix right hand side if needed */
  if (x && b) {
    const PetscScalar *xx;
    PetscScalar       *bb;

    PetscCheck(cong,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Need matching row/col layout");
    PetscCall(VecGetArrayRead(x, &xx));
    PetscCall(VecGetArray(b, &bb));
    for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
    PetscCall(VecRestoreArrayRead(x, &xx));
    PetscCall(VecRestoreArray(b, &bb));
  }

  /* remember the pre-zeroing nonzero states to detect pattern changes below */
  sA = mat->A->nonzerostate;
  sB = mat->B->nonzerostate;

  if (diag != 0.0 && cong) {
    /* congruent layouts: the diagonal entry lives in mat->A, let it set diag directly */
    PetscCall(MatZeroRows(mat->A, len, lrows, diag, NULL, NULL));
    PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL));
  } else if (diag != 0.0) { /* non-square or non congruent layouts -> if keepnonzeropattern is false, we allow for new insertion */
    Mat_SeqAIJ *aijA = (Mat_SeqAIJ*)mat->A->data;
    Mat_SeqAIJ *aijB = (Mat_SeqAIJ*)mat->B->data;
    PetscInt   nnwA, nnwB;
    PetscBool  nnzA, nnzB;

    nnwA = aijA->nonew;
    nnwB = aijB->nonew;
    nnzA = aijA->keepnonzeropattern;
    nnzB = aijB->keepnonzeropattern;
    if (!nnzA) {
      PetscCall(PetscInfo(mat->A,"Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on diagonal block.\n"));
      aijA->nonew = 0; /* temporarily allow new nonzeros so the diagonal can be inserted */
    }
    if (!nnzB) {
      PetscCall(PetscInfo(mat->B,"Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on off-diagonal block.\n"));
      aijB->nonew = 0;
    }
    /* Must zero here before the next loop */
    PetscCall(MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL));
    PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL));
    for (r = 0; r < len; ++r) {
      const PetscInt row = lrows[r] + A->rmap->rstart;
      if (row >= A->cmap->N) continue; /* no matching column for this row in a non-square matrix */
      PetscCall(MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES));
    }
    /* restore the original nonew settings */
    aijA->nonew = nnwA;
    aijB->nonew = nnwB;
  } else {
    PetscCall(MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL));
    PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL));
  }
  PetscCall(PetscFree(lrows));
  PetscCall(MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY));

  /* reduce nonzerostate */
  lch = (PetscBool)(sA != mat->A->nonzerostate || sB != mat->B->nonzerostate);
  PetscCall(MPIU_Allreduce(&lch,&gch,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)A)));
  if (gch) A->nonzerostate++;
  PetscFunctionReturn(0);
}

/* Zero the given (global) rows AND the matching columns, optionally placing 'diag' on the
   diagonal and adjusting b using the values in x.  Row ownership is resolved with a PetscSF;
   zeroed columns of the off-diagonal block are found by scattering a 0/1 mask through the
   matrix-vector-product scatter (l->Mvctx). */
PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
  PetscMPIInt       n = A->rmap->n;
  PetscInt          i,j,r,m,len = 0;
  PetscInt          *lrows,*owners = A->rmap->range;
  PetscMPIInt       p = 0;
  PetscSFNode       *rrows;
  PetscSF           sf;
  const PetscScalar *xx;
  PetscScalar       *bb,*mask,*aij_a;
  Vec               xmask,lmask;
  Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
  const PetscInt    *aj, *ii,*ridx;
  PetscScalar       *aa;

  PetscFunctionBegin;
  /* Create SF where leaves are input rows and roots are owned rows */
  PetscCall(PetscMalloc1(n, &lrows));
  for (r = 0; r < n; ++r) lrows[r] = -1;
  PetscCall(PetscMalloc1(N, &rrows));
  for (r = 0; r < N; ++r) {
    const PetscInt idx = rows[r];
    PetscCheckFalse(idx < 0 || A->rmap->N <= idx,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %" PetscInt_FMT " out of range [0,%" PetscInt_FMT ")",idx,A->rmap->N);
    if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
      PetscCall(PetscLayoutFindOwner(A->rmap,idx,&p));
    }
    rrows[r].rank  = p;
    rrows[r].index = rows[r] - owners[p];
  }
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject) A), &sf));
  PetscCall(PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER));
  /* Collect flags for rows to be zeroed */
  PetscCall(PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR));
  PetscCall(PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR));
  PetscCall(PetscSFDestroy(&sf));
  /* Compress and put in row numbers */
  for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
  /* zero diagonal part of matrix */
  PetscCall(MatZeroRowsColumns(l->A,len,lrows,diag,x,b));
  /* handle off diagonal part of matrix */
  PetscCall(MatCreateVecs(A,&xmask,NULL));
  PetscCall(VecDuplicate(l->lvec,&lmask));
  PetscCall(VecGetArray(xmask,&bb));
  for (i=0; i<len; i++) bb[lrows[i]] = 1; /* mark locally zeroed rows */
  PetscCall(VecRestoreArray(xmask,&bb));
  /* scatter the mask so each rank learns which ghost columns are zeroed */
  PetscCall(VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD));
  PetscCall(VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD));
  PetscCall(VecDestroy(&xmask));
  if (x && b) { /* this code is buggy when the row and column layout don't match */
    PetscBool cong;

    PetscCall(MatHasCongruentLayouts(A,&cong));
    PetscCheck(cong,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Need matching row/col layout");
    PetscCall(VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD));
    PetscCall(VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD));
    PetscCall(VecGetArrayRead(l->lvec,&xx));
    PetscCall(VecGetArray(b,&bb));
  }
  PetscCall(VecGetArray(lmask,&mask));
  /* remove zeroed rows of off diagonal matrix */
  PetscCall(MatSeqAIJGetArray(l->B,&aij_a));
  ii = aij->i;
  for (i=0; i<len; i++) {
    PetscCall(PetscArrayzero(aij_a + ii[lrows[i]],ii[lrows[i]+1] - ii[lrows[i]]));
  }
  /* loop over all elements of off process part of matrix zeroing removed columns*/
  if (aij->compressedrow.use) {
    m    = aij->compressedrow.nrows;
    ii   = aij->compressedrow.i;
    ridx = aij->compressedrow.rindex;
    for (i=0; i<m; i++) {
      n  = ii[i+1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij_a + ii[i];

      for (j=0; j<n; j++) {
        if (PetscAbsScalar(mask[*aj])) { /* this column is being zeroed */
          if (b) bb[*ridx] -= *aa*xx[*aj]; /* move the known contribution to the rhs */
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
      ridx++;
    }
  } else { /* do not use compressed row format */
    m = l->B->rmap->n;
    for (i=0; i<m; i++) {
      n  = ii[i+1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij_a + ii[i];
      for (j=0; j<n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          if (b) bb[i] -= *aa*xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
    }
  }
  if (x && b) {
    PetscCall(VecRestoreArray(b,&bb));
    PetscCall(VecRestoreArrayRead(l->lvec,&xx));
  }
  PetscCall(MatSeqAIJRestoreArray(l->B,&aij_a));
  PetscCall(VecRestoreArray(lmask,&mask));
  PetscCall(VecDestroy(&lmask));
  PetscCall(PetscFree(lrows));

  /* only change matrix nonzero state if pattern was allowed to be changed */
  if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) {
    PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
    PetscCall(MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A)));
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
  PetscInt   nt;
  VecScatter Mvctx = a->Mvctx;

  PetscFunctionBegin;
  PetscCall(VecGetLocalSize(xx,&nt));
  PetscCheck(nt == A->cmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%" PetscInt_FMT ") and xx (%" PetscInt_FMT ")",A->cmap->n,nt);
  PetscCall(VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD));
  PetscCall((*a->A->ops->mult)(a->A,xx,yy));
  PetscCall(VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD));
  PetscCall((*a->B->ops->multadd)(a->B,a->lvec,yy,yy));
PetscFunctionReturn(0); 965 } 966 967 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx) 968 { 969 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 970 971 PetscFunctionBegin; 972 PetscCall(MatMultDiagonalBlock(a->A,bb,xx)); 973 PetscFunctionReturn(0); 974 } 975 976 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 977 { 978 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 979 VecScatter Mvctx = a->Mvctx; 980 981 PetscFunctionBegin; 982 PetscCall(VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD)); 983 PetscCall((*a->A->ops->multadd)(a->A,xx,yy,zz)); 984 PetscCall(VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD)); 985 PetscCall((*a->B->ops->multadd)(a->B,a->lvec,zz,zz)); 986 PetscFunctionReturn(0); 987 } 988 989 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy) 990 { 991 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 992 993 PetscFunctionBegin; 994 /* do nondiagonal part */ 995 PetscCall((*a->B->ops->multtranspose)(a->B,xx,a->lvec)); 996 /* do local part */ 997 PetscCall((*a->A->ops->multtranspose)(a->A,xx,yy)); 998 /* add partial results together */ 999 PetscCall(VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE)); 1000 PetscCall(VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE)); 1001 PetscFunctionReturn(0); 1002 } 1003 1004 PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool *f) 1005 { 1006 MPI_Comm comm; 1007 Mat_MPIAIJ *Aij = (Mat_MPIAIJ*) Amat->data, *Bij; 1008 Mat Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs; 1009 IS Me,Notme; 1010 PetscInt M,N,first,last,*notme,i; 1011 PetscBool lf; 1012 PetscMPIInt size; 1013 1014 PetscFunctionBegin; 1015 /* Easy test: symmetric diagonal block */ 1016 Bij = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A; 1017 PetscCall(MatIsTranspose(Adia,Bdia,tol,&lf)); 1018 PetscCall(MPIU_Allreduce(&lf,f,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)Amat))); 1019 if (!*f) PetscFunctionReturn(0); 1020 PetscCall(PetscObjectGetComm((PetscObject)Amat,&comm)); 1021 
PetscCallMPI(MPI_Comm_size(comm,&size)); 1022 if (size == 1) PetscFunctionReturn(0); 1023 1024 /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. */ 1025 PetscCall(MatGetSize(Amat,&M,&N)); 1026 PetscCall(MatGetOwnershipRange(Amat,&first,&last)); 1027 PetscCall(PetscMalloc1(N-last+first,¬me)); 1028 for (i=0; i<first; i++) notme[i] = i; 1029 for (i=last; i<M; i++) notme[i-last+first] = i; 1030 PetscCall(ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme)); 1031 PetscCall(ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me)); 1032 PetscCall(MatCreateSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs)); 1033 Aoff = Aoffs[0]; 1034 PetscCall(MatCreateSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs)); 1035 Boff = Boffs[0]; 1036 PetscCall(MatIsTranspose(Aoff,Boff,tol,f)); 1037 PetscCall(MatDestroyMatrices(1,&Aoffs)); 1038 PetscCall(MatDestroyMatrices(1,&Boffs)); 1039 PetscCall(ISDestroy(&Me)); 1040 PetscCall(ISDestroy(&Notme)); 1041 PetscCall(PetscFree(notme)); 1042 PetscFunctionReturn(0); 1043 } 1044 1045 PetscErrorCode MatIsSymmetric_MPIAIJ(Mat A,PetscReal tol,PetscBool *f) 1046 { 1047 PetscFunctionBegin; 1048 PetscCall(MatIsTranspose_MPIAIJ(A,A,tol,f)); 1049 PetscFunctionReturn(0); 1050 } 1051 1052 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1053 { 1054 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1055 1056 PetscFunctionBegin; 1057 /* do nondiagonal part */ 1058 PetscCall((*a->B->ops->multtranspose)(a->B,xx,a->lvec)); 1059 /* do local part */ 1060 PetscCall((*a->A->ops->multtransposeadd)(a->A,xx,yy,zz)); 1061 /* add partial results together */ 1062 PetscCall(VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE)); 1063 PetscCall(VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE)); 1064 PetscFunctionReturn(0); 1065 } 1066 1067 /* 1068 This only works correctly for square matrices where the subblock A->A is the 1069 diagonal block 1070 */ 1071 PetscErrorCode 
MatGetDiagonal_MPIAIJ(Mat A,Vec v)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  /* the diagonal lives entirely in a->A only when row and column partitions coincide */
  PetscCheck(A->rmap->N == A->cmap->N,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
  PetscCheckFalse(A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
  PetscCall(MatGetDiagonal(a->A,v));
  PetscFunctionReturn(0);
}

/* Scale both the diagonal and off-diagonal blocks by aa */
PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  PetscCall(MatScale(a->A,aa));
  PetscCall(MatScale(a->B,aa));
  PetscFunctionReturn(0);
}

/* Free COO stuff; must match allocation methods in MatSetPreallocationCOO_MPIAIJ() */
PETSC_INTERN PetscErrorCode MatResetPreallocationCOO_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;

  PetscFunctionBegin;
  PetscCall(PetscSFDestroy(&aij->coo_sf));
  PetscCall(PetscFree4(aij->Aperm1,aij->Bperm1,aij->Ajmap1,aij->Bjmap1));
  PetscCall(PetscFree4(aij->Aperm2,aij->Bperm2,aij->Ajmap2,aij->Bjmap2));
  PetscCall(PetscFree4(aij->Aimap1,aij->Bimap1,aij->Aimap2,aij->Bimap2));
  PetscCall(PetscFree2(aij->sendbuf,aij->recvbuf));
  PetscCall(PetscFree(aij->Cperm1));
  PetscFunctionReturn(0);
}

/*
   Destroys all storage owned by an MPIAIJ matrix: the two sequential blocks,
   scatter context, ghost-column map, COO state, and every composed method so
   no dangling function pointers remain on the PetscObject.
*/
PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT,mat->rmap->N,mat->cmap->N);
#endif
  PetscCall(MatStashDestroy_Private(&mat->stash));
  PetscCall(VecDestroy(&aij->diag));
  PetscCall(MatDestroy(&aij->A));
  PetscCall(MatDestroy(&aij->B));
#if defined(PETSC_USE_CTABLE)
  PetscCall(PetscTableDestroy(&aij->colmap));
#else
  PetscCall(PetscFree(aij->colmap));
#endif
  PetscCall(PetscFree(aij->garray));
  PetscCall(VecDestroy(&aij->lvec));
  PetscCall(VecScatterDestroy(&aij->Mvctx));
  PetscCall(PetscFree2(aij->rowvalues,aij->rowindices));
  PetscCall(PetscFree(aij->ld));

  /* Free COO */
  PetscCall(MatResetPreallocationCOO_MPIAIJ(mat));

  PetscCall(PetscFree(mat->data));

  /* may be created by MatCreateMPIAIJSumSeqAIJSymbolic */
  PetscCall(PetscObjectCompose((PetscObject)mat,"MatMergeSeqsToMPI",NULL));

  PetscCall(PetscObjectChangeTypeName((PetscObject)mat,NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatResetPreallocation_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpibaij_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL));
#if defined(PETSC_HAVE_CUDA)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijcusparse_C",NULL));
#endif
#if defined(PETSC_HAVE_KOKKOS_KERNELS)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijkokkos_C",NULL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpidense_C",NULL));
#if defined(PETSC_HAVE_ELEMENTAL)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_elemental_C",NULL));
#endif
#if defined(PETSC_HAVE_SCALAPACK)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_scalapack_C",NULL));
#endif
#if defined(PETSC_HAVE_HYPRE)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_hypre_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_transpose_mpiaij_mpiaij_C",NULL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_is_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_is_mpiaij_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaij_mpiaij_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetUseScalableIncreaseOverlap_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijperm_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijsell_C",NULL));
#if defined(PETSC_HAVE_MKL_SPARSE)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijmkl_C",NULL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijcrl_C",NULL));
  /* NOTE(review): "MatConvert_mpiaij_is_C" is cleared a second time here; harmless but redundant */
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_is_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisell_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatSetPreallocationCOO_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatSetValuesCOO_C",NULL));
  PetscFunctionReturn(0);
}

/*
   Writes the parallel matrix in PETSc binary format: a global header
   (classid, M, N, total nz), per-row lengths, global column indices and values.
   Columns of each row are emitted in globally sorted order by interleaving the
   off-diagonal block (garray-mapped) around the diagonal block.
*/
PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ        *A   = (Mat_SeqAIJ*)aij->A->data;
  Mat_SeqAIJ        *B   = (Mat_SeqAIJ*)aij->B->data;
  const PetscInt    *garray = aij->garray;   /* local ghost column -> global column */
  const PetscScalar *aa,*ba;
  PetscInt          header[4],M,N,m,rs,cs,nz,cnt,i,ja,jb;
  PetscInt          *rowlens;
  PetscInt          *colidxs;
  PetscScalar       *matvals;

  PetscFunctionBegin;
  PetscCall(PetscViewerSetUp(viewer));

  M  = mat->rmap->N;
  N  = mat->cmap->N;
  m  = mat->rmap->n;
  rs = mat->rmap->rstart;
  cs = mat->cmap->rstart;
  nz = A->nz + B->nz;   /* local nonzero count; summed onto rank 0 for the header */

  /* write matrix header */
  header[0] = MAT_FILE_CLASSID;
  header[1] = M; header[2] = N; header[3] = nz;
  PetscCallMPI(MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat)));
  PetscCall(PetscViewerBinaryWrite(viewer,header,4,PETSC_INT));

  /* fill in and store row lengths */
  PetscCall(PetscMalloc1(m,&rowlens));
  for (i=0; i<m; i++) rowlens[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
  PetscCall(PetscViewerBinaryWriteAll(viewer,rowlens,m,rs,M,PETSC_INT));
  PetscCall(PetscFree(rowlens));

  /* fill in and store column indices: off-diagonal entries left of the diagonal
     block first, then the diagonal block, then the remaining off-diagonal entries */
  PetscCall(PetscMalloc1(nz,&colidxs));
  for (cnt=0, i=0; i<m; i++) {
    for (jb=B->i[i]; jb<B->i[i+1]; jb++) {
      if (garray[B->j[jb]] > cs) break;
      colidxs[cnt++] = garray[B->j[jb]];
    }
    for (ja=A->i[i]; ja<A->i[i+1]; ja++)
      colidxs[cnt++] = A->j[ja] + cs;
    for (; jb<B->i[i+1]; jb++)
      colidxs[cnt++] = garray[B->j[jb]];
  }
  PetscCheck(cnt == nz,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt_FMT,cnt,nz);
  PetscCall(PetscViewerBinaryWriteAll(viewer,colidxs,nz,PETSC_DETERMINE,PETSC_DETERMINE,PETSC_INT));
  PetscCall(PetscFree(colidxs));

  /* fill in and store nonzero values, in the same interleaved order as the indices */
  PetscCall(MatSeqAIJGetArrayRead(aij->A,&aa));
  PetscCall(MatSeqAIJGetArrayRead(aij->B,&ba));
  PetscCall(PetscMalloc1(nz,&matvals));
  for (cnt=0, i=0; i<m; i++) {
    for (jb=B->i[i]; jb<B->i[i+1]; jb++) {
      if (garray[B->j[jb]] > cs) break;
      matvals[cnt++] = ba[jb];
    }
    for (ja=A->i[i]; ja<A->i[i+1];
ja++) 1242 matvals[cnt++] = aa[ja]; 1243 for (; jb<B->i[i+1]; jb++) 1244 matvals[cnt++] = ba[jb]; 1245 } 1246 PetscCall(MatSeqAIJRestoreArrayRead(aij->A,&aa)); 1247 PetscCall(MatSeqAIJRestoreArrayRead(aij->B,&ba)); 1248 PetscCheck(cnt == nz,PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt_FMT,cnt,nz); 1249 PetscCall(PetscViewerBinaryWriteAll(viewer,matvals,nz,PETSC_DETERMINE,PETSC_DETERMINE,PETSC_SCALAR)); 1250 PetscCall(PetscFree(matvals)); 1251 1252 /* write block size option to the viewer's .info file */ 1253 PetscCall(MatView_Binary_BlockSizes(mat,viewer)); 1254 PetscFunctionReturn(0); 1255 } 1256 1257 #include <petscdraw.h> 1258 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 1259 { 1260 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1261 PetscMPIInt rank = aij->rank,size = aij->size; 1262 PetscBool isdraw,iascii,isbinary; 1263 PetscViewer sviewer; 1264 PetscViewerFormat format; 1265 1266 PetscFunctionBegin; 1267 PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw)); 1268 PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii)); 1269 PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary)); 1270 if (iascii) { 1271 PetscCall(PetscViewerGetFormat(viewer,&format)); 1272 if (format == PETSC_VIEWER_LOAD_BALANCE) { 1273 PetscInt i,nmax = 0,nmin = PETSC_MAX_INT,navg = 0,*nz,nzlocal = ((Mat_SeqAIJ*) (aij->A->data))->nz + ((Mat_SeqAIJ*) (aij->B->data))->nz; 1274 PetscCall(PetscMalloc1(size,&nz)); 1275 PetscCallMPI(MPI_Allgather(&nzlocal,1,MPIU_INT,nz,1,MPIU_INT,PetscObjectComm((PetscObject)mat))); 1276 for (i=0; i<(PetscInt)size; i++) { 1277 nmax = PetscMax(nmax,nz[i]); 1278 nmin = PetscMin(nmin,nz[i]); 1279 navg += nz[i]; 1280 } 1281 PetscCall(PetscFree(nz)); 1282 navg = navg/size; 1283 PetscCall(PetscViewerASCIIPrintf(viewer,"Load Balance - Nonzeros: Min %" PetscInt_FMT " avg %" PetscInt_FMT " max %" PetscInt_FMT 
"\n",nmin,navg,nmax)); 1284 PetscFunctionReturn(0); 1285 } 1286 PetscCall(PetscViewerGetFormat(viewer,&format)); 1287 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1288 MatInfo info; 1289 PetscInt *inodes=NULL; 1290 1291 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank)); 1292 PetscCall(MatGetInfo(mat,MAT_LOCAL,&info)); 1293 PetscCall(MatInodeGetInodeSizes(aij->A,NULL,&inodes,NULL)); 1294 PetscCall(PetscViewerASCIIPushSynchronized(viewer)); 1295 if (!inodes) { 1296 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %g, not using I-node routines\n", 1297 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory)); 1298 } else { 1299 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %g, using I-node routines\n", 1300 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory)); 1301 } 1302 PetscCall(MatGetInfo(aij->A,MAT_LOCAL,&info)); 1303 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %" PetscInt_FMT " \n",rank,(PetscInt)info.nz_used)); 1304 PetscCall(MatGetInfo(aij->B,MAT_LOCAL,&info)); 1305 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %" PetscInt_FMT " \n",rank,(PetscInt)info.nz_used)); 1306 PetscCall(PetscViewerFlush(viewer)); 1307 PetscCall(PetscViewerASCIIPopSynchronized(viewer)); 1308 PetscCall(PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n")); 1309 PetscCall(VecScatterView(aij->Mvctx,viewer)); 1310 PetscFunctionReturn(0); 1311 } else if (format == PETSC_VIEWER_ASCII_INFO) { 1312 PetscInt inodecount,inodelimit,*inodes; 1313 PetscCall(MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit)); 1314 if (inodes) { 1315 PetscCall(PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) 
routines: found %" PetscInt_FMT " nodes, limit used is %" PetscInt_FMT "\n",inodecount,inodelimit)); 1316 } else { 1317 PetscCall(PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n")); 1318 } 1319 PetscFunctionReturn(0); 1320 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1321 PetscFunctionReturn(0); 1322 } 1323 } else if (isbinary) { 1324 if (size == 1) { 1325 PetscCall(PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name)); 1326 PetscCall(MatView(aij->A,viewer)); 1327 } else { 1328 PetscCall(MatView_MPIAIJ_Binary(mat,viewer)); 1329 } 1330 PetscFunctionReturn(0); 1331 } else if (iascii && size == 1) { 1332 PetscCall(PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name)); 1333 PetscCall(MatView(aij->A,viewer)); 1334 PetscFunctionReturn(0); 1335 } else if (isdraw) { 1336 PetscDraw draw; 1337 PetscBool isnull; 1338 PetscCall(PetscViewerDrawGetDraw(viewer,0,&draw)); 1339 PetscCall(PetscDrawIsNull(draw,&isnull)); 1340 if (isnull) PetscFunctionReturn(0); 1341 } 1342 1343 { /* assemble the entire matrix onto first processor */ 1344 Mat A = NULL, Av; 1345 IS isrow,iscol; 1346 1347 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->rmap->N : 0,0,1,&isrow)); 1348 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->cmap->N : 0,0,1,&iscol)); 1349 PetscCall(MatCreateSubMatrix(mat,isrow,iscol,MAT_INITIAL_MATRIX,&A)); 1350 PetscCall(MatMPIAIJGetSeqAIJ(A,&Av,NULL,NULL)); 1351 /* The commented code uses MatCreateSubMatrices instead */ 1352 /* 1353 Mat *AA, A = NULL, Av; 1354 IS isrow,iscol; 1355 1356 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->rmap->N : 0,0,1,&isrow)); 1357 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? 
 mat->cmap->N : 0,0,1,&iscol));
      PetscCall(MatCreateSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&AA));
      if (rank == 0) {
        PetscCall(PetscObjectReference((PetscObject)AA[0]));
        A  = AA[0];
        Av = AA[0];
      }
      PetscCall(MatDestroySubMatrices(1,&AA));
    */
    PetscCall(ISDestroy(&iscol));
    PetscCall(ISDestroy(&isrow));
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    PetscCall(PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer));
    if (rank == 0) {
      if (((PetscObject)mat)->name) {
        PetscCall(PetscObjectSetName((PetscObject)Av,((PetscObject)mat)->name));
      }
      PetscCall(MatView_SeqAIJ(Av,sviewer));
    }
    PetscCall(PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer));
    PetscCall(PetscViewerFlush(viewer));
    PetscCall(MatDestroy(&A));
  }
  PetscFunctionReturn(0);
}

/* Top-level MatView for MPIAIJ: only ASCII, draw, binary and socket viewers are handled */
PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
{
  PetscBool iascii,isdraw,issocket,isbinary;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket));
  if (iascii || isdraw || isbinary || issocket) {
    PetscCall(MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer));
  }
  PetscFunctionReturn(0);
}

/*
   Parallel (local-block) SOR relaxation.  Only the "local" sweep variants and the
   Eisenstat trick are supported; each outer iteration refreshes ghost values of xx,
   forms bb1 = bb - B*x (off-process coupling moved to the rhs), and then runs the
   sequential SOR kernel on the diagonal block.
*/
PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data;
  Vec        bb1 = NULL;
  PetscBool  hasop;

  PetscFunctionBegin;
  if (flag == SOR_APPLY_UPPER) {
    PetscCall((*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx));
    PetscFunctionReturn(0);
  }

  /* bb1 is needed whenever more than one outer iteration runs or xx is nonzero on entry */
  if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) {
    PetscCall(VecDuplicate(bb,&bb1));
  }

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      /* first sweep needs no ghost update since xx starts at zero */
      PetscCall((*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx));
      its--;
    }

    while (its--) {
      PetscCall(VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD));
      PetscCall(VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD));

      /* update rhs: bb1 = bb - B*x */
      PetscCall(VecScale(mat->lvec,-1.0));
      PetscCall((*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1));

      /* local sweep */
      PetscCall((*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx));
    }
  } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      PetscCall((*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx));
      its--;
    }
    while (its--) {
      PetscCall(VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD));
      PetscCall(VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD));

      /* update rhs: bb1 = bb - B*x */
      PetscCall(VecScale(mat->lvec,-1.0));
      PetscCall((*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1));

      /* local sweep */
      PetscCall((*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx));
    }
  } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      PetscCall((*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx));
      its--;
    }
    while (its--) {
      PetscCall(VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD));
      PetscCall(VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD));

      /* update rhs:
 bb1 = bb - B*x */
      PetscCall(VecScale(mat->lvec,-1.0));
      PetscCall((*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1));

      /* local sweep */
      PetscCall((*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx));
    }
  } else if (flag & SOR_EISENSTAT) {
    Vec xx1;

    PetscCall(VecDuplicate(bb,&xx1));
    PetscCall((*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx));

    PetscCall(VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD));
    PetscCall(VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD));
    if (!mat->diag) {
      /* lazily cache the diagonal for the D*x product below */
      PetscCall(MatCreateVecs(matin,&mat->diag,NULL));
      PetscCall(MatGetDiagonal(matin,mat->diag));
    }
    PetscCall(MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop));
    if (hasop) {
      PetscCall(MatMultDiagonalBlock(matin,xx,bb1));
    } else {
      PetscCall(VecPointwiseMult(bb1,mat->diag,xx));
    }
    PetscCall(VecAYPX(bb1,(omega-2.0)/omega,bb));

    PetscCall(MatMultAdd(mat->B,mat->lvec,bb1,bb1));

    /* local sweep */
    PetscCall((*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1));
    PetscCall(VecAXPY(xx,1.0,xx1));
    PetscCall(VecDestroy(&xx1));
  } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported");

  PetscCall(VecDestroy(&bb1));

  matin->factorerrortype = mat->A->factorerrortype;
  PetscFunctionReturn(0);
}

/*
   B = P_row * A * P_col for permutation index sets rowp/colp.  Uses star forests to
   invert the (distributed) permutations, counts the permuted diagonal/off-diagonal
   nonzeros per row for preallocation, then inserts the permuted entries.
*/
PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
{
  Mat            aA,aB,Aperm;
  const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj;
  PetscScalar    *aa,*ba;
  PetscInt       i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest;
  PetscSF        rowsf,sf;
  IS             parcolp = NULL;
  PetscBool      done;

  PetscFunctionBegin;
  PetscCall(MatGetLocalSize(A,&m,&n));
  PetscCall(ISGetIndices(rowp,&rwant));
  PetscCall(ISGetIndices(colp,&cwant));
  PetscCall(PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest));

  /* Invert row permutation to find out where my rows should go */
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf));
  PetscCall(PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant));
  PetscCall(PetscSFSetFromOptions(rowsf));
  for (i=0; i<m; i++) work[i] = A->rmap->rstart + i;
  PetscCall(PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPI_REPLACE));
  PetscCall(PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPI_REPLACE));

  /* Invert column permutation to find out where my columns should go */
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A),&sf));
  PetscCall(PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant));
  PetscCall(PetscSFSetFromOptions(sf));
  for (i=0; i<n; i++) work[i] = A->cmap->rstart + i;
  PetscCall(PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPI_REPLACE));
  PetscCall(PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPI_REPLACE));
  PetscCall(PetscSFDestroy(&sf));

  PetscCall(ISRestoreIndices(rowp,&rwant));
  PetscCall(ISRestoreIndices(colp,&cwant));
  PetscCall(MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols));

  /* Find out where my gcols should go */
  PetscCall(MatGetSize(aB,NULL,&ng));
  PetscCall(PetscMalloc1(ng,&gcdest));
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A),&sf));
  PetscCall(PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols));
  PetscCall(PetscSFSetFromOptions(sf));
  PetscCall(PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest,MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest,MPI_REPLACE));
  PetscCall(PetscSFDestroy(&sf));

  /* dnnz/onnz count permuted diagonal/off-diagonal entries of MY rows;
     tdnnz/tonnz receive the counts for the rows this rank will OWN after permutation */
  PetscCall(PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz));
  PetscCall(MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done));
  PetscCall(MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done));
  for (i=0; i<m; i++) {
    PetscInt    row = rdest[i];
    PetscMPIInt rowner;
    PetscCall(PetscLayoutFindOwner(A->rmap,row,&rowner));
    for (j=ai[i]; j<ai[i+1]; j++) {
      PetscInt    col = cdest[aj[j]];
      PetscMPIInt cowner;
      PetscCall(PetscLayoutFindOwner(A->cmap,col,&cowner)); /* Could build an index for the columns to eliminate this search */
      if (rowner == cowner) dnnz[i]++;
      else onnz[i]++;
    }
    for (j=bi[i]; j<bi[i+1]; j++) {
      PetscInt    col = gcdest[bj[j]];
      PetscMPIInt cowner;
      PetscCall(PetscLayoutFindOwner(A->cmap,col,&cowner));
      if (rowner == cowner) dnnz[i]++;
      else onnz[i]++;
    }
  }
  PetscCall(PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz,MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz,MPI_REPLACE));
  PetscCall(PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz,MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz,MPI_REPLACE));
  PetscCall(PetscSFDestroy(&rowsf));

  PetscCall(MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm));
  PetscCall(MatSeqAIJGetArray(aA,&aa));
  PetscCall(MatSeqAIJGetArray(aB,&ba));
  for (i=0; i<m; i++) {
    PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */
    PetscInt j0,rowlen;
    rowlen = ai[i+1] - ai[i];
    for (j0=j=0; j<rowlen; j0=j) { /* rowlen could be larger than number of rows m, so sum in batches */
      for (; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]];
      PetscCall(MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES));
    }
    rowlen = bi[i+1] - bi[i];
    for (j0=j=0; j<rowlen; j0=j) {
      for (; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]];
PetscCall(MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES)); 1590 } 1591 } 1592 PetscCall(MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY)); 1593 PetscCall(MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY)); 1594 PetscCall(MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done)); 1595 PetscCall(MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done)); 1596 PetscCall(MatSeqAIJRestoreArray(aA,&aa)); 1597 PetscCall(MatSeqAIJRestoreArray(aB,&ba)); 1598 PetscCall(PetscFree4(dnnz,onnz,tdnnz,tonnz)); 1599 PetscCall(PetscFree3(work,rdest,cdest)); 1600 PetscCall(PetscFree(gcdest)); 1601 if (parcolp) PetscCall(ISDestroy(&colp)); 1602 *B = Aperm; 1603 PetscFunctionReturn(0); 1604 } 1605 1606 PetscErrorCode MatGetGhosts_MPIAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[]) 1607 { 1608 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1609 1610 PetscFunctionBegin; 1611 PetscCall(MatGetSize(aij->B,NULL,nghosts)); 1612 if (ghosts) *ghosts = aij->garray; 1613 PetscFunctionReturn(0); 1614 } 1615 1616 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1617 { 1618 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1619 Mat A = mat->A,B = mat->B; 1620 PetscLogDouble isend[5],irecv[5]; 1621 1622 PetscFunctionBegin; 1623 info->block_size = 1.0; 1624 PetscCall(MatGetInfo(A,MAT_LOCAL,info)); 1625 1626 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1627 isend[3] = info->memory; isend[4] = info->mallocs; 1628 1629 PetscCall(MatGetInfo(B,MAT_LOCAL,info)); 1630 1631 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1632 isend[3] += info->memory; isend[4] += info->mallocs; 1633 if (flag == MAT_LOCAL) { 1634 info->nz_used = isend[0]; 1635 info->nz_allocated = isend[1]; 1636 info->nz_unneeded = isend[2]; 1637 info->memory = isend[3]; 1638 info->mallocs = isend[4]; 1639 } else if (flag == MAT_GLOBAL_MAX) { 1640 
PetscCall(MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_MAX,PetscObjectComm((PetscObject)matin)));

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    PetscCall(MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_SUM,PetscObjectComm((PetscObject)matin)));

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(0);
}

/* Set a matrix option; most options are simply forwarded to both the diagonal (a->A)
   and off-diagonal (a->B) sequential blocks */
PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_UNUSED_NONZERO_LOCATION_ERR:
  case MAT_KEEP_NONZERO_PATTERN:
  case MAT_NEW_NONZERO_LOCATION_ERR:
  case MAT_USE_INODES:
  case MAT_IGNORE_ZERO_ENTRIES:
  case MAT_FORM_EXPLICIT_TRANSPOSE:
    MatCheckPreallocated(A,1);
    PetscCall(MatSetOption(a->A,op,flg));
    PetscCall(MatSetOption(a->B,op,flg));
    break;
  case MAT_ROW_ORIENTED:
    MatCheckPreallocated(A,1);
    a->roworiented = flg;

    PetscCall(MatSetOption(a->A,op,flg));
    PetscCall(MatSetOption(a->B,op,flg));
    break;
  case MAT_FORCE_DIAGONAL_ENTRIES:
  case MAT_SORTED_FULL:
    PetscCall(PetscInfo(A,"Option %s ignored\n",MatOptions[op]));
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = flg;
    break;
  /* Symmetry flags are handled directly by MatSetOption() and they don't affect preallocation */
  case MAT_SPD:
  case MAT_SYMMETRIC:
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_SYMMETRY_ETERNAL:
    break;
  case MAT_SUBMAT_SINGLEIS:
    A->submat_singleis = flg;
    break;
  case MAT_STRUCTURE_ONLY:
    /* The option is handled directly by MatSetOption() */
    break;
  default:
    SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
  }
  PetscFunctionReturn(0);
}

/* Return one locally owned row of the parallel matrix, merging the diagonal and
   off-diagonal blocks into a single row sorted by increasing global column index.
   Must be paired with MatRestoreRow_MPIAIJ(); only one row may be active at a time. */
PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIAIJ  *mat = (Mat_MPIAIJ*)matin->data;
  PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p;
  PetscInt    i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
  PetscInt    nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
  PetscInt    *cmap,*idx_p;

  PetscFunctionBegin;
  PetscCheck(!mat->getrowactive,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
*/
    Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
    PetscInt   max = 1,tmp;
    /* longest combined row length over both blocks bounds the scratch size */
    for (i=0; i<matin->rmap->n; i++) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
      if (max < tmp) max = tmp;
    }
    PetscCall(PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices));
  }

  PetscCheckFalse(row < rstart || row >= rend,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
  lrow = row - rstart;

  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = NULL; pvB = NULL;}
  if (!idx) {pcA = NULL; if (!v) pcB = NULL;}
  PetscCall((*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA));
  PetscCall((*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB));
  nztot = nzA + nzB;

  cmap = mat->garray;
  if (v || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      PetscInt imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        /* imark = number of B entries whose global column precedes the diagonal block */
        for (i=0; i<nzB; i++) {
          if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
        for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          for (i=0; i<imark; i++) {
            idx_p[i] = cmap[cworkB[i]];
          }
        } else {
          for (i=0; i<nzB; i++) {
            if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
            else break;
          }
          imark = i;
        }
        for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
        for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
      }
    } else {
      if (idx) *idx = NULL;
      if (v)   *v   = NULL;
    }
  }
  *nz = nztot;
  PetscCall((*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA));
  PetscCall((*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB));
  PetscFunctionReturn(0);
}

/* Release the row returned by MatGetRow_MPIAIJ() (the scratch arrays are reused, so
   this only clears the active flag) */
PetscErrorCode MatRestoreRow_MPIAIJ(Mat
mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;

  PetscFunctionBegin;
  PetscCheck(aij->getrowactive,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
  aij->getrowactive = PETSC_FALSE;
  PetscFunctionReturn(0);
}

/* Compute a matrix norm (Frobenius, 1-norm = max column sum, or infinity-norm = max row sum)
   over the parallel matrix; the 2-norm is not supported */
PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
{
  Mat_MPIAIJ      *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ      *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
  PetscInt        i,j,cstart = mat->cmap->rstart;
  PetscReal       sum = 0.0;
  const MatScalar *v,*amata,*bmata;

  PetscFunctionBegin;
  if (aij->size == 1) {
    /* single process: defer entirely to the sequential implementation */
    PetscCall(MatNorm(aij->A,type,norm));
  } else {
    PetscCall(MatSeqAIJGetArrayRead(aij->A,&amata));
    PetscCall(MatSeqAIJGetArrayRead(aij->B,&bmata));
    if (type == NORM_FROBENIUS) {
      v = amata;
      for (i=0; i<amat->nz; i++) {
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
      }
      v = bmata;
      for (i=0; i<bmat->nz; i++) {
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
      }
      PetscCall(MPIU_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat)));
      *norm = PetscSqrtReal(*norm);
      PetscCall(PetscLogFlops(2.0*amat->nz+2.0*bmat->nz));
    } else if (type == NORM_1) { /* max column norm */
      PetscReal *tmp,*tmp2;
      PetscInt  *jj,*garray = aij->garray;
      /* note: tmp has global length N; this is not memory-scalable for very wide matrices */
      PetscCall(PetscCalloc1(mat->cmap->N+1,&tmp));
      PetscCall(PetscMalloc1(mat->cmap->N+1,&tmp2));
      *norm = 0.0;
      v = amata; jj = amat->j;
      for (j=0; j<amat->nz; j++) {
        tmp[cstart + *jj++] += PetscAbsScalar(*v); v++;
      }
      v = bmata; jj = bmat->j;
      for (j=0; j<bmat->nz; j++) {
        tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
      }
      PetscCall(MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat)));
      for (j=0; j<mat->cmap->N; j++) {
        if (tmp2[j] > *norm) *norm =
tmp2[j];
      }
      PetscCall(PetscFree(tmp));
      PetscCall(PetscFree(tmp2));
      PetscCall(PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0)));
    } else if (type == NORM_INFINITY) { /* max row norm */
      PetscReal ntemp = 0.0;
      for (j=0; j<aij->A->rmap->n; j++) {
        v   = amata + amat->i[j];
        sum = 0.0;
        for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
          sum += PetscAbsScalar(*v); v++;
        }
        v = bmata + bmat->i[j];
        for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
          sum += PetscAbsScalar(*v); v++;
        }
        if (sum > ntemp) ntemp = sum;
      }
      PetscCall(MPIU_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat)));
      PetscCall(PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0)));
    } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm");
    PetscCall(MatSeqAIJRestoreArrayRead(aij->A,&amata));
    PetscCall(MatSeqAIJRestoreArrayRead(aij->B,&bmata));
  }
  PetscFunctionReturn(0);
}

/* Form the transpose of a parallel AIJ matrix. For MAT_INITIAL_MATRIX (or in-place) a new
   matrix with swapped layouts is created and preallocated using SF-reduced column counts;
   for MAT_REUSE_MATRIX the existing pattern of *matout is reused. */
PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
{
  Mat_MPIAIJ      *a =(Mat_MPIAIJ*)A->data,*b;
  Mat_SeqAIJ      *Aloc =(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data,*sub_B_diag;
  PetscInt        M = A->rmap->N,N=A->cmap->N,ma,na,mb,nb,row,*cols,*cols_tmp,*B_diag_ilen,i,ncol,A_diag_ncol;
  const PetscInt  *ai,*aj,*bi,*bj,*B_diag_i;
  Mat             B,A_diag,*B_diag;
  const MatScalar *pbv,*bv;

  PetscFunctionBegin;
  ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n;
  ai = Aloc->i; aj = Aloc->j;
  bi = Bloc->i; bj = Bloc->j;
  if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
    PetscInt             *d_nnz,*g_nnz,*o_nnz;
    PetscSFNode          *oloc;
    PETSC_UNUSED PetscSF sf;

    PetscCall(PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc));
    /* compute d_nnz for preallocation */
    PetscCall(PetscArrayzero(d_nnz,na));
    for (i=0; i<ai[ma]; i++) d_nnz[aj[i]]++;
    /* compute local off-diagonal
contributions */
    PetscCall(PetscArrayzero(g_nnz,nb));
    for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++;
    /* map those to global */
    PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A),&sf));
    PetscCall(PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray));
    PetscCall(PetscSFSetFromOptions(sf));
    PetscCall(PetscArrayzero(o_nnz,na));
    PetscCall(PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM));
    PetscCall(PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM));
    PetscCall(PetscSFDestroy(&sf));

    PetscCall(MatCreate(PetscObjectComm((PetscObject)A),&B));
    PetscCall(MatSetSizes(B,A->cmap->n,A->rmap->n,N,M));
    PetscCall(MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs)));
    PetscCall(MatSetType(B,((PetscObject)A)->type_name));
    PetscCall(MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz));
    PetscCall(PetscFree4(d_nnz,o_nnz,g_nnz,oloc));
  } else {
    B = *matout;
    PetscCall(MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE));
  }

  b           = (Mat_MPIAIJ*)B->data;
  A_diag      = a->A;
  B_diag      = &b->A;
  sub_B_diag  = (Mat_SeqAIJ*)(*B_diag)->data;
  A_diag_ncol = A_diag->cmap->N;
  B_diag_ilen = sub_B_diag->ilen;
  B_diag_i    = sub_B_diag->i;

  /* Set ilen for diagonal of B */
  for (i=0; i<A_diag_ncol; i++) {
    B_diag_ilen[i] = B_diag_i[i+1] - B_diag_i[i];
  }

  /* Transpose the diagonal part of the matrix. In contrast to the offdiagonal part, this can be done
     very quickly (=without using MatSetValues), because all writes are local. */
  PetscCall(MatTranspose(A_diag,MAT_REUSE_MATRIX,B_diag));

  /* copy over the B part */
  PetscCall(PetscMalloc1(bi[mb],&cols));
  PetscCall(MatSeqAIJGetArrayRead(a->B,&bv));
  pbv = bv;
  row = A->rmap->rstart;
  /* translate compressed local column indices of B to global indices */
  for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]];
  cols_tmp = cols;
  for (i=0; i<mb; i++) {
    ncol = bi[i+1]-bi[i];
    /* each row of B becomes a column of the transpose: insert as ncol x 1 */
    PetscCall(MatSetValues(B,ncol,cols_tmp,1,&row,pbv,INSERT_VALUES));
    row++;
    pbv += ncol; cols_tmp += ncol;
  }
  PetscCall(PetscFree(cols));
  PetscCall(MatSeqAIJRestoreArrayRead(a->B,&bv));

  PetscCall(MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY));
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
    *matout = B;
  } else {
    /* in-place transpose: replace A's content with B's */
    PetscCall(MatHeaderMerge(A,&B));
  }
  PetscFunctionReturn(0);
}

/* Scale rows by ll and/or columns by rr (mat = diag(ll)*mat*diag(rr)); the scatter of rr
   into the local ghost vector is overlapped with the left scaling */
PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
  Mat        a = aij->A,b = aij->B;
  PetscInt   s1,s2,s3;

  PetscFunctionBegin;
  PetscCall(MatGetLocalSize(mat,&s2,&s3));
  if (rr) {
    PetscCall(VecGetLocalSize(rr,&s1));
    PetscCheck(s1==s3,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
    /* Overlap communication with computation. */
    PetscCall(VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD));
  }
  if (ll) {
    PetscCall(VecGetLocalSize(ll,&s1));
    PetscCheck(s1==s2,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
    PetscCall((*b->ops->diagonalscale)(b,ll,NULL));
  }
  /* scale the diagonal block */
  PetscCall((*a->ops->diagonalscale)(a,ll,rr));

  if (rr) {
    /* Do a scatter end and then right scale the off-diagonal block */
    PetscCall(VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD));
    PetscCall((*b->ops->diagonalscale)(b,NULL,aij->lvec));
  }
  PetscFunctionReturn(0);
}

/* Mark the matrix as unfactored (only the diagonal block carries factorization state) */
PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  PetscCall(MatSetUnfactored(a->A));
  PetscFunctionReturn(0);
}

/* Compare two parallel matrices entrywise; the local results are combined with a
   logical-AND reduction so all processes return the same answer */
PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool *flag)
{
  Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
  Mat        a,b,c,d;
  PetscBool  flg;

  PetscFunctionBegin;
  a = matA->A; b = matA->B;
  c = matB->A; d = matB->B;

  PetscCall(MatEqual(a,c,&flg));
  if (flg) {
    PetscCall(MatEqual(b,d,&flg));
  }
  PetscCall(MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A)));
  PetscFunctionReturn(0);
}

/* Copy the values of A into B, using the fast block-wise path only when the nonzero
   patterns match and both matrices share the same copy implementation */
PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
  Mat_MPIAIJ *b = (Mat_MPIAIJ*)B->data;

  PetscFunctionBegin;
  /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy.
*/
  if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
    /* because of the column compression in the off-processor part of the matrix a->B,
       the number of columns in a->B and b->B may be different, hence we cannot call
       the MatCopy() directly on the two parts. If need be, we can provide a more
       efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
       then copying the submatrices */
    PetscCall(MatCopy_Basic(A,B,str));
  } else {
    PetscCall(MatCopy(a->A,b->A,str));
    PetscCall(MatCopy(a->B,b->B,str));
  }
  PetscCall(PetscObjectStateIncrease((PetscObject)B));
  PetscFunctionReturn(0);
}

/* Default setup: preallocate with default parameters when the user did not call a
   preallocation routine explicitly */
PetscErrorCode MatSetUp_MPIAIJ(Mat A)
{
  PetscFunctionBegin;
  PetscCall(MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,NULL,PETSC_DEFAULT,NULL));
  PetscFunctionReturn(0);
}

/*
   Computes the number of nonzeros per row needed for preallocation when X and Y
   have different nonzero structure.
*/
PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz)
{
  PetscInt i,j,k,nzx,nzy;

  PetscFunctionBegin;
  /* Set the number of nonzeros in the new matrix: merge-count the union of the two
     sorted rows, comparing columns through the local-to-global maps */
  for (i=0; i<m; i++) {
    const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i];
    nzx    = xi[i+1] - xi[i];
    nzy    = yi[i+1] - yi[i];
    nnz[i] = 0;
    for (j=0,k=0; j<nzx; j++) { /* Point in X */
      for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */
      if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++;             /* Skip duplicate */
      nnz[i]++;
    }
    for (; k<nzy; k++) nnz[i]++;
  }
  PetscFunctionReturn(0);
}

/* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz)
{
  PetscInt   m = Y->rmap->N;
  Mat_SeqAIJ *x = (Mat_SeqAIJ*)X->data;
  Mat_SeqAIJ *y = (Mat_SeqAIJ*)Y->data;

  PetscFunctionBegin;
  PetscCall(MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz));
  PetscFunctionReturn(0);
}

/* Y = a*X + Y. For identical patterns the blocks are updated directly; for a subset
   pattern the basic kernel is used; otherwise a new matrix with the union pattern is
   preallocated and Y is replaced by it. */
PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
{
  Mat_MPIAIJ *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data;

  PetscFunctionBegin;
  if (str == SAME_NONZERO_PATTERN) {
    PetscCall(MatAXPY(yy->A,a,xx->A,str));
    PetscCall(MatAXPY(yy->B,a,xx->B,str));
  } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
    PetscCall(MatAXPY_Basic(Y,a,X,str));
  } else {
    Mat      B;
    PetscInt *nnz_d,*nnz_o;

    PetscCall(PetscMalloc1(yy->A->rmap->N,&nnz_d));
    PetscCall(PetscMalloc1(yy->B->rmap->N,&nnz_o));
    PetscCall(MatCreate(PetscObjectComm((PetscObject)Y),&B));
2098 PetscCall(PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name)); 2099 PetscCall(MatSetLayouts(B,Y->rmap,Y->cmap)); 2100 PetscCall(MatSetType(B,((PetscObject)Y)->type_name)); 2101 PetscCall(MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d)); 2102 PetscCall(MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o)); 2103 PetscCall(MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o)); 2104 PetscCall(MatAXPY_BasicWithPreallocation(B,Y,a,X,str)); 2105 PetscCall(MatHeaderMerge(Y,&B)); 2106 PetscCall(PetscFree(nnz_d)); 2107 PetscCall(PetscFree(nnz_o)); 2108 } 2109 PetscFunctionReturn(0); 2110 } 2111 2112 PETSC_INTERN PetscErrorCode MatConjugate_SeqAIJ(Mat); 2113 2114 PetscErrorCode MatConjugate_MPIAIJ(Mat mat) 2115 { 2116 PetscFunctionBegin; 2117 if (PetscDefined(USE_COMPLEX)) { 2118 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2119 2120 PetscCall(MatConjugate_SeqAIJ(aij->A)); 2121 PetscCall(MatConjugate_SeqAIJ(aij->B)); 2122 } 2123 PetscFunctionReturn(0); 2124 } 2125 2126 PetscErrorCode MatRealPart_MPIAIJ(Mat A) 2127 { 2128 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2129 2130 PetscFunctionBegin; 2131 PetscCall(MatRealPart(a->A)); 2132 PetscCall(MatRealPart(a->B)); 2133 PetscFunctionReturn(0); 2134 } 2135 2136 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 2137 { 2138 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2139 2140 PetscFunctionBegin; 2141 PetscCall(MatImaginaryPart(a->A)); 2142 PetscCall(MatImaginaryPart(a->B)); 2143 PetscFunctionReturn(0); 2144 } 2145 2146 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A,Vec v,PetscInt idx[]) 2147 { 2148 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2149 PetscInt i,*idxb = NULL,m = A->rmap->n; 2150 PetscScalar *va,*vv; 2151 Vec vB,vA; 2152 const PetscScalar *vb; 2153 2154 PetscFunctionBegin; 2155 PetscCall(VecCreateSeq(PETSC_COMM_SELF,m,&vA)); 2156 PetscCall(MatGetRowMaxAbs(a->A,vA,idx)); 2157 2158 PetscCall(VecGetArrayWrite(vA,&va)); 2159 if (idx) { 2160 for (i=0; i<m; i++) { 2161 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2162 } 
2163 } 2164 2165 PetscCall(VecCreateSeq(PETSC_COMM_SELF,m,&vB)); 2166 PetscCall(PetscMalloc1(m,&idxb)); 2167 PetscCall(MatGetRowMaxAbs(a->B,vB,idxb)); 2168 2169 PetscCall(VecGetArrayWrite(v,&vv)); 2170 PetscCall(VecGetArrayRead(vB,&vb)); 2171 for (i=0; i<m; i++) { 2172 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 2173 vv[i] = vb[i]; 2174 if (idx) idx[i] = a->garray[idxb[i]]; 2175 } else { 2176 vv[i] = va[i]; 2177 if (idx && PetscAbsScalar(va[i]) == PetscAbsScalar(vb[i]) && idxb[i] != -1 && idx[i] > a->garray[idxb[i]]) 2178 idx[i] = a->garray[idxb[i]]; 2179 } 2180 } 2181 PetscCall(VecRestoreArrayWrite(vA,&vv)); 2182 PetscCall(VecRestoreArrayWrite(vA,&va)); 2183 PetscCall(VecRestoreArrayRead(vB,&vb)); 2184 PetscCall(PetscFree(idxb)); 2185 PetscCall(VecDestroy(&vA)); 2186 PetscCall(VecDestroy(&vB)); 2187 PetscFunctionReturn(0); 2188 } 2189 2190 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2191 { 2192 Mat_MPIAIJ *mat = (Mat_MPIAIJ*) A->data; 2193 PetscInt m = A->rmap->n,n = A->cmap->n; 2194 PetscInt cstart = A->cmap->rstart,cend = A->cmap->rend; 2195 PetscInt *cmap = mat->garray; 2196 PetscInt *diagIdx, *offdiagIdx; 2197 Vec diagV, offdiagV; 2198 PetscScalar *a, *diagA, *offdiagA; 2199 const PetscScalar *ba,*bav; 2200 PetscInt r,j,col,ncols,*bi,*bj; 2201 Mat B = mat->B; 2202 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 2203 2204 PetscFunctionBegin; 2205 /* When a process holds entire A and other processes have no entry */ 2206 if (A->cmap->N == n) { 2207 PetscCall(VecGetArrayWrite(v,&diagA)); 2208 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF,1,m,diagA,&diagV)); 2209 PetscCall(MatGetRowMinAbs(mat->A,diagV,idx)); 2210 PetscCall(VecDestroy(&diagV)); 2211 PetscCall(VecRestoreArrayWrite(v,&diagA)); 2212 PetscFunctionReturn(0); 2213 } else if (n == 0) { 2214 if (m) { 2215 PetscCall(VecGetArrayWrite(v,&a)); 2216 for (r = 0; r < m; r++) {a[r] = 0.0; if (idx) idx[r] = -1;} 2217 PetscCall(VecRestoreArrayWrite(v,&a)); 2218 } 2219 PetscFunctionReturn(0); 
}

  PetscCall(PetscMalloc2(m,&diagIdx,m,&offdiagIdx));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV));
  PetscCall(MatGetRowMinAbs(mat->A, diagV, diagIdx));

  /* Get offdiagIdx[] for implicit 0.0 */
  PetscCall(MatSeqAIJGetArrayRead(B,&bav));
  ba = bav;
  bi = b->i;
  bj = b->j;
  PetscCall(VecGetArrayWrite(offdiagV, &offdiagA));
  for (r = 0; r < m; r++) {
    ncols = bi[r+1] - bi[r];
    if (ncols == A->cmap->N - n) { /* Brow is dense */
      offdiagA[r] = *ba; offdiagIdx[r] = cmap[0];
    } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */
      offdiagA[r] = 0.0;

      /* Find first hole in the cmap */
      for (j=0; j<ncols; j++) {
        col = cmap[bj[j]]; /* global column number = cmap[B column number] */
        if (col > j && j < cstart) {
          offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
          break;
        } else if (col > j + n && j >= cstart) {
          offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
          break;
        }
      }
      if (j == ncols && ncols < A->cmap->N - n) {
        /* a hole is outside compressed Bcols */
        if (ncols == 0) {
          if (cstart) {
            offdiagIdx[r] = 0;
          } else offdiagIdx[r] = cend;
        } else { /* ncols > 0 */
          offdiagIdx[r] = cmap[ncols-1] + 1;
          if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
        }
      }
    }

    /* now compare the implicit zero against the explicit B entries of this row */
    for (j=0; j<ncols; j++) {
      if (PetscAbsScalar(offdiagA[r]) > PetscAbsScalar(*ba)) {offdiagA[r] = *ba; offdiagIdx[r] = cmap[*bj];}
      ba++; bj++;
    }
  }

  /* merge the per-block minima; ties prefer the smaller global column index */
  PetscCall(VecGetArrayWrite(v, &a));
  PetscCall(VecGetArrayRead(diagV, (const PetscScalar**)&diagA));
  for (r = 0; r < m; ++r) {
    if (PetscAbsScalar(diagA[r]) < PetscAbsScalar(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) idx[r] = cstart + diagIdx[r];
    } else if (PetscAbsScalar(diagA[r]) == PetscAbsScalar(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) {
        if (cstart + diagIdx[r] <= offdiagIdx[r]) {
          idx[r] = cstart + diagIdx[r];
        } else idx[r] = offdiagIdx[r];
      }
    } else {
      a[r] = offdiagA[r];
      if (idx) idx[r] = offdiagIdx[r];
    }
  }
  PetscCall(MatSeqAIJRestoreArrayRead(B,&bav));
  PetscCall(VecRestoreArrayWrite(v, &a));
  PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar**)&diagA));
  PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA));
  PetscCall(VecDestroy(&diagV));
  PetscCall(VecDestroy(&offdiagV));
  PetscCall(PetscFree2(diagIdx, offdiagIdx));
  PetscFunctionReturn(0);
}

/* For each local row, compute the minimum entry (by real part; implicit zeros in the
   off-diagonal block count), optionally returning its global column index */
PetscErrorCode MatGetRowMin_MPIAIJ(Mat A,Vec v,PetscInt idx[])
{
  Mat_MPIAIJ        *mat = (Mat_MPIAIJ*) A->data;
  PetscInt          m = A->rmap->n,n = A->cmap->n;
  PetscInt          cstart = A->cmap->rstart,cend = A->cmap->rend;
  PetscInt          *cmap = mat->garray;
  PetscInt          *diagIdx, *offdiagIdx;
  Vec               diagV, offdiagV;
  PetscScalar       *a, *diagA, *offdiagA;
  const PetscScalar *ba,*bav;
  PetscInt          r,j,col,ncols,*bi,*bj;
  Mat               B = mat->B;
  Mat_SeqAIJ        *b = (Mat_SeqAIJ*)B->data;

  PetscFunctionBegin;
  /* When a process holds entire A and other processes have no entry */
  if (A->cmap->N == n) {
    PetscCall(VecGetArrayWrite(v,&diagA));
    PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF,1,m,diagA,&diagV));
    PetscCall(MatGetRowMin(mat->A,diagV,idx));
    PetscCall(VecDestroy(&diagV));
    PetscCall(VecRestoreArrayWrite(v,&diagA));
    PetscFunctionReturn(0);
  } else if (n == 0) {
    if (m) {
      /* no local columns: nothing to minimize over, return the identity of min */
      PetscCall(VecGetArrayWrite(v,&a));
      for (r = 0; r < m; r++) {a[r] = PETSC_MAX_REAL; if (idx) idx[r] = -1;}
      PetscCall(VecRestoreArrayWrite(v,&a));
    }
    PetscFunctionReturn(0);
  }

  PetscCall(PetscCalloc2(m,&diagIdx,m,&offdiagIdx));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV));
PetscCall(MatGetRowMin(mat->A, diagV, diagIdx));

  /* Get offdiagIdx[] for implicit 0.0 */
  PetscCall(MatSeqAIJGetArrayRead(B,&bav));
  ba = bav;
  bi = b->i;
  bj = b->j;
  PetscCall(VecGetArrayWrite(offdiagV, &offdiagA));
  for (r = 0; r < m; r++) {
    ncols = bi[r+1] - bi[r];
    if (ncols == A->cmap->N - n) { /* Brow is dense */
      offdiagA[r] = *ba; offdiagIdx[r] = cmap[0];
    } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */
      offdiagA[r] = 0.0;

      /* Find first hole in the cmap */
      for (j=0; j<ncols; j++) {
        col = cmap[bj[j]]; /* global column number = cmap[B column number] */
        if (col > j && j < cstart) {
          offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
          break;
        } else if (col > j + n && j >= cstart) {
          offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
          break;
        }
      }
      if (j == ncols && ncols < A->cmap->N - n) {
        /* a hole is outside compressed Bcols */
        if (ncols == 0) {
          if (cstart) {
            offdiagIdx[r] = 0;
          } else offdiagIdx[r] = cend;
        } else { /* ncols > 0 */
          offdiagIdx[r] = cmap[ncols-1] + 1;
          if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
        }
      }
    }

    /* compare the implicit zero against the explicit B entries of this row */
    for (j=0; j<ncols; j++) {
      if (PetscRealPart(offdiagA[r]) > PetscRealPart(*ba)) {offdiagA[r] = *ba; offdiagIdx[r] = cmap[*bj];}
      ba++; bj++;
    }
  }

  /* merge the per-block minima; ties prefer the smaller global column index */
  PetscCall(VecGetArrayWrite(v, &a));
  PetscCall(VecGetArrayRead(diagV, (const PetscScalar**)&diagA));
  for (r = 0; r < m; ++r) {
    if (PetscRealPart(diagA[r]) < PetscRealPart(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) idx[r] = cstart + diagIdx[r];
    } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) {
        if (cstart + diagIdx[r] <= offdiagIdx[r]) {
          idx[r] = cstart + diagIdx[r];
        } else idx[r] = offdiagIdx[r];
      }
    } else {
      a[r] = offdiagA[r];
      if (idx) idx[r] = offdiagIdx[r];
    }
  }
  PetscCall(MatSeqAIJRestoreArrayRead(B,&bav));
  PetscCall(VecRestoreArrayWrite(v, &a));
  PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar**)&diagA));
  PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA));
  PetscCall(VecDestroy(&diagV));
  PetscCall(VecDestroy(&offdiagV));
  PetscCall(PetscFree2(diagIdx, offdiagIdx));
  PetscFunctionReturn(0);
}

/* For each local row, compute the maximum entry (by real part; implicit zeros in the
   off-diagonal block count), optionally returning its global column index */
PetscErrorCode MatGetRowMax_MPIAIJ(Mat A,Vec v,PetscInt idx[])
{
  Mat_MPIAIJ        *mat = (Mat_MPIAIJ*)A->data;
  PetscInt          m = A->rmap->n,n = A->cmap->n;
  PetscInt          cstart = A->cmap->rstart,cend = A->cmap->rend;
  PetscInt          *cmap = mat->garray;
  PetscInt          *diagIdx, *offdiagIdx;
  Vec               diagV, offdiagV;
  PetscScalar       *a, *diagA, *offdiagA;
  const PetscScalar *ba,*bav;
  PetscInt          r,j,col,ncols,*bi,*bj;
  Mat               B = mat->B;
  Mat_SeqAIJ        *b = (Mat_SeqAIJ*)B->data;

  PetscFunctionBegin;
  /* When a process holds entire A and other processes have no entry */
  if (A->cmap->N == n) {
    PetscCall(VecGetArrayWrite(v,&diagA));
    PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF,1,m,diagA,&diagV));
    PetscCall(MatGetRowMax(mat->A,diagV,idx));
    PetscCall(VecDestroy(&diagV));
    PetscCall(VecRestoreArrayWrite(v,&diagA));
    PetscFunctionReturn(0);
  } else if (n == 0) {
    if (m) {
      /* no local columns: nothing to maximize over, return the identity of max */
      PetscCall(VecGetArrayWrite(v,&a));
      for (r = 0; r < m; r++) {a[r] = PETSC_MIN_REAL; if (idx) idx[r] = -1;}
      PetscCall(VecRestoreArrayWrite(v,&a));
    }
    PetscFunctionReturn(0);
  }

  PetscCall(PetscMalloc2(m,&diagIdx,m,&offdiagIdx));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV));
  PetscCall(MatGetRowMax(mat->A, diagV, diagIdx));

  /* Get offdiagIdx[] for implicit 0.0 */
  PetscCall(MatSeqAIJGetArrayRead(B,&bav));
  ba = bav;
  bi = b->i;
  bj = b->j;
PetscCall(VecGetArrayWrite(offdiagV, &offdiagA));
  for (r = 0; r < m; r++) {
    ncols = bi[r+1] - bi[r];
    if (ncols == A->cmap->N - n) { /* Brow is dense */
      offdiagA[r] = *ba; offdiagIdx[r] = cmap[0];
    } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */
      offdiagA[r] = 0.0;

      /* Find first hole in the cmap */
      for (j=0; j<ncols; j++) {
        col = cmap[bj[j]]; /* global column number = cmap[B column number] */
        if (col > j && j < cstart) {
          offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
          break;
        } else if (col > j + n && j >= cstart) {
          offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
          break;
        }
      }
      if (j == ncols && ncols < A->cmap->N - n) {
        /* a hole is outside compressed Bcols */
        if (ncols == 0) {
          if (cstart) {
            offdiagIdx[r] = 0;
          } else offdiagIdx[r] = cend;
        } else { /* ncols > 0 */
          offdiagIdx[r] = cmap[ncols-1] + 1;
          if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
        }
      }
    }

    /* compare the implicit zero against the explicit B entries of this row */
    for (j=0; j<ncols; j++) {
      if (PetscRealPart(offdiagA[r]) < PetscRealPart(*ba)) {offdiagA[r] = *ba; offdiagIdx[r] = cmap[*bj];}
      ba++; bj++;
    }
  }

  /* merge the per-block maxima; ties prefer the smaller global column index */
  PetscCall(VecGetArrayWrite(v, &a));
  PetscCall(VecGetArrayRead(diagV,(const PetscScalar**)&diagA));
  for (r = 0; r < m; ++r) {
    if (PetscRealPart(diagA[r]) > PetscRealPart(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) idx[r] = cstart + diagIdx[r];
    } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) {
        if (cstart + diagIdx[r] <= offdiagIdx[r]) {
          idx[r] = cstart + diagIdx[r];
        } else idx[r] = offdiagIdx[r];
      }
    } else {
      a[r] = offdiagA[r];
      if (idx) idx[r] = offdiagIdx[r];
    }
  }
  PetscCall(MatSeqAIJRestoreArrayRead(B,&bav));
  PetscCall(VecRestoreArrayWrite(v, &a));
  PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar**)&diagA));
  PetscCall(VecRestoreArrayWrite(offdiagV,&offdiagA));
  PetscCall(VecDestroy(&diagV));
  PetscCall(VecDestroy(&offdiagV));
  PetscCall(PetscFree2(diagIdx, offdiagIdx));
  PetscFunctionReturn(0);
}

/* Obtain a sequential matrix with the same nonzero structure gathered from all processes */
PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
{
  Mat *dummy;

  PetscFunctionBegin;
  PetscCall(MatCreateSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy));
  *newmat = *dummy;
  PetscCall(PetscFree(dummy));
  PetscFunctionReturn(0);
}

/* Invert the (point-)block diagonal; only the diagonal block participates, and any
   factorization error is propagated to the parallel matrix */
PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data;

  PetscFunctionBegin;
  PetscCall(MatInvertBlockDiagonal(a->A,values));
  A->factorerrortype = a->A->factorerrortype;
  PetscFunctionReturn(0);
}

/* Fill the matrix with random values; for an unassembled (preallocated) matrix the
   off-diagonal block must skip the locally owned column range */
static PetscErrorCode MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)x->data;

  PetscFunctionBegin;
  PetscCheck(x->assembled || x->preallocated,PetscObjectComm((PetscObject)x), PETSC_ERR_ARG_WRONGSTATE, "MatSetRandom on an unassembled and unpreallocated MATMPIAIJ is not allowed");
  PetscCall(MatSetRandom(aij->A,rctx));
  if (x->assembled) {
    PetscCall(MatSetRandom(aij->B,rctx));
  } else {
    PetscCall(MatSetRandomSkipColumnRange_SeqAIJ_Private(aij->B,x->cmap->rstart,x->cmap->rend,rctx));
  }
  PetscCall(MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY));
  PetscFunctionReturn(0);
}

/* Select which MatIncreaseOverlap implementation the matrix uses */
PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A,PetscBool sc)
{
  PetscFunctionBegin;
  if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable;
  else A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ;
  PetscFunctionReturn(0);
}

/*@
   MatMPIAIJSetUseScalableIncreaseOverlap - Determine if the matrix uses a scalable
algorithm to compute the overlap

   Collective on Mat

   Input Parameters:
+  A - the matrix
-  sc - PETSC_TRUE indicates use the scalable algorithm (default is not to use the scalable algorithm)

   Level: advanced

@*/
PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A,PetscBool sc)
{
  PetscFunctionBegin;
  /* dispatch to the type-specific implementation if it was composed on A; no-op otherwise */
  PetscTryMethod(A,"MatMPIAIJSetUseScalableIncreaseOverlap_C",(Mat,PetscBool),(A,sc));
  PetscFunctionReturn(0);
}

/* Processes the -mat_increase_overlap_scalable option; the default reflects whichever
   increaseoverlap implementation is currently installed in A->ops. */
PetscErrorCode MatSetFromOptions_MPIAIJ(PetscOptionItems *PetscOptionsObject,Mat A)
{
  PetscBool sc = PETSC_FALSE,flg;

  PetscFunctionBegin;
  PetscOptionsHeadBegin(PetscOptionsObject,"MPIAIJ options");
  if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE;
  PetscCall(PetscOptionsBool("-mat_increase_overlap_scalable","Use a scalable algorithm to compute the overlap","MatIncreaseOverlap",sc,&sc,&flg));
  if (flg) {
    PetscCall(MatMPIAIJSetUseScalableIncreaseOverlap(A,sc));
  }
  PetscOptionsHeadEnd();
  PetscFunctionReturn(0);
}

/* Y = Y + a*I. Ensures a (minimal, 1 nonzero per row) preallocation exists first so the
   diagonal entries can be inserted; preserves the diagonal block's nonew flag across the
   repreallocation. */
PetscErrorCode MatShift_MPIAIJ(Mat Y,PetscScalar a)
{
  Mat_MPIAIJ *maij = (Mat_MPIAIJ*)Y->data;
  Mat_SeqAIJ *aij  = (Mat_SeqAIJ*)maij->A->data;

  PetscFunctionBegin;
  if (!Y->preallocated) {
    PetscCall(MatMPIAIJSetPreallocation(Y,1,NULL,0,NULL));
  } else if (!aij->nz) {
    /* diagonal block has no nonzeros yet; re-preallocate it but keep its nonew setting */
    PetscInt nonew = aij->nonew;
    PetscCall(MatSeqAIJSetPreallocation(maij->A,1,NULL));
    aij->nonew = nonew;
  }
  PetscCall(MatShift_Basic(Y,a));
  PetscFunctionReturn(0);
}

/* Reports whether any diagonal entry is absent, delegating to the local diagonal block;
   the local index d is shifted by the row ownership start to make it global. */
PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A,PetscBool *missing,PetscInt *d)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  PetscCheck(A->rmap->n == A->cmap->n,PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices");
  PetscCall(MatMissingDiagonal(a->A,missing,d));
  if (d) {
    PetscInt rstart;
    PetscCall(MatGetOwnershipRange(A,&rstart,NULL));
    *d += rstart;  /* convert local row index to global */

  }
  PetscFunctionReturn(0);
}

/* Inverts variable-size diagonal blocks by delegating to the local diagonal block a->A. */
PetscErrorCode MatInvertVariableBlockDiagonal_MPIAIJ(Mat A,PetscInt nblocks,const PetscInt *bsizes,PetscScalar *diag)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  PetscCall(MatInvertVariableBlockDiagonal(a->A,nblocks,bsizes,diag));
  PetscFunctionReturn(0);
}

/* -------------------------------------------------------------------*/
/* Function table for MATMPIAIJ; slot numbers follow struct _MatOps in the private headers. */
static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
                                       MatGetRow_MPIAIJ,
                                       MatRestoreRow_MPIAIJ,
                                       MatMult_MPIAIJ,
                                /* 4*/ MatMultAdd_MPIAIJ,
                                       MatMultTranspose_MPIAIJ,
                                       MatMultTransposeAdd_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                /*10*/ NULL,
                                       NULL,
                                       NULL,
                                       MatSOR_MPIAIJ,
                                       MatTranspose_MPIAIJ,
                                /*15*/ MatGetInfo_MPIAIJ,
                                       MatEqual_MPIAIJ,
                                       MatGetDiagonal_MPIAIJ,
                                       MatDiagonalScale_MPIAIJ,
                                       MatNorm_MPIAIJ,
                                /*20*/ MatAssemblyBegin_MPIAIJ,
                                       MatAssemblyEnd_MPIAIJ,
                                       MatSetOption_MPIAIJ,
                                       MatZeroEntries_MPIAIJ,
                                /*24*/ MatZeroRows_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                /*29*/ MatSetUp_MPIAIJ,
                                       NULL,
                                       NULL,
                                       MatGetDiagonalBlock_MPIAIJ,
                                       NULL,
                                /*34*/ MatDuplicate_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                /*39*/ MatAXPY_MPIAIJ,
                                       MatCreateSubMatrices_MPIAIJ,
                                       MatIncreaseOverlap_MPIAIJ,
                                       MatGetValues_MPIAIJ,
                                       MatCopy_MPIAIJ,
                                /*44*/ MatGetRowMax_MPIAIJ,
                                       MatScale_MPIAIJ,
                                       MatShift_MPIAIJ,
                                       MatDiagonalSet_MPIAIJ,
                                       MatZeroRowsColumns_MPIAIJ,
                                /*49*/ MatSetRandom_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                /*54*/ MatFDColoringCreate_MPIXAIJ,
                                       NULL,
                                       MatSetUnfactored_MPIAIJ,
                                       MatPermute_MPIAIJ,
                                       NULL,
                                /*59*/ MatCreateSubMatrix_MPIAIJ,
                                       MatDestroy_MPIAIJ,
                                       MatView_MPIAIJ,
                                       NULL,
                                       NULL,
                                /*64*/ NULL,
MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                /*69*/ MatGetRowMaxAbs_MPIAIJ,
                                       MatGetRowMinAbs_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                /*75*/ MatFDColoringApply_AIJ,
                                       MatSetFromOptions_MPIAIJ,
                                       NULL,
                                       NULL,
                                       MatFindZeroDiagonals_MPIAIJ,
                                /*80*/ NULL,
                                       NULL,
                                       NULL,
                                /*83*/ MatLoad_MPIAIJ,
                                       MatIsSymmetric_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                /*89*/ NULL,
                                       NULL,
                                       MatMatMultNumeric_MPIAIJ_MPIAIJ,
                                       NULL,
                                       NULL,
                                /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatBindToCPU_MPIAIJ,
                                /*99*/ MatProductSetFromOptions_MPIAIJ,
                                       NULL,
                                       NULL,
                                       MatConjugate_MPIAIJ,
                                       NULL,
                                /*104*/MatSetValuesRow_MPIAIJ,
                                       MatRealPart_MPIAIJ,
                                       MatImaginaryPart_MPIAIJ,
                                       NULL,
                                       NULL,
                                /*109*/NULL,
                                       NULL,
                                       MatGetRowMin_MPIAIJ,
                                       NULL,
                                       MatMissingDiagonal_MPIAIJ,
                                /*114*/MatGetSeqNonzeroStructure_MPIAIJ,
                                       NULL,
                                       MatGetGhosts_MPIAIJ,
                                       NULL,
                                       NULL,
                                /*119*/MatMultDiagonalBlock_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatGetMultiProcBlock_MPIAIJ,
                                /*124*/MatFindNonzeroRows_MPIAIJ,
                                       MatGetColumnReductions_MPIAIJ,
                                       MatInvertBlockDiagonal_MPIAIJ,
                                       MatInvertVariableBlockDiagonal_MPIAIJ,
                                       MatCreateSubMatricesMPI_MPIAIJ,
                                /*129*/NULL,
                                       NULL,
                                       NULL,
                                       MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
                                       NULL,
                                /*134*/NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                /*139*/MatSetBlockSizes_MPIAIJ,
                                       NULL,
                                       NULL,
                                       MatFDColoringSetUp_MPIXAIJ,
                                       MatFindOffBlockDiagonalEntries_MPIAIJ,
                                       MatCreateMPIMatConcatenateSeqMat_MPIAIJ,
                                /*145*/NULL,
                                       NULL,
                                       NULL
};

/* ----------------------------------------------------------------------------------------*/

/* Stashes the current values of both local blocks so they can be restored later with
   MatRetrieveValues_MPIAIJ(). */
PetscErrorCode MatStoreValues_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;

  PetscFunctionBegin;
  PetscCall(MatStoreValues(aij->A));
  PetscCall(MatStoreValues(aij->B));
  PetscFunctionReturn(0);
}

/* Restores the values previously stashed by MatStoreValues_MPIAIJ(). */
PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;

  PetscFunctionBegin;
  PetscCall(MatRetrieveValues(aij->A));
  PetscCall(MatRetrieveValues(aij->B));
  PetscFunctionReturn(0);
}

/* Preallocates the diagonal block (d_nz/d_nnz) and off-diagonal block (o_nz/o_nnz).
   Any existing column map, ghost array, local work vector, and scatter context are
   discarded; the off-diagonal block B is rebuilt from scratch. */
PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
{
  Mat_MPIAIJ  *b;
  PetscMPIInt size;

  PetscFunctionBegin;
  PetscCall(PetscLayoutSetUp(B->rmap));
  PetscCall(PetscLayoutSetUp(B->cmap));
  b = (Mat_MPIAIJ*)B->data;

#if defined(PETSC_USE_CTABLE)
  PetscCall(PetscTableDestroy(&b->colmap));
#else
  PetscCall(PetscFree(b->colmap));
#endif
  PetscCall(PetscFree(b->garray));
  PetscCall(VecDestroy(&b->lvec));
  PetscCall(VecScatterDestroy(&b->Mvctx));

  /* Because the B will have been resized we simply destroy it and create a new one each time */
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B),&size));
  PetscCall(MatDestroy(&b->B));
  PetscCall(MatCreate(PETSC_COMM_SELF,&b->B));
  /* on a single process the off-diagonal block is empty (0 columns) */
  PetscCall(MatSetSizes(b->B,B->rmap->n,size > 1 ? B->cmap->N : 0,B->rmap->n,size > 1 ? B->cmap->N : 0));
  PetscCall(MatSetBlockSizesFromMats(b->B,B,B));
  PetscCall(MatSetType(b->B,MATSEQAIJ));
  PetscCall(PetscLogObjectParent((PetscObject)B,(PetscObject)b->B));

  if (!B->preallocated) {
    PetscCall(MatCreate(PETSC_COMM_SELF,&b->A));
    PetscCall(MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n));
    PetscCall(MatSetBlockSizesFromMats(b->A,B,B));
    PetscCall(MatSetType(b->A,MATSEQAIJ));
    PetscCall(PetscLogObjectParent((PetscObject)B,(PetscObject)b->A));
  }

  PetscCall(MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz));
  PetscCall(MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz));
  B->preallocated  = PETSC_TRUE;
  B->was_assembled = PETSC_FALSE;
  B->assembled     = PETSC_FALSE;
  PetscFunctionReturn(0);
}

/* Resets the preallocation of both local blocks, discarding the column map, ghost array,
   local work vector, and scatter context, and marks the matrix as unassembled. */
PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B)
{
  Mat_MPIAIJ *b;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(B,MAT_CLASSID,1);
  PetscCall(PetscLayoutSetUp(B->rmap));
  PetscCall(PetscLayoutSetUp(B->cmap));
  b = (Mat_MPIAIJ*)B->data;

#if defined(PETSC_USE_CTABLE)
  PetscCall(PetscTableDestroy(&b->colmap));
#else
  PetscCall(PetscFree(b->colmap));
#endif
  PetscCall(PetscFree(b->garray));
  PetscCall(VecDestroy(&b->lvec));
  PetscCall(VecScatterDestroy(&b->Mvctx));

  PetscCall(MatResetPreallocation(b->A));
  PetscCall(MatResetPreallocation(b->B));
  B->preallocated  = PETSC_TRUE;
  B->was_assembled = PETSC_FALSE;
  B->assembled     = PETSC_FALSE;
  PetscFunctionReturn(0);
}

/* Duplicates an MPIAIJ matrix: copies layout, flags, column map, ghost array, local vector,
   scatter context, and both local blocks (values copied per cpvalues). */
PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat        mat;
  Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data;

  PetscFunctionBegin;
  *newmat = NULL;
  PetscCall(MatCreate(PetscObjectComm((PetscObject)matin),&mat));
  PetscCall(MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N));
PetscCall(MatSetBlockSizesFromMats(mat,matin,matin));
  PetscCall(MatSetType(mat,((PetscObject)matin)->type_name));
  a = (Mat_MPIAIJ*)mat->data;

  mat->factortype   = matin->factortype;
  mat->assembled    = matin->assembled;
  mat->insertmode   = NOT_SET_VALUES;
  mat->preallocated = matin->preallocated;

  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  a->donotstash   = oldmat->donotstash;
  a->roworiented  = oldmat->roworiented;
  a->rowindices   = NULL;
  a->rowvalues    = NULL;
  a->getrowactive = PETSC_FALSE;

  PetscCall(PetscLayoutReference(matin->rmap,&mat->rmap));
  PetscCall(PetscLayoutReference(matin->cmap,&mat->cmap));

  if (oldmat->colmap) {
#if defined(PETSC_USE_CTABLE)
    PetscCall(PetscTableCreateCopy(oldmat->colmap,&a->colmap));
#else
    PetscCall(PetscMalloc1(mat->cmap->N,&a->colmap));
    PetscCall(PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt)));
    PetscCall(PetscArraycpy(a->colmap,oldmat->colmap,mat->cmap->N));
#endif
  } else a->colmap = NULL;
  if (oldmat->garray) {
    PetscInt len;
    len = oldmat->B->cmap->n;
    PetscCall(PetscMalloc1(len+1,&a->garray));
    PetscCall(PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt)));
    if (len) PetscCall(PetscArraycpy(a->garray,oldmat->garray,len));
  } else a->garray = NULL;

  /* It may happen MatDuplicate is called with a non-assembled matrix
     In fact, MatDuplicate only requires the matrix to be preallocated
     This may happen inside a DMCreateMatrix_Shell */
  if (oldmat->lvec) {
    PetscCall(VecDuplicate(oldmat->lvec,&a->lvec));
    PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec));
  }
  if (oldmat->Mvctx) {
    PetscCall(VecScatterCopy(oldmat->Mvctx,&a->Mvctx));
    PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx));
  }
  PetscCall(MatDuplicate(oldmat->A,cpvalues,&a->A));
  PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A));
  PetscCall(MatDuplicate(oldmat->B,cpvalues,&a->B));
  PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B));
  PetscCall(PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist));
  *newmat = mat;
  PetscFunctionReturn(0);
}

/* Loads an MPIAIJ matrix from a viewer; dispatches on viewer type (binary or HDF5). */
PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
{
  PetscBool isbinary, ishdf5;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(newMat,MAT_CLASSID,1);
  PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2);
  /* force binary viewer to load .info file if it has not yet done so */
  PetscCall(PetscViewerSetUp(viewer));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERHDF5, &ishdf5));
  if (isbinary) {
    PetscCall(MatLoad_MPIAIJ_Binary(newMat,viewer));
  } else if (ishdf5) {
#if defined(PETSC_HAVE_HDF5)
    PetscCall(MatLoad_AIJ_HDF5(newMat,viewer));
#else
    SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"HDF5 not supported in this build.\nPlease reconfigure using --download-hdf5");
#endif
  } else {
    SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newMat)->type_name);
  }
  PetscFunctionReturn(0);
}

/* Reads an MPIAIJ matrix from a PETSc binary viewer: header (classid, M, N, nz), per-row
   lengths (converted in place to CSR row offsets), then column indices and values, which
   are handed to MatMPIAIJSetPreallocationCSR(). */
PetscErrorCode MatLoad_MPIAIJ_Binary(Mat mat, PetscViewer viewer)
{
  PetscInt    header[4],M,N,m,nz,rows,cols,sum,i;
  PetscInt    *rowidxs,*colidxs;
  PetscScalar *matvals;

  PetscFunctionBegin;
  PetscCall(PetscViewerSetUp(viewer));

  /* read in matrix header */
  PetscCall(PetscViewerBinaryRead(viewer,header,4,NULL,PETSC_INT));
  PetscCheck(header[0] == MAT_FILE_CLASSID,PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Not a matrix object in file");
  M = header[1]; N = header[2]; nz = header[3];
  PetscCheck(M >= 0,PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix row size (%" PetscInt_FMT ") in file is negative",M);
  PetscCheck(N >= 0,PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix column size (%" PetscInt_FMT ") in file is negative",N);
  PetscCheck(nz >= 0,PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk, cannot load as MPIAIJ");

  /* set block sizes from the viewer's .info file */
  PetscCall(MatLoad_Binary_BlockSizes(mat,viewer));
  /* set global sizes if not set already */
  if (mat->rmap->N < 0) mat->rmap->N = M;
  if (mat->cmap->N < 0) mat->cmap->N = N;
  PetscCall(PetscLayoutSetUp(mat->rmap));
  PetscCall(PetscLayoutSetUp(mat->cmap));

  /* check if the matrix sizes are correct */
  PetscCall(MatGetSize(mat,&rows,&cols));
  PetscCheckFalse(M != rows || N != cols,PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%" PetscInt_FMT ", %" PetscInt_FMT ") than the input matrix (%" PetscInt_FMT ", %" PetscInt_FMT ")",M,N,rows,cols);

  /* read in row lengths and build row indices */
  PetscCall(MatGetLocalSize(mat,&m,NULL));
  PetscCall(PetscMalloc1(m+1,&rowidxs));
  PetscCall(PetscViewerBinaryReadAll(viewer,rowidxs+1,m,PETSC_DECIDE,M,PETSC_INT));
  /* prefix-sum the row lengths into CSR row offsets */
  rowidxs[0] = 0; for (i=0; i<m; i++) rowidxs[i+1] += rowidxs[i];
  PetscCall(MPIU_Allreduce(&rowidxs[m],&sum,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)viewer)));
  PetscCheck(sum == nz,PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Inconsistent matrix data in file: nonzeros = %" PetscInt_FMT ", sum-row-lengths = %" PetscInt_FMT,nz,sum);
  /* read in column indices and matrix values */
  PetscCall(PetscMalloc2(rowidxs[m],&colidxs,rowidxs[m],&matvals));
  PetscCall(PetscViewerBinaryReadAll(viewer,colidxs,rowidxs[m],PETSC_DETERMINE,PETSC_DETERMINE,PETSC_INT));
  PetscCall(PetscViewerBinaryReadAll(viewer,matvals,rowidxs[m],PETSC_DETERMINE,PETSC_DETERMINE,PETSC_SCALAR));
  /* store matrix indices and values */
  PetscCall(MatMPIAIJSetPreallocationCSR(mat,rowidxs,colidxs,matvals));
  PetscCall(PetscFree(rowidxs));
  PetscCall(PetscFree2(colidxs,matvals));
  PetscFunctionReturn(0);
}

/* Not scalable because of ISAllGather() unless getting all columns. */
PetscErrorCode ISGetSeqIS_Private(Mat mat,IS iscol,IS *isseq)
{
  IS          iscol_local;
  PetscBool   isstride;
  PetscMPIInt lisstride=0,gisstride;

  PetscFunctionBegin;
  /* check if we are grabbing all columns*/
  PetscCall(PetscObjectTypeCompare((PetscObject)iscol,ISSTRIDE,&isstride));

  if (isstride) {
    PetscInt start,len,mstart,mlen;
    PetscCall(ISStrideGetInfo(iscol,&start,NULL));
    PetscCall(ISGetLocalSize(iscol,&len));
    PetscCall(MatGetOwnershipRangeColumn(mat,&mstart,&mlen));
    if (mstart == start && mlen-mstart == len) lisstride = 1;
  }

  /* all ranks must agree (MPI_MIN) that their local piece matches the ownership range */
  PetscCall(MPIU_Allreduce(&lisstride,&gisstride,1,MPI_INT,MPI_MIN,PetscObjectComm((PetscObject)mat)));
  if (gisstride) {
    PetscInt N;
    PetscCall(MatGetSize(mat,NULL,&N));
    PetscCall(ISCreateStride(PETSC_COMM_SELF,N,0,1,&iscol_local));
    PetscCall(ISSetIdentity(iscol_local));
    PetscCall(PetscInfo(mat,"Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n"));
  } else {
    PetscInt cbs;
    PetscCall(ISGetBlockSize(iscol,&cbs));
    PetscCall(ISAllGather(iscol,&iscol_local));
    PetscCall(ISSetBlockSize(iscol_local,cbs));
  }

  *isseq = iscol_local;
  PetscFunctionReturn(0);
}

/*
   Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and global size of iscol_local
   (see MatCreateSubMatrix_MPIAIJ_nonscalable)

   Input
Parameters:
   mat - matrix
   isrow - parallel row index set; its local indices are a subset of local columns of mat,
           i.e., mat->rstart <= isrow[i] < mat->rend
   iscol - parallel column index set; its local indices are a subset of local columns of mat,
           i.e., mat->cstart <= iscol[i] < mat->cend
   Output Parameter:
   isrow_d,iscol_d - sequential row and column index sets for retrieving mat->A
   iscol_o - sequential column index set for retrieving mat->B
   garray - column map; garray[i] indicates global location of iscol_o[i] in iscol
*/
PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat,IS isrow,IS iscol,IS *isrow_d,IS *iscol_d,IS *iscol_o,const PetscInt *garray[])
{
  Vec            x,cmap;
  const PetscInt *is_idx;
  PetscScalar    *xarray,*cmaparray;
  PetscInt       ncols,isstart,*idx,m,rstart,*cmap1,count;
  Mat_MPIAIJ     *a=(Mat_MPIAIJ*)mat->data;
  Mat            B=a->B;
  Vec            lvec=a->lvec,lcmap;
  PetscInt       i,cstart,cend,Bn=B->cmap->N;
  MPI_Comm       comm;
  VecScatter     Mvctx=a->Mvctx;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)mat,&comm));
  PetscCall(ISGetLocalSize(iscol,&ncols));

  /* (1) iscol is a sub-column vector of mat, pad it with '-1.' to form a full vector x */
  PetscCall(MatCreateVecs(mat,&x,NULL));
  PetscCall(VecSet(x,-1.0));
  PetscCall(VecDuplicate(x,&cmap));
  PetscCall(VecSet(cmap,-1.0));

  /* Get start indices */
  PetscCallMPI(MPI_Scan(&ncols,&isstart,1,MPIU_INT,MPI_SUM,comm));
  isstart -= ncols;  /* exclusive prefix sum: global offset of this rank's piece of iscol */
  PetscCall(MatGetOwnershipRangeColumn(mat,&cstart,&cend));

  PetscCall(ISGetIndices(iscol,&is_idx));
  PetscCall(VecGetArray(x,&xarray));
  PetscCall(VecGetArray(cmap,&cmaparray));
  PetscCall(PetscMalloc1(ncols,&idx));
  for (i=0; i<ncols; i++) {
    xarray[is_idx[i]-cstart]    = (PetscScalar)is_idx[i];
    cmaparray[is_idx[i]-cstart] = i + isstart;      /* global index of iscol[i] */
    idx[i]                      = is_idx[i]-cstart; /* local index of iscol[i]  */
  }
  PetscCall(VecRestoreArray(x,&xarray));
  PetscCall(VecRestoreArray(cmap,&cmaparray));
  PetscCall(ISRestoreIndices(iscol,&is_idx));

  /* Get iscol_d */
  PetscCall(ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,iscol_d));
  PetscCall(ISGetBlockSize(iscol,&i));
  PetscCall(ISSetBlockSize(*iscol_d,i));

  /* Get isrow_d */
  PetscCall(ISGetLocalSize(isrow,&m));
  rstart = mat->rmap->rstart;
  PetscCall(PetscMalloc1(m,&idx));
  PetscCall(ISGetIndices(isrow,&is_idx));
  for (i=0; i<m; i++) idx[i] = is_idx[i]-rstart;
  PetscCall(ISRestoreIndices(isrow,&is_idx));

  PetscCall(ISCreateGeneral(PETSC_COMM_SELF,m,idx,PETSC_OWN_POINTER,isrow_d));
  PetscCall(ISGetBlockSize(isrow,&i));
  PetscCall(ISSetBlockSize(*isrow_d,i));

  /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */
  PetscCall(VecScatterBegin(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD));
  PetscCall(VecScatterEnd(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD));

  PetscCall(VecDuplicate(lvec,&lcmap));

  PetscCall(VecScatterBegin(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD));
  PetscCall(VecScatterEnd(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD));

  /* (3) create sequential iscol_o (a subset of iscol) and isgarray */
  /* off-process column indices */
  count = 0;
  PetscCall(PetscMalloc1(Bn,&idx));
  PetscCall(PetscMalloc1(Bn,&cmap1));

  PetscCall(VecGetArray(lvec,&xarray));
  PetscCall(VecGetArray(lcmap,&cmaparray));
  for (i=0; i<Bn; i++) {
    /* entries left at -1.0 mark columns of B that are not selected by iscol */
    if (PetscRealPart(xarray[i]) > -1.0) {
      idx[count]   = i;                                     /* local column index in off-diagonal part B */
      cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]); /* column index in submat */
      count++;
    }
  }
  PetscCall(VecRestoreArray(lvec,&xarray));
  PetscCall(VecRestoreArray(lcmap,&cmaparray));

  PetscCall(ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_COPY_VALUES,iscol_o));
  /* cannot ensure iscol_o has same blocksize as iscol! */

  PetscCall(PetscFree(idx));
  *garray = cmap1;

  PetscCall(VecDestroy(&x));
  PetscCall(VecDestroy(&cmap));
  PetscCall(VecDestroy(&lcmap));
  PetscFunctionReturn(0);
}

/* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */
PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *submat)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)mat->data,*asub;
  Mat        M = NULL;
  MPI_Comm   comm;
  IS         iscol_d,isrow_d,iscol_o;
  Mat        Asub = NULL,Bsub = NULL;
  PetscInt   n;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)mat,&comm));

  if (call == MAT_REUSE_MATRIX) {
    /* Retrieve isrow_d, iscol_d and iscol_o from submat */
    PetscCall(PetscObjectQuery((PetscObject)*submat,"isrow_d",(PetscObject*)&isrow_d));
    PetscCheck(isrow_d,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"isrow_d passed in was not used before, cannot reuse");
PetscCall(PetscObjectQuery((PetscObject)*submat,"iscol_d",(PetscObject*)&iscol_d));
    PetscCheck(iscol_d,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_d passed in was not used before, cannot reuse");

    PetscCall(PetscObjectQuery((PetscObject)*submat,"iscol_o",(PetscObject*)&iscol_o));
    PetscCheck(iscol_o,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_o passed in was not used before, cannot reuse");

    /* Update diagonal and off-diagonal portions of submat */
    asub = (Mat_MPIAIJ*)(*submat)->data;
    PetscCall(MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->A));
    PetscCall(ISGetLocalSize(iscol_o,&n));
    if (n) {
      PetscCall(MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->B));
    }
    PetscCall(MatAssemblyBegin(*submat,MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(*submat,MAT_FINAL_ASSEMBLY));

  } else { /* call == MAT_INITIAL_MATRIX) */
    const PetscInt *garray;
    PetscInt       BsubN;

    /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) */
    PetscCall(ISGetSeqIS_SameColDist_Private(mat,isrow,iscol,&isrow_d,&iscol_d,&iscol_o,&garray));

    /* Create local submatrices Asub and Bsub */
    PetscCall(MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Asub));
    PetscCall(MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Bsub));

    /* Create submatrix M */
    PetscCall(MatCreateMPIAIJWithSeqAIJ(comm,Asub,Bsub,garray,&M));

    /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */
    asub = (Mat_MPIAIJ*)M->data;

    PetscCall(ISGetLocalSize(iscol_o,&BsubN));
    n = asub->B->cmap->N;
    if (BsubN > n) {
      /* This case can be tested using ~petsc/src/tao/bound/tutorials/runplate2_3 */
      const PetscInt *idx;
      PetscInt       i,j,*idx_new,*subgarray = asub->garray;
      PetscCall(PetscInfo(M,"submatrix Bn %" PetscInt_FMT " != BsubN %" PetscInt_FMT ", update iscol_o\n",n,BsubN));

      PetscCall(PetscMalloc1(n,&idx_new));
      j = 0;
      PetscCall(ISGetIndices(iscol_o,&idx));
      /* merge-scan: both subgarray and garray are used in increasing order */
      for (i=0; i<n; i++) {
        if (j >= BsubN) break;
        while (subgarray[i] > garray[j]) j++;

        if (subgarray[i] == garray[j]) {
          idx_new[i] = idx[j++];
        } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"subgarray[%" PetscInt_FMT "]=%" PetscInt_FMT " cannot < garray[%" PetscInt_FMT "]=%" PetscInt_FMT,i,subgarray[i],j,garray[j]);
      }
      PetscCall(ISRestoreIndices(iscol_o,&idx));

      PetscCall(ISDestroy(&iscol_o));
      PetscCall(ISCreateGeneral(PETSC_COMM_SELF,n,idx_new,PETSC_OWN_POINTER,&iscol_o));

    } else if (BsubN < n) {
      SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Columns of Bsub (%" PetscInt_FMT ") cannot be smaller than B's (%" PetscInt_FMT ")",BsubN,asub->B->cmap->N);
    }

    PetscCall(PetscFree(garray));
    *submat = M;

    /* Save isrow_d, iscol_d and iscol_o used in processor for next request */
    PetscCall(PetscObjectCompose((PetscObject)M,"isrow_d",(PetscObject)isrow_d));
    PetscCall(ISDestroy(&isrow_d));

    PetscCall(PetscObjectCompose((PetscObject)M,"iscol_d",(PetscObject)iscol_d));
    PetscCall(ISDestroy(&iscol_d));

    PetscCall(PetscObjectCompose((PetscObject)M,"iscol_o",(PetscObject)iscol_o));
    PetscCall(ISDestroy(&iscol_o));
  }
  PetscFunctionReturn(0);
}

/* Top-level submatrix extraction for MPIAIJ; dispatches to one of three implementations
   depending on whether isrow/iscol share the matrix's processor distribution. */
PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
{
  IS        iscol_local=NULL,isrow_d;
  PetscInt  csize;
  PetscInt  n,i,j,start,end;
  PetscBool sameRowDist=PETSC_FALSE,sameDist[2],tsameDist[2];
  MPI_Comm  comm;

  PetscFunctionBegin;
  /* If isrow has same processor distribution as mat,
     call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */
  if (call == MAT_REUSE_MATRIX) {
    /* on reuse, infer which path created *newmat from the IS objects composed on it */
    PetscCall(PetscObjectQuery((PetscObject)*newmat,"isrow_d",(PetscObject*)&isrow_d));
    if (isrow_d) {
      sameRowDist  = PETSC_TRUE;
      tsameDist[1] = PETSC_TRUE; /* sameColDist */
    } else {
      PetscCall(PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_local));
      if (iscol_local) {
        sameRowDist  = PETSC_TRUE;
        tsameDist[1] = PETSC_FALSE; /* !sameColDist */
      }
    }
  } else {
    /* Check if isrow has same processor distribution as mat */
    sameDist[0] = PETSC_FALSE;
    PetscCall(ISGetLocalSize(isrow,&n));
    if (!n) {
      sameDist[0] = PETSC_TRUE;
    } else {
      PetscCall(ISGetMinMax(isrow,&i,&j));
      PetscCall(MatGetOwnershipRange(mat,&start,&end));
      if (i >= start && j < end) {
        sameDist[0] = PETSC_TRUE;
      }
    }

    /* Check if iscol has same processor distribution as mat */
    sameDist[1] = PETSC_FALSE;
    PetscCall(ISGetLocalSize(iscol,&n));
    if (!n) {
      sameDist[1] = PETSC_TRUE;
    } else {
      PetscCall(ISGetMinMax(iscol,&i,&j));
PetscCall(MatGetOwnershipRangeColumn(mat,&start,&end));
      if (i >= start && j < end) sameDist[1] = PETSC_TRUE;
    }

    /* all ranks must agree (logical AND) that the distributions match */
    PetscCall(PetscObjectGetComm((PetscObject)mat,&comm));
    PetscCall(MPIU_Allreduce(&sameDist,&tsameDist,2,MPIU_BOOL,MPI_LAND,comm));
    sameRowDist = tsameDist[0];
  }

  if (sameRowDist) {
    if (tsameDist[1]) { /* sameRowDist & sameColDist */
      /* isrow and iscol have same processor distribution as mat */
      PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat,isrow,iscol,call,newmat));
      PetscFunctionReturn(0);
    } else { /* sameRowDist */
      /* isrow has same processor distribution as mat */
      if (call == MAT_INITIAL_MATRIX) {
        PetscBool sorted;
        PetscCall(ISGetSeqIS_Private(mat,iscol,&iscol_local));
        PetscCall(ISGetLocalSize(iscol_local,&n)); /* local size of iscol_local = global columns of newmat */
        PetscCall(ISGetSize(iscol,&i));
        PetscCheck(n == i,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"n %" PetscInt_FMT " != size of iscol %" PetscInt_FMT,n,i);

        PetscCall(ISSorted(iscol_local,&sorted));
        if (sorted) {
          /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local be sorted; it can have duplicate indices */
          PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,iscol_local,MAT_INITIAL_MATRIX,newmat));
          PetscFunctionReturn(0);
        }
        /* unsorted iscol_local: fall through to the general (nonscalable) path below */
      } else { /* call == MAT_REUSE_MATRIX */
        IS iscol_sub;
        PetscCall(PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub));
        if (iscol_sub) {
          PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,NULL,call,newmat));
          PetscFunctionReturn(0);
        }
      }
    }
  }

  /* General case: iscol -> iscol_local which has global size of iscol */
  if (call == MAT_REUSE_MATRIX) {
    PetscCall(PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local));
    PetscCheck(iscol_local,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
  } else {
    if (!iscol_local) {
      PetscCall(ISGetSeqIS_Private(mat,iscol,&iscol_local));
    }
  }

  PetscCall(ISGetLocalSize(iscol,&csize));
  PetscCall(MatCreateSubMatrix_MPIAIJ_nonscalable(mat,isrow,iscol_local,csize,call,newmat));

  if (call == MAT_INITIAL_MATRIX) {
    /* stash iscol_local on the submatrix so a later MAT_REUSE_MATRIX call can find it */
    PetscCall(PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local));
    PetscCall(ISDestroy(&iscol_local));
  }
  PetscFunctionReturn(0);
}

/*@C
     MatCreateMPIAIJWithSeqAIJ - creates a MPIAIJ matrix using SeqAIJ matrices that contain the "diagonal"
         and "off-diagonal" part of the matrix in CSR format.

   Collective

   Input Parameters:
+  comm - MPI communicator
.  A - "diagonal" portion of matrix
.  B - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine
-  garray - global index of B columns

   Output Parameter:
.   mat - the matrix, with input A as its local diagonal matrix
   Level: advanced

   Notes:
       See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix.
       A becomes part of output mat, B is destroyed by this routine. The user cannot use A and B anymore.
.seealso: MatCreateMPIAIJWithSplitArrays()
@*/
PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm,Mat A,Mat B,const PetscInt garray[],Mat *mat)
{
  Mat_MPIAIJ        *maij;
  Mat_SeqAIJ        *b=(Mat_SeqAIJ*)B->data,*bnew;
  PetscInt          *oi=b->i,*oj=b->j,i,nz,col;
  const PetscScalar *oa;
  Mat               Bnew;
  PetscInt          m,n,N;

  PetscFunctionBegin;
  PetscCall(MatCreate(comm,mat));
  PetscCall(MatGetSize(A,&m,&n));
  PetscCheck(m == B->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Am %" PetscInt_FMT " != Bm %" PetscInt_FMT,m,B->rmap->N);
  PetscCheck(A->rmap->bs == B->rmap->bs,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A row bs %" PetscInt_FMT " != B row bs %" PetscInt_FMT,A->rmap->bs,B->rmap->bs);
  /* remove check below; When B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be same as A */
  /* PetscCheck(A->cmap->bs == B->cmap->bs,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %" PetscInt_FMT " != B column bs %" PetscInt_FMT,A->cmap->bs,B->cmap->bs); */

  /* Get global columns of mat: sum of the local column counts n over the communicator */
  PetscCall(MPIU_Allreduce(&n,&N,1,MPIU_INT,MPI_SUM,comm));

  PetscCall(MatSetSizes(*mat,m,n,PETSC_DECIDE,N));
  PetscCall(MatSetType(*mat,MATMPIAIJ));
  PetscCall(MatSetBlockSizes(*mat,A->rmap->bs,A->cmap->bs));
  maij = (Mat_MPIAIJ*)(*mat)->data;

  /* Preallocation is bypassed: the diagonal/off-diagonal blocks are attached directly below */
  (*mat)->preallocated = PETSC_TRUE;

  PetscCall(PetscLayoutSetUp((*mat)->rmap));
  PetscCall(PetscLayoutSetUp((*mat)->cmap));

  /* Set A as diagonal portion of *mat; *mat takes over ownership of A (no reference is added) */
  maij->A = A;

  /* Translate B's local column indices to global indices in place, via the caller-supplied garray map */
  nz = oi[m];
  for (i=0; i<nz; i++) {
    col   = oj[i];
    oj[i] = garray[col];
  }

  /* Set Bnew as off-diagonal portion of *mat; Bnew aliases B's i/j/a arrays rather than copying them */
  PetscCall(MatSeqAIJGetArrayRead(B,&oa));
  PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,N,oi,oj,(PetscScalar*)oa,&Bnew));
  PetscCall(MatSeqAIJRestoreArrayRead(B,&oa));
  bnew        = (Mat_SeqAIJ*)Bnew->data;
  bnew->maxnz = b->maxnz; /* allocated nonzeros of B */
  maij->B     = Bnew;

  PetscCheck(B->rmap->N == Bnew->rmap->N,PETSC_COMM_SELF,PETSC_ERR_PLIB,"BN %" PetscInt_FMT " != BnewN %" PetscInt_FMT,B->rmap->N,Bnew->rmap->N);

  /* Transfer ownership of the shared arrays from B to Bnew before destroying B,
     so MatDestroy(&B) does not free memory Bnew still uses */
  b->singlemalloc = PETSC_FALSE; /* B arrays are shared by Bnew */
  b->free_a       = PETSC_FALSE;
  b->free_ij      = PETSC_FALSE;
  PetscCall(MatDestroy(&B));

  bnew->singlemalloc = PETSC_TRUE; /* arrays will be freed by MatDestroy(&Bnew) */
  bnew->free_a       = PETSC_TRUE;
  bnew->free_ij      = PETSC_TRUE;

  /* condense columns of maij->B via assembly; entries are purely local, hence MAT_NO_OFF_PROC_ENTRIES */
  PetscCall(MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE));
  PetscCall(MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY));
  PetscCall(MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE));
  PetscCall(MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE));
  PetscFunctionReturn(0);
}

extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool,Mat*);

/*
   Extracts the submatrix mat[isrow,iscol] keeping the parallel row distribution of mat:
   a sequential submatrix Msub is built first (via MatCreateSubMatrices_MPIAIJ_SingleIS_Local),
   then its rows are inserted into the parallel result. On MAT_INITIAL_MATRIX the work objects
   (Msub, iscol_sub, iscmap) are composed on *newmat so a MAT_REUSE_MATRIX call can recover them.
*/
PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat,IS isrow,IS iscol,IS iscol_local,MatReuse call,Mat *newmat)
{
  PetscInt       i,m,n,rstart,row,rend,nz,j,bs,cbs;
  PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
  Mat_MPIAIJ     *a=(Mat_MPIAIJ*)mat->data;
  Mat            M,Msub,B=a->B;
  MatScalar      *aa;
  Mat_SeqAIJ     *aij;
  PetscInt       *garray = a->garray,*colsub,Ncols;
  PetscInt       count,Bn=B->cmap->N,cstart=mat->cmap->rstart,cend=mat->cmap->rend;
  IS             iscol_sub,iscmap;
  const PetscInt *is_idx,*cmap;
  PetscBool      allcolumns=PETSC_FALSE;
  MPI_Comm       comm;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)mat,&comm));
  if (call == MAT_REUSE_MATRIX) {
    /* Recover the work objects composed on *newmat by a previous MAT_INITIAL_MATRIX call */
    PetscCall(PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub));
    PetscCheck(iscol_sub,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"SubIScol passed in was not used before, cannot reuse");
    PetscCall(ISGetLocalSize(iscol_sub,&count));

    PetscCall(PetscObjectQuery((PetscObject)*newmat,"Subcmap",(PetscObject*)&iscmap));
    PetscCheck(iscmap,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Subcmap passed in was not used before, cannot reuse");

    PetscCall(PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Msub));
    PetscCheck(Msub,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");

    PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_REUSE_MATRIX,PETSC_FALSE,&Msub));

  } else { /* call == MAT_INITIAL_MATRIX) */
    PetscBool flg;

    PetscCall(ISGetLocalSize(iscol,&n));
    PetscCall(ISGetSize(iscol,&Ncols));

    /* (1) iscol -> nonscalable iscol_local */
    /* Check for special case: each processor gets entire matrix columns */
    PetscCall(ISIdentity(iscol_local,&flg));
    if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE;
    /* allcolumns must agree on all ranks, hence the logical-AND reduction */
    PetscCall(MPIU_Allreduce(MPI_IN_PLACE,&allcolumns,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)mat)));
    if (allcolumns) {
      iscol_sub = iscol_local;
      PetscCall(PetscObjectReference((PetscObject)iscol_local));
      PetscCall(ISCreateStride(PETSC_COMM_SELF,n,0,1,&iscmap));

    } else {
      /* (2) iscol_local -> iscol_sub and iscmap. Implementation below requires iscol_local be sorted, it can have duplicate indices */
      PetscInt *idx,*cmap1,k;
      PetscCall(PetscMalloc1(Ncols,&idx));
      PetscCall(PetscMalloc1(Ncols,&cmap1));
      PetscCall(ISGetIndices(iscol_local,&is_idx));
      count = 0;
      k     = 0;
      for (i=0; i<Ncols; i++) {
        j = is_idx[i];
        if (j >= cstart && j < cend) {
          /* diagonal part of mat */
          idx[count]     = j;
          cmap1[count++] = i; /* column index in submat */
        } else if (Bn) {
          /* off-diagonal part of mat; merge-walk garray (sorted) against the sorted indices */
          if (j == garray[k]) {
            idx[count]     = j;
            cmap1[count++] = i; /* column index in submat */
          } else if (j > garray[k]) {
            while (j > garray[k] && k < Bn-1) k++;
            if (j == garray[k]) {
              idx[count]     = j;
              cmap1[count++] = i; /* column index in submat */
            }
          }
        }
      }
      PetscCall(ISRestoreIndices(iscol_local,&is_idx));

      PetscCall(ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_OWN_POINTER,&iscol_sub));
      PetscCall(ISGetBlockSize(iscol,&cbs));
      PetscCall(ISSetBlockSize(iscol_sub,cbs));

      PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local),count,cmap1,PETSC_OWN_POINTER,&iscmap));
    }

    /* (3) Create sequential Msub */
    PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_INITIAL_MATRIX,allcolumns,&Msub));
  }

  PetscCall(ISGetLocalSize(iscol_sub,&count));
  aij = (Mat_SeqAIJ*)(Msub)->data;
  ii  = aij->i;
  PetscCall(ISGetIndices(iscmap,&cmap));

  /*
      m - number of local rows
      Ncols - number of columns (same on all processors)
      rstart - first row in new global matrix generated
  */
  PetscCall(MatGetSize(Msub,&m,NULL));

  if (call == MAT_INITIAL_MATRIX) {
    /* (4) Create parallel newmat */
    PetscMPIInt rank,size;
    PetscInt    csize;

    PetscCallMPI(MPI_Comm_size(comm,&size));
    PetscCallMPI(MPI_Comm_rank(comm,&rank));

    /*
        Determine the number of non-zeros in the diagonal and off-diagonal
        portions of the matrix in order to do correct preallocation
    */

    /* first get start and end of "diagonal" columns */
    PetscCall(ISGetLocalSize(iscol,&csize));
    if (csize == PETSC_DECIDE) {
      PetscCall(ISGetSize(isrow,&mglobal));
      if (mglobal == Ncols) { /* square matrix */
        nlocal = m;
      } else {
        /* spread the columns as evenly as possible; the first (Ncols % size) ranks get one extra */
        nlocal = Ncols/size + ((Ncols % size) > rank);
      }
    } else {
      nlocal = csize;
    }
    PetscCallMPI(MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm));
    rstart = rend - nlocal;
    PetscCheckFalse(rank == size - 1 && rend != Ncols,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT,rend,Ncols);

    /* next, compute all the lengths */
    jj = aij->j;
    PetscCall(PetscMalloc1(2*m+1,&dlens));
    olens = dlens + m; /* olens shares the single allocation with dlens */
    for (i=0; i<m; i++) {
      jend = ii[i+1] - ii[i];
      olen = 0;
      dlen = 0;
      for (j=0; j<jend; j++) {
        if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++;
        else dlen++;
        jj++;
      }
      olens[i] = olen;
      dlens[i] = dlen;
    }

    PetscCall(ISGetBlockSize(isrow,&bs));
    PetscCall(ISGetBlockSize(iscol,&cbs));

    PetscCall(MatCreate(comm,&M));
    PetscCall(MatSetSizes(M,m,nlocal,PETSC_DECIDE,Ncols));
    PetscCall(MatSetBlockSizes(M,bs,cbs));
    PetscCall(MatSetType(M,((PetscObject)mat)->type_name));
    PetscCall(MatMPIAIJSetPreallocation(M,0,dlens,0,olens));
    PetscCall(PetscFree(dlens));

  } else { /* call == MAT_REUSE_MATRIX */
    M = *newmat;
    PetscCall(MatGetLocalSize(M,&i,NULL));
    PetscCheck(i == m,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
    PetscCall(MatZeroEntries(M));
    /*
         The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
       rather than the slower MatSetValues().
    */
    M->was_assembled = PETSC_TRUE;
    M->assembled     = PETSC_FALSE;
  }

  /* (5) Set values of Msub to *newmat */
  PetscCall(PetscMalloc1(count,&colsub));
  PetscCall(MatGetOwnershipRange(M,&rstart,NULL));

  jj = aij->j;
  PetscCall(MatSeqAIJGetArrayRead(Msub,(const PetscScalar**)&aa));
  for (i=0; i<m; i++) {
    row = rstart + i;
    nz  = ii[i+1] - ii[i];
    /* map Msub's local column indices to the submatrix's global columns before inserting */
    for (j=0; j<nz; j++) colsub[j] = cmap[jj[j]];
    PetscCall(MatSetValues_MPIAIJ(M,1,&row,nz,colsub,aa,INSERT_VALUES));
    jj += nz; aa += nz;
  }
  PetscCall(MatSeqAIJRestoreArrayRead(Msub,(const PetscScalar**)&aa));
  PetscCall(ISRestoreIndices(iscmap,&cmap));

  PetscCall(MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY));

  PetscCall(PetscFree(colsub));

  /* save Msub, iscol_sub and iscmap used in processor for next request */
  if (call == MAT_INITIAL_MATRIX) {
    *newmat = M;
    PetscCall(PetscObjectCompose((PetscObject)(*newmat),"SubMatrix",(PetscObject)Msub));
    PetscCall(MatDestroy(&Msub));

    PetscCall(PetscObjectCompose((PetscObject)(*newmat),"SubIScol",(PetscObject)iscol_sub));
    PetscCall(ISDestroy(&iscol_sub));

    PetscCall(PetscObjectCompose((PetscObject)(*newmat),"Subcmap",(PetscObject)iscmap));
    PetscCall(ISDestroy(&iscmap));

    if (iscol_local) {
      PetscCall(PetscObjectCompose((PetscObject)(*newmat),"ISAllGather",(PetscObject)iscol_local));
      PetscCall(ISDestroy(&iscol_local));
    }
  }
  PetscFunctionReturn(0);
}

/*
    Not great since it makes two copies of the submatrix, first an SeqAIJ
  in local and then by concatenating the local matrices the end result.
  Writing it directly would be much like MatCreateSubMatrices_MPIAIJ()

    Note: This requires a sequential iscol with all indices.
*/
PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
{
  PetscMPIInt    rank,size;
  PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs;
  PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
  Mat            M,Mreuse;
  MatScalar      *aa,*vwork;
  MPI_Comm       comm;
  Mat_SeqAIJ     *aij;
  PetscBool      colflag,allcolumns=PETSC_FALSE;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)mat,&comm));
  PetscCallMPI(MPI_Comm_rank(comm,&rank));
  PetscCallMPI(MPI_Comm_size(comm,&size));

  /* Check for special case: each processor gets entire matrix columns */
  PetscCall(ISIdentity(iscol,&colflag));
  PetscCall(ISGetLocalSize(iscol,&n));
  if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE;
  /* the special case must hold on every rank, hence the logical-AND reduction */
  PetscCall(MPIU_Allreduce(MPI_IN_PLACE,&allcolumns,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)mat)));

  if (call == MAT_REUSE_MATRIX) {
    /* Recover the sequential work matrix composed on *newmat by a previous MAT_INITIAL_MATRIX call */
    PetscCall(PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse));
    PetscCheck(Mreuse,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
    PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,allcolumns,&Mreuse));
  } else {
    PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,allcolumns,&Mreuse));
  }

  /*
      m - number of local rows
      n - number of columns (same on all processors)
      rstart - first row in new global matrix generated
  */
  PetscCall(MatGetSize(Mreuse,&m,&n));
  PetscCall(MatGetBlockSizes(Mreuse,&bs,&cbs));
  if (call == MAT_INITIAL_MATRIX) {
    aij = (Mat_SeqAIJ*)(Mreuse)->data;
    ii  = aij->i;
    jj  = aij->j;

    /*
        Determine the number of non-zeros in the diagonal and off-diagonal
        portions of the matrix in order to do correct preallocation
    */

    /* first get start and end of "diagonal" columns */
    if (csize == PETSC_DECIDE) {
      PetscCall(ISGetSize(isrow,&mglobal));
      if (mglobal == n) { /* square matrix */
        nlocal = m;
      } else {
        /* spread columns as evenly as possible; the first (n % size) ranks get one extra */
        nlocal = n/size + ((n % size) > rank);
      }
    } else {
      nlocal = csize;
    }
    PetscCallMPI(MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm));
    rstart = rend - nlocal;
    PetscCheckFalse(rank == size - 1 && rend != n,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT,rend,n);

    /* next, compute all the lengths */
    PetscCall(PetscMalloc1(2*m+1,&dlens));
    olens = dlens + m; /* olens shares the single allocation with dlens */
    for (i=0; i<m; i++) {
      jend = ii[i+1] - ii[i];
      olen = 0;
      dlen = 0;
      for (j=0; j<jend; j++) {
        if (*jj < rstart || *jj >= rend) olen++;
        else dlen++;
        jj++;
      }
      olens[i] = olen;
      dlens[i] = dlen;
    }
    PetscCall(MatCreate(comm,&M));
    PetscCall(MatSetSizes(M,m,nlocal,PETSC_DECIDE,n));
    PetscCall(MatSetBlockSizes(M,bs,cbs));
    PetscCall(MatSetType(M,((PetscObject)mat)->type_name));
    PetscCall(MatMPIAIJSetPreallocation(M,0,dlens,0,olens));
    PetscCall(PetscFree(dlens));
  } else {
    PetscInt ml,nl;

    M = *newmat;
    PetscCall(MatGetLocalSize(M,&ml,&nl));
    PetscCheck(ml == m,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
    PetscCall(MatZeroEntries(M));
    /*
         The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
       rather than the slower MatSetValues().
    */
    M->was_assembled = PETSC_TRUE;
    M->assembled     = PETSC_FALSE;
  }
  PetscCall(MatGetOwnershipRange(M,&rstart,&rend));
  aij = (Mat_SeqAIJ*)(Mreuse)->data;
  ii  = aij->i;
  jj  = aij->j;

  /* trigger copy to CPU if needed */
  PetscCall(MatSeqAIJGetArrayRead(Mreuse,(const PetscScalar**)&aa));
  for (i=0; i<m; i++) {
    row   = rstart + i;
    nz    = ii[i+1] - ii[i];
    /* cwork/vwork walk the CSR arrays row by row; no copies are made */
    cwork = jj; jj += nz;
    vwork = aa; aa += nz;
    PetscCall(MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES));
  }
  PetscCall(MatSeqAIJRestoreArrayRead(Mreuse,(const PetscScalar**)&aa));

  PetscCall(MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY));
  *newmat = M;

  /* save submatrix used in processor for next request */
  if (call == MAT_INITIAL_MATRIX) {
    PetscCall(PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse));
    PetscCall(MatDestroy(&Mreuse));
  }
  PetscFunctionReturn(0);
}

/* Type-specific implementation behind MatMPIAIJSetPreallocationCSR(): preallocates from a
   local CSR description, inserts the values, and assembles (entries are purely local). */
PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
{
  PetscInt       m,cstart, cend,j,nnz,i,d;
  PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
  const PetscInt *JJ;
  PetscBool      nooffprocentries;

  PetscFunctionBegin;
  PetscCheckFalse(Ii[0],PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %" PetscInt_FMT,Ii[0]);

  PetscCall(PetscLayoutSetUp(B->rmap));
  PetscCall(PetscLayoutSetUp(B->cmap));
  m      = B->rmap->n;
  cstart = B->cmap->rstart;
  cend   = B->cmap->rend;
  rstart = B->rmap->rstart;

  PetscCall(PetscCalloc2(m,&d_nnz,m,&o_nnz));

  if (PetscDefined(USE_DEBUG)) {
    /* sanity-check each row's column range; assumes columns within a row are sorted
       (JJ[0] smallest, JJ[nnz-1] largest) -- NOTE(review): confirm against callers */
    for (i=0; i<m; i++) {
      nnz = Ii[i+1]- Ii[i];
      JJ  = J + Ii[i];
      PetscCheck(nnz >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %" PetscInt_FMT " has a negative %" PetscInt_FMT " number of columns",i,nnz);
      PetscCheck(!nnz || !(JJ[0] < 0),PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %" PetscInt_FMT " starts with negative column index %" PetscInt_FMT,i,JJ[0]);
      PetscCheck(!nnz || !(JJ[nnz-1] >= B->cmap->N),PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %" PetscInt_FMT " ends with too large a column index %" PetscInt_FMT " (max allowed %" PetscInt_FMT ")",i,JJ[nnz-1],B->cmap->N);
    }
  }

  /* split each row's count into diagonal-block ([cstart,cend)) and off-diagonal entries */
  for (i=0; i<m; i++) {
    nnz     = Ii[i+1]- Ii[i];
    JJ      = J + Ii[i];
    nnz_max = PetscMax(nnz_max,nnz);
    d       = 0;
    for (j=0; j<nnz; j++) {
      if (cstart <= JJ[j] && JJ[j] < cend) d++;
    }
    d_nnz[i] = d;
    o_nnz[i] = nnz - d;
  }
  PetscCall(MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz));
  PetscCall(PetscFree2(d_nnz,o_nnz));

  for (i=0; i<m; i++) {
    ii = i + rstart;
    /* v may be NULL: then only the nonzero structure is inserted */
    PetscCall(MatSetValues_MPIAIJ(B,1,&ii,Ii[i+1] - Ii[i],J+Ii[i], v ? v + Ii[i] : NULL,INSERT_VALUES));
  }
  /* all inserted entries are locally owned, so skip the off-process communication during assembly */
  nooffprocentries    = B->nooffprocentries;
  B->nooffprocentries = PETSC_TRUE;
  PetscCall(MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY));
  B->nooffprocentries = nooffprocentries;

  PetscCall(MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE));
  PetscFunctionReturn(0);
}

/*@
   MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
   (the default parallel PETSc format).

   Collective

   Input Parameters:
+  B - the matrix
.  i - the indices into j for the start of each local row (starts with zero)
.  j - the column indices for each local row (starts with zero)
-  v - optional values in the matrix

   Level: developer

   Notes:
   The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
   thus you CANNOT change the matrix entries by changing the values of v[] after you have
   called this routine.
   Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.

   The i and j indices are 0 based, and i indices are indices corresponding to the local j array.

   The format which is used for the sparse matrix input, is equivalent to a
   row-major ordering.. i.e for the following matrix, the input data expected is
   as shown

$        1 0 0
$        2 0 3     P0
$       -------
$        4 5 6     P1
$
$     Process0 [P0]: rows_owned=[0,1]
$        i =  {0,1,3}  [size = nrow+1  = 2+1]
$        j =  {0,0,2}  [size = 3]
$        v =  {1,2,3}  [size = 3]
$
$     Process1 [P1]: rows_owned=[2]
$        i =  {0,3}    [size = nrow+1  = 1+1]
$        j =  {0,1,2}  [size = 3]
$        v =  {4,5,6}  [size = 3]

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MATMPIAIJ,
          MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
@*/
PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
{
  PetscFunctionBegin;
  /* Dispatch to the implementation composed on B (e.g. MatMPIAIJSetPreallocationCSR_MPIAIJ);
     a no-op if B's type does not provide one */
  PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));
  PetscFunctionReturn(0);
}

/*@C
   MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
   (the default parallel PETSc format).  For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective

   Input Parameters:
+  B - the matrix
.  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
           (same value is used for all local rows)
.  d_nnz - array containing the number of nonzeros in the various rows of the
           DIAGONAL portion of the local submatrix (possibly different for each row)
           or NULL (PETSC_NULL_INTEGER in Fortran), if d_nz is used to specify the nonzero structure.
           The size of this array is equal to the number of local rows, i.e 'm'.
           For matrices that will be factored, you must leave room for (and set)
           the diagonal entry even if it is zero.
.  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
           submatrix (same value is used for all local rows).
-  o_nnz - array containing the number of nonzeros in the various rows of the
           OFF-DIAGONAL portion of the local submatrix (possibly different for
           each row) or NULL (PETSC_NULL_INTEGER in Fortran), if o_nz is used to specify the nonzero
           structure. The size of this array is equal to the number
           of local rows, i.e 'm'.

   If the *_nnz parameter is given then the *_nz parameter is ignored

   The AIJ format (also called the Yale sparse matrix format or
   compressed row storage (CSR)), is fully compatible with standard Fortran 77
   storage.  The stored row and column indices begin with zero.
   See Users-Manual: ch_mat for details.

   The parallel matrix is partitioned such that the first m0 rows belong to
   process 0, the next m1 rows belong to process 1, the next m2 rows belong
   to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.

   The DIAGONAL portion of the local submatrix of a processor can be defined
   as the submatrix which is obtained by extraction the part corresponding to
   the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
   first row that belongs to the processor, r2 is the last row belonging to
   the this processor, and c1-c2 is range of indices of the local part of a
   vector suitable for applying the matrix to. This is an mxn matrix. In the
   common case of a square matrix, the row and column ranges are the same and
   the DIAGONAL part is also square. The remaining portion of the local
   submatrix (mxN) constitute the OFF-DIAGONAL portion.

   If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.

   You can call MatGetInfo() to get information on how effective the preallocation was;
   for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
   You can also run with the option -info and look for messages with the string
   malloc in them to see if additional memory allocation was needed.

   Example usage:

   Consider the following 8x8 matrix with 34 non-zero values, that is
   assembled across 3 processors. Lets assume that proc0 owns 3 rows,
   proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
   as follows:

.vb
            1  2  0  |  0  3  0  |  0  4
    Proc0   0  5  6  |  7  0  0  |  8  0
            9  0 10  | 11  0  0  | 12  0
    -------------------------------------
           13  0 14  | 15 16 17  |  0  0
    Proc1   0 18  0  | 19 20 21  |  0  0
            0  0  0  | 22 23  0  | 24  0
    -------------------------------------
    Proc2  25 26 27  |  0  0 28  | 29  0
           30  0  0  | 31 32 33  |  0 34
.ve

   This can be represented as a collection of submatrices as:

.vb
      A B C
      D E F
      G H I
.ve

   Where the submatrices A,B,C are owned by proc0, D,E,F are
   owned by proc1, G,H,I are owned by proc2.

   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'M','N' parameters are 8,8, and have the same values on all procs.

   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
   Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
   part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.

   When d_nz, o_nz parameters are specified, d_nz storage elements are
   allocated for every row of the local diagonal submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submat.
   One way to choose d_nz and o_nz is to use the max nonzerors per local
   rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
   In this case, the values of d_nz,o_nz are:
.vb
     proc0 : dnz = 2, o_nz = 2
     proc1 : dnz = 3, o_nz = 2
     proc2 : dnz = 1, o_nz = 4
.ve
   We are allocating m*(d_nz+o_nz) storage locations for every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc2. i.e we are using 12+15+10=37 storage locations to store
   34 values.

   When d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
   In the above case the values for d_nnz,o_nnz are:
.vb
     proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2: d_nnz = [1,1]   and o_nnz = [4,4]
.ve
   Here the space allocated is sum of all the above values i.e 34, and
   hence pre-allocation is perfect.

   Level: intermediate

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
          MATMPIAIJ, MatGetInfo(), PetscSplitOwnership()
@*/
PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(B,MAT_CLASSID,1);
  PetscValidType(B,1);
  /* Dispatch to the implementation composed on B; a no-op if B's type does not provide one */
  PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));
  PetscFunctionReturn(0);
}

/*@
   MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard
   CSR format for the local rows.

   Collective

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (Cannot be PETSC_DECIDE)
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
       calculated if N is given) For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.  i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
.  j - column indices
-  a - matrix values

   Output Parameter:
.  mat - the matrix

   Level: intermediate

   Notes:
   The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
   thus you CANNOT change the matrix entries by changing the values of a[] after you have
   called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.

   The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
4072 4073 The format which is used for the sparse matrix input, is equivalent to a 4074 row-major ordering.. i.e for the following matrix, the input data expected is 4075 as shown 4076 4077 Once you have created the matrix you can update it with new numerical values using MatUpdateMPIAIJWithArrays 4078 4079 $ 1 0 0 4080 $ 2 0 3 P0 4081 $ ------- 4082 $ 4 5 6 P1 4083 $ 4084 $ Process0 [P0]: rows_owned=[0,1] 4085 $ i = {0,1,3} [size = nrow+1 = 2+1] 4086 $ j = {0,0,2} [size = 3] 4087 $ v = {1,2,3} [size = 3] 4088 $ 4089 $ Process1 [P1]: rows_owned=[2] 4090 $ i = {0,3} [size = nrow+1 = 1+1] 4091 $ j = {0,1,2} [size = 3] 4092 $ v = {4,5,6} [size = 3] 4093 4094 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4095 MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays(), MatUpdateMPIAIJWithArrays() 4096 @*/ 4097 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 4098 { 4099 PetscFunctionBegin; 4100 PetscCheck(!i || !i[0],PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 4101 PetscCheck(m >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4102 PetscCall(MatCreate(comm,mat)); 4103 PetscCall(MatSetSizes(*mat,m,n,M,N)); 4104 /* PetscCall(MatSetBlockSizes(M,bs,cbs)); */ 4105 PetscCall(MatSetType(*mat,MATMPIAIJ)); 4106 PetscCall(MatMPIAIJSetPreallocationCSR(*mat,i,j,a)); 4107 PetscFunctionReturn(0); 4108 } 4109 4110 /*@ 4111 MatUpdateMPIAIJWithArrays - updates a MPI AIJ matrix using arrays that contain in standard 4112 CSR format for the local rows. Only the numerical values are updated the other arrays must be identical 4113 4114 Collective 4115 4116 Input Parameters: 4117 + mat - the matrix 4118 . m - number of local rows (Cannot be PETSC_DECIDE) 4119 . 
n - This value should be the same as the local size used in creating the 4120 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4121 calculated if N is given) For square matrices n is almost always m. 4122 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4123 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4124 . Ii - row indices; that is Ii[0] = 0, Ii[row] = Ii[row-1] + number of elements in that row of the matrix 4125 . J - column indices 4126 - v - matrix values 4127 4128 Level: intermediate 4129 4130 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4131 MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays(), MatUpdateMPIAIJWithArrays() 4132 @*/ 4133 PetscErrorCode MatUpdateMPIAIJWithArrays(Mat mat,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[]) 4134 { 4135 PetscInt cstart,nnz,i,j; 4136 PetscInt *ld; 4137 PetscBool nooffprocentries; 4138 Mat_MPIAIJ *Aij = (Mat_MPIAIJ*)mat->data; 4139 Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)Aij->A->data; 4140 PetscScalar *ad,*ao; 4141 const PetscInt *Adi = Ad->i; 4142 PetscInt ldi,Iii,md; 4143 4144 PetscFunctionBegin; 4145 PetscCheckFalse(Ii[0],PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 4146 PetscCheck(m >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4147 PetscCheck(m == mat->rmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Local number of rows cannot change from call to MatUpdateMPIAIJWithArrays()"); 4148 PetscCheck(n == mat->cmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Local number of columns cannot change from call to MatUpdateMPIAIJWithArrays()"); 4149 4150 PetscCall(MatSeqAIJGetArrayWrite(Aij->A,&ad)); 4151 PetscCall(MatSeqAIJGetArrayWrite(Aij->B,&ao)); 4152 cstart = mat->cmap->rstart; 4153 if (!Aij->ld) { 4154 /* count 
number of entries below block diagonal */ 4155 PetscCall(PetscCalloc1(m,&ld)); 4156 Aij->ld = ld; 4157 for (i=0; i<m; i++) { 4158 nnz = Ii[i+1]- Ii[i]; 4159 j = 0; 4160 while (J[j] < cstart && j < nnz) {j++;} 4161 J += nnz; 4162 ld[i] = j; 4163 } 4164 } else { 4165 ld = Aij->ld; 4166 } 4167 4168 for (i=0; i<m; i++) { 4169 nnz = Ii[i+1]- Ii[i]; 4170 Iii = Ii[i]; 4171 ldi = ld[i]; 4172 md = Adi[i+1]-Adi[i]; 4173 PetscCall(PetscArraycpy(ao,v + Iii,ldi)); 4174 PetscCall(PetscArraycpy(ad,v + Iii + ldi,md)); 4175 PetscCall(PetscArraycpy(ao + ldi,v + Iii + ldi + md,nnz - ldi - md)); 4176 ad += md; 4177 ao += nnz - md; 4178 } 4179 nooffprocentries = mat->nooffprocentries; 4180 mat->nooffprocentries = PETSC_TRUE; 4181 PetscCall(MatSeqAIJRestoreArrayWrite(Aij->A,&ad)); 4182 PetscCall(MatSeqAIJRestoreArrayWrite(Aij->B,&ao)); 4183 PetscCall(PetscObjectStateIncrease((PetscObject)Aij->A)); 4184 PetscCall(PetscObjectStateIncrease((PetscObject)Aij->B)); 4185 PetscCall(PetscObjectStateIncrease((PetscObject)mat)); 4186 PetscCall(MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY)); 4187 PetscCall(MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY)); 4188 mat->nooffprocentries = nooffprocentries; 4189 PetscFunctionReturn(0); 4190 } 4191 4192 /*@C 4193 MatCreateAIJ - Creates a sparse parallel matrix in AIJ format 4194 (the default parallel PETSc format). For good matrix assembly performance 4195 the user should preallocate the matrix storage by setting the parameters 4196 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 4197 performance can be increased by more than a factor of 50. 4198 4199 Collective 4200 4201 Input Parameters: 4202 + comm - MPI communicator 4203 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 4204 This value should be the same as the local size used in creating the 4205 y vector for the matrix-vector product y = Ax. 4206 . 
n - This value should be the same as the local size used in creating the 4207 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4208 calculated if N is given) For square matrices n is almost always m. 4209 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4210 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4211 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 4212 (same value is used for all local rows) 4213 . d_nnz - array containing the number of nonzeros in the various rows of the 4214 DIAGONAL portion of the local submatrix (possibly different for each row) 4215 or NULL, if d_nz is used to specify the nonzero structure. 4216 The size of this array is equal to the number of local rows, i.e 'm'. 4217 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 4218 submatrix (same value is used for all local rows). 4219 - o_nnz - array containing the number of nonzeros in the various rows of the 4220 OFF-DIAGONAL portion of the local submatrix (possibly different for 4221 each row) or NULL, if o_nz is used to specify the nonzero 4222 structure. The size of this array is equal to the number 4223 of local rows, i.e 'm'. 4224 4225 Output Parameter: 4226 . A - the matrix 4227 4228 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 4229 MatXXXXSetPreallocation() paradigm instead of this routine directly. 4230 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 4231 4232 Notes: 4233 If the *_nnz parameter is given then the *_nz parameter is ignored 4234 4235 m,n,M,N parameters specify the size of the matrix, and its partitioning across 4236 processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 4237 storage requirements for this matrix. 
   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
   processor then it must be used on all processors that share the object for
   that argument.

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   The parallel matrix is partitioned across processors such that the
   first m0 rows belong to process 0, the next m1 rows belong to
   process 1, the next m2 rows belong to process 2 etc.. where
   m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores
   values corresponding to [m x N] submatrix.

   The columns are logically partitioned with the n0 columns belonging
   to 0th partition, the next n1 columns belonging to the next
   partition etc.. where n0,n1,n2... are the input parameter 'n'.

   The DIAGONAL portion of the local submatrix on any given processor
   is the submatrix corresponding to the rows and columns m,n
   corresponding to the given processor. i.e diagonal matrix on
   process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1]
   etc. The remaining portion of the local submatrix [m x (N-n)]
   constitutes the OFF-DIAGONAL portion. The example below better
   illustrates this concept.

   For a square global matrix we define each processor's diagonal portion
   to be its local rows and the corresponding columns (a square submatrix);
   each processor's off-diagonal portion encompasses the remainder of the
   local matrix (a rectangular submatrix).

   If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.

   When calling this routine with a single process communicator, a matrix of
   type SEQAIJ is returned.
If a matrix of type MPIAIJ is desired for this 4273 type of communicator, use the construction mechanism 4274 .vb 4275 MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...); 4276 .ve 4277 4278 $ MatCreate(...,&A); 4279 $ MatSetType(A,MATMPIAIJ); 4280 $ MatSetSizes(A, m,n,M,N); 4281 $ MatMPIAIJSetPreallocation(A,...); 4282 4283 By default, this format uses inodes (identical nodes) when possible. 4284 We search for consecutive rows with the same nonzero structure, thereby 4285 reusing matrix information to achieve increased efficiency. 4286 4287 Options Database Keys: 4288 + -mat_no_inode - Do not use inodes 4289 . -mat_inode_limit <limit> - Sets inode limit (max limit=5) 4290 - -matmult_vecscatter_view <viewer> - View the vecscatter (i.e., communication pattern) used in MatMult() of sparse parallel matrices. 4291 See viewer types in manual of MatView(). Of them, ascii_matlab, draw or binary cause the vecscatter be viewed as a matrix. 4292 Entry (i,j) is the size of message (in bytes) rank i sends to rank j in one MatMult() call. 4293 4294 Example usage: 4295 4296 Consider the following 8x8 matrix with 34 non-zero values, that is 4297 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 4298 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 4299 as follows 4300 4301 .vb 4302 1 2 0 | 0 3 0 | 0 4 4303 Proc0 0 5 6 | 7 0 0 | 8 0 4304 9 0 10 | 11 0 0 | 12 0 4305 ------------------------------------- 4306 13 0 14 | 15 16 17 | 0 0 4307 Proc1 0 18 0 | 19 20 21 | 0 0 4308 0 0 0 | 22 23 0 | 24 0 4309 ------------------------------------- 4310 Proc2 25 26 27 | 0 0 28 | 29 0 4311 30 0 0 | 31 32 33 | 0 34 4312 .ve 4313 4314 This can be represented as a collection of submatrices as 4315 4316 .vb 4317 A B C 4318 D E F 4319 G H I 4320 .ve 4321 4322 Where the submatrices A,B,C are owned by proc0, D,E,F are 4323 owned by proc1, G,H,I are owned by proc2. 
   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'M','N' parameters are 8,8, and have the same values on all procs.

   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
   Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
   part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.

   When d_nz, o_nz parameters are specified, d_nz storage elements are
   allocated for every row of the local diagonal submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submat.
   One way to choose d_nz and o_nz is to use the max nonzeros per local
   rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
   In this case, the values of d_nz,o_nz are
.vb
     proc0 : dnz = 2, o_nz = 2
     proc1 : dnz = 3, o_nz = 2
     proc2 : dnz = 1, o_nz = 4
.ve
   We are allocating m*(d_nz+o_nz) storage locations for every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc2. i.e we are using 12+15+10=37 storage locations to store
   34 values.

   When d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
   In the above case the values for d_nnz,o_nnz are
.vb
     proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2: d_nnz = [1,1] and o_nnz = [4,4]
.ve
   Here the space allocated is sum of all the above values i.e 34, and
   hence pre-allocation is perfect.
4362 4363 Level: intermediate 4364 4365 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4366 MATMPIAIJ, MatCreateMPIAIJWithArrays() 4367 @*/ 4368 PetscErrorCode MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 4369 { 4370 PetscMPIInt size; 4371 4372 PetscFunctionBegin; 4373 PetscCall(MatCreate(comm,A)); 4374 PetscCall(MatSetSizes(*A,m,n,M,N)); 4375 PetscCallMPI(MPI_Comm_size(comm,&size)); 4376 if (size > 1) { 4377 PetscCall(MatSetType(*A,MATMPIAIJ)); 4378 PetscCall(MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz)); 4379 } else { 4380 PetscCall(MatSetType(*A,MATSEQAIJ)); 4381 PetscCall(MatSeqAIJSetPreallocation(*A,d_nz,d_nnz)); 4382 } 4383 PetscFunctionReturn(0); 4384 } 4385 4386 /*@C 4387 MatMPIAIJGetSeqAIJ - Returns the local piece of this distributed matrix 4388 4389 Not collective 4390 4391 Input Parameter: 4392 . A - The MPIAIJ matrix 4393 4394 Output Parameters: 4395 + Ad - The local diagonal block as a SeqAIJ matrix 4396 . Ao - The local off-diagonal block as a SeqAIJ matrix 4397 - colmap - An array mapping local column numbers of Ao to global column numbers of the parallel matrix 4398 4399 Note: The rows in Ad and Ao are in [0, Nr), where Nr is the number of local rows on this process. The columns 4400 in Ad are in [0, Nc) where Nc is the number of local columns. The columns are Ao are in [0, Nco), where Nco is 4401 the number of nonzero columns in the local off-diagonal piece of the matrix A. The array colmap maps these 4402 local column numbers to global column numbers in the original matrix. 
4403 4404 Level: intermediate 4405 4406 .seealso: MatMPIAIJGetLocalMat(), MatMPIAIJGetLocalMatCondensed(), MatCreateAIJ(), MATMPIAIJ, MATSEQAIJ 4407 @*/ 4408 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[]) 4409 { 4410 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4411 PetscBool flg; 4412 4413 PetscFunctionBegin; 4414 PetscCall(PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&flg)); 4415 PetscCheck(flg,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MATMPIAIJ matrix as input"); 4416 if (Ad) *Ad = a->A; 4417 if (Ao) *Ao = a->B; 4418 if (colmap) *colmap = a->garray; 4419 PetscFunctionReturn(0); 4420 } 4421 4422 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 4423 { 4424 PetscInt m,N,i,rstart,nnz,Ii; 4425 PetscInt *indx; 4426 PetscScalar *values; 4427 MatType rootType; 4428 4429 PetscFunctionBegin; 4430 PetscCall(MatGetSize(inmat,&m,&N)); 4431 if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */ 4432 PetscInt *dnz,*onz,sum,bs,cbs; 4433 4434 if (n == PETSC_DECIDE) { 4435 PetscCall(PetscSplitOwnership(comm,&n,&N)); 4436 } 4437 /* Check sum(n) = N */ 4438 PetscCall(MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm)); 4439 PetscCheck(sum == N,PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %" PetscInt_FMT " != global columns %" PetscInt_FMT,sum,N); 4440 4441 PetscCallMPI(MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm)); 4442 rstart -= m; 4443 4444 MatPreallocateBegin(comm,m,n,dnz,onz); 4445 for (i=0; i<m; i++) { 4446 PetscCall(MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL)); 4447 PetscCall(MatPreallocateSet(i+rstart,nnz,indx,dnz,onz)); 4448 PetscCall(MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL)); 4449 } 4450 4451 PetscCall(MatCreate(comm,outmat)); 4452 PetscCall(MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE)); 4453 PetscCall(MatGetBlockSizes(inmat,&bs,&cbs)); 4454 PetscCall(MatSetBlockSizes(*outmat,bs,cbs)); 4455 
PetscCall(MatGetRootType_Private(inmat,&rootType)); 4456 PetscCall(MatSetType(*outmat,rootType)); 4457 PetscCall(MatSeqAIJSetPreallocation(*outmat,0,dnz)); 4458 PetscCall(MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz)); 4459 MatPreallocateEnd(dnz,onz); 4460 PetscCall(MatSetOption(*outmat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE)); 4461 } 4462 4463 /* numeric phase */ 4464 PetscCall(MatGetOwnershipRange(*outmat,&rstart,NULL)); 4465 for (i=0; i<m; i++) { 4466 PetscCall(MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values)); 4467 Ii = i + rstart; 4468 PetscCall(MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES)); 4469 PetscCall(MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values)); 4470 } 4471 PetscCall(MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY)); 4472 PetscCall(MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY)); 4473 PetscFunctionReturn(0); 4474 } 4475 4476 PetscErrorCode MatFileSplit(Mat A,char *outfile) 4477 { 4478 PetscMPIInt rank; 4479 PetscInt m,N,i,rstart,nnz; 4480 size_t len; 4481 const PetscInt *indx; 4482 PetscViewer out; 4483 char *name; 4484 Mat B; 4485 const PetscScalar *values; 4486 4487 PetscFunctionBegin; 4488 PetscCall(MatGetLocalSize(A,&m,NULL)); 4489 PetscCall(MatGetSize(A,NULL,&N)); 4490 /* Should this be the type of the diagonal block of A? 
*/ 4491 PetscCall(MatCreate(PETSC_COMM_SELF,&B)); 4492 PetscCall(MatSetSizes(B,m,N,m,N)); 4493 PetscCall(MatSetBlockSizesFromMats(B,A,A)); 4494 PetscCall(MatSetType(B,MATSEQAIJ)); 4495 PetscCall(MatSeqAIJSetPreallocation(B,0,NULL)); 4496 PetscCall(MatGetOwnershipRange(A,&rstart,NULL)); 4497 for (i=0; i<m; i++) { 4498 PetscCall(MatGetRow(A,i+rstart,&nnz,&indx,&values)); 4499 PetscCall(MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES)); 4500 PetscCall(MatRestoreRow(A,i+rstart,&nnz,&indx,&values)); 4501 } 4502 PetscCall(MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY)); 4503 PetscCall(MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY)); 4504 4505 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank)); 4506 PetscCall(PetscStrlen(outfile,&len)); 4507 PetscCall(PetscMalloc1(len+6,&name)); 4508 PetscCall(PetscSNPrintf(name,len+6,"%s.%d",outfile,rank)); 4509 PetscCall(PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out)); 4510 PetscCall(PetscFree(name)); 4511 PetscCall(MatView(B,out)); 4512 PetscCall(PetscViewerDestroy(&out)); 4513 PetscCall(MatDestroy(&B)); 4514 PetscFunctionReturn(0); 4515 } 4516 4517 static PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(void *data) 4518 { 4519 Mat_Merge_SeqsToMPI *merge = (Mat_Merge_SeqsToMPI *)data; 4520 4521 PetscFunctionBegin; 4522 if (!merge) PetscFunctionReturn(0); 4523 PetscCall(PetscFree(merge->id_r)); 4524 PetscCall(PetscFree(merge->len_s)); 4525 PetscCall(PetscFree(merge->len_r)); 4526 PetscCall(PetscFree(merge->bi)); 4527 PetscCall(PetscFree(merge->bj)); 4528 PetscCall(PetscFree(merge->buf_ri[0])); 4529 PetscCall(PetscFree(merge->buf_ri)); 4530 PetscCall(PetscFree(merge->buf_rj[0])); 4531 PetscCall(PetscFree(merge->buf_rj)); 4532 PetscCall(PetscFree(merge->coi)); 4533 PetscCall(PetscFree(merge->coj)); 4534 PetscCall(PetscFree(merge->owners_co)); 4535 PetscCall(PetscLayoutDestroy(&merge->rowmap)); 4536 PetscCall(PetscFree(merge)); 4537 PetscFunctionReturn(0); 4538 } 4539 4540 #include <../src/mat/utils/freespace.h> 4541 #include 
<petscbt.h> 4542 4543 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat) 4544 { 4545 MPI_Comm comm; 4546 Mat_SeqAIJ *a =(Mat_SeqAIJ*)seqmat->data; 4547 PetscMPIInt size,rank,taga,*len_s; 4548 PetscInt N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj; 4549 PetscInt proc,m; 4550 PetscInt **buf_ri,**buf_rj; 4551 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 4552 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 4553 MPI_Request *s_waits,*r_waits; 4554 MPI_Status *status; 4555 const MatScalar *aa,*a_a; 4556 MatScalar **abuf_r,*ba_i; 4557 Mat_Merge_SeqsToMPI *merge; 4558 PetscContainer container; 4559 4560 PetscFunctionBegin; 4561 PetscCall(PetscObjectGetComm((PetscObject)mpimat,&comm)); 4562 PetscCall(PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0)); 4563 4564 PetscCallMPI(MPI_Comm_size(comm,&size)); 4565 PetscCallMPI(MPI_Comm_rank(comm,&rank)); 4566 4567 PetscCall(PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container)); 4568 PetscCheck(container,PetscObjectComm((PetscObject)mpimat),PETSC_ERR_PLIB,"Mat not created from MatCreateMPIAIJSumSeqAIJSymbolic"); 4569 PetscCall(PetscContainerGetPointer(container,(void**)&merge)); 4570 PetscCall(MatSeqAIJGetArrayRead(seqmat,&a_a)); 4571 aa = a_a; 4572 4573 bi = merge->bi; 4574 bj = merge->bj; 4575 buf_ri = merge->buf_ri; 4576 buf_rj = merge->buf_rj; 4577 4578 PetscCall(PetscMalloc1(size,&status)); 4579 owners = merge->rowmap->range; 4580 len_s = merge->len_s; 4581 4582 /* send and recv matrix values */ 4583 /*-----------------------------*/ 4584 PetscCall(PetscObjectGetNewTag((PetscObject)mpimat,&taga)); 4585 PetscCall(PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits)); 4586 4587 PetscCall(PetscMalloc1(merge->nsend+1,&s_waits)); 4588 for (proc=0,k=0; proc<size; proc++) { 4589 if (!len_s[proc]) continue; 4590 i = owners[proc]; 4591 PetscCallMPI(MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k)); 4592 k++; 4593 } 4594 4595 if 
(merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv,r_waits,status)); 4596 if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend,s_waits,status)); 4597 PetscCall(PetscFree(status)); 4598 4599 PetscCall(PetscFree(s_waits)); 4600 PetscCall(PetscFree(r_waits)); 4601 4602 /* insert mat values of mpimat */ 4603 /*----------------------------*/ 4604 PetscCall(PetscMalloc1(N,&ba_i)); 4605 PetscCall(PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai)); 4606 4607 for (k=0; k<merge->nrecv; k++) { 4608 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4609 nrows = *(buf_ri_k[k]); 4610 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */ 4611 nextai[k] = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure */ 4612 } 4613 4614 /* set values of ba */ 4615 m = merge->rowmap->n; 4616 for (i=0; i<m; i++) { 4617 arow = owners[rank] + i; 4618 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 4619 bnzi = bi[i+1] - bi[i]; 4620 PetscCall(PetscArrayzero(ba_i,bnzi)); 4621 4622 /* add local non-zero vals of this proc's seqmat into ba */ 4623 anzi = ai[arow+1] - ai[arow]; 4624 aj = a->j + ai[arow]; 4625 aa = a_a + ai[arow]; 4626 nextaj = 0; 4627 for (j=0; nextaj<anzi; j++) { 4628 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4629 ba_i[j] += aa[nextaj++]; 4630 } 4631 } 4632 4633 /* add received vals into ba */ 4634 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 4635 /* i-th row */ 4636 if (i == *nextrow[k]) { 4637 anzi = *(nextai[k]+1) - *nextai[k]; 4638 aj = buf_rj[k] + *(nextai[k]); 4639 aa = abuf_r[k] + *(nextai[k]); 4640 nextaj = 0; 4641 for (j=0; nextaj<anzi; j++) { 4642 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4643 ba_i[j] += aa[nextaj++]; 4644 } 4645 } 4646 nextrow[k]++; nextai[k]++; 4647 } 4648 } 4649 PetscCall(MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES)); 4650 } 4651 PetscCall(MatSeqAIJRestoreArrayRead(seqmat,&a_a)); 4652 
PetscCall(MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY)); 4653 PetscCall(MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY)); 4654 4655 PetscCall(PetscFree(abuf_r[0])); 4656 PetscCall(PetscFree(abuf_r)); 4657 PetscCall(PetscFree(ba_i)); 4658 PetscCall(PetscFree3(buf_ri_k,nextrow,nextai)); 4659 PetscCall(PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0)); 4660 PetscFunctionReturn(0); 4661 } 4662 4663 PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 4664 { 4665 Mat B_mpi; 4666 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4667 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 4668 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 4669 PetscInt M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j; 4670 PetscInt len,proc,*dnz,*onz,bs,cbs; 4671 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 4672 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 4673 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 4674 MPI_Status *status; 4675 PetscFreeSpaceList free_space=NULL,current_space=NULL; 4676 PetscBT lnkbt; 4677 Mat_Merge_SeqsToMPI *merge; 4678 PetscContainer container; 4679 4680 PetscFunctionBegin; 4681 PetscCall(PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0)); 4682 4683 /* make sure it is a PETSc comm */ 4684 PetscCall(PetscCommDuplicate(comm,&comm,NULL)); 4685 PetscCallMPI(MPI_Comm_size(comm,&size)); 4686 PetscCallMPI(MPI_Comm_rank(comm,&rank)); 4687 4688 PetscCall(PetscNew(&merge)); 4689 PetscCall(PetscMalloc1(size,&status)); 4690 4691 /* determine row ownership */ 4692 /*---------------------------------------------------------*/ 4693 PetscCall(PetscLayoutCreate(comm,&merge->rowmap)); 4694 PetscCall(PetscLayoutSetLocalSize(merge->rowmap,m)); 4695 PetscCall(PetscLayoutSetSize(merge->rowmap,M)); 4696 PetscCall(PetscLayoutSetBlockSize(merge->rowmap,1)); 4697 PetscCall(PetscLayoutSetUp(merge->rowmap)); 4698 PetscCall(PetscMalloc1(size,&len_si)); 4699 PetscCall(PetscMalloc1(size,&merge->len_s)); 4700 
4701 m = merge->rowmap->n; 4702 owners = merge->rowmap->range; 4703 4704 /* determine the number of messages to send, their lengths */ 4705 /*---------------------------------------------------------*/ 4706 len_s = merge->len_s; 4707 4708 len = 0; /* length of buf_si[] */ 4709 merge->nsend = 0; 4710 for (proc=0; proc<size; proc++) { 4711 len_si[proc] = 0; 4712 if (proc == rank) { 4713 len_s[proc] = 0; 4714 } else { 4715 len_si[proc] = owners[proc+1] - owners[proc] + 1; 4716 len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 4717 } 4718 if (len_s[proc]) { 4719 merge->nsend++; 4720 nrows = 0; 4721 for (i=owners[proc]; i<owners[proc+1]; i++) { 4722 if (ai[i+1] > ai[i]) nrows++; 4723 } 4724 len_si[proc] = 2*(nrows+1); 4725 len += len_si[proc]; 4726 } 4727 } 4728 4729 /* determine the number and length of messages to receive for ij-structure */ 4730 /*-------------------------------------------------------------------------*/ 4731 PetscCall(PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv)); 4732 PetscCall(PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri)); 4733 4734 /* post the Irecv of j-structure */ 4735 /*-------------------------------*/ 4736 PetscCall(PetscCommGetNewTag(comm,&tagj)); 4737 PetscCall(PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits)); 4738 4739 /* post the Isend of j-structure */ 4740 /*--------------------------------*/ 4741 PetscCall(PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits)); 4742 4743 for (proc=0, k=0; proc<size; proc++) { 4744 if (!len_s[proc]) continue; 4745 i = owners[proc]; 4746 PetscCallMPI(MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k)); 4747 k++; 4748 } 4749 4750 /* receives and sends of j-structure are complete */ 4751 /*------------------------------------------------*/ 4752 if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv,rj_waits,status)); 4753 if 
(merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend,sj_waits,status)); 4754 4755 /* send and recv i-structure */ 4756 /*---------------------------*/ 4757 PetscCall(PetscCommGetNewTag(comm,&tagi)); 4758 PetscCall(PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits)); 4759 4760 PetscCall(PetscMalloc1(len+1,&buf_s)); 4761 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4762 for (proc=0,k=0; proc<size; proc++) { 4763 if (!len_s[proc]) continue; 4764 /* form outgoing message for i-structure: 4765 buf_si[0]: nrows to be sent 4766 [1:nrows]: row index (global) 4767 [nrows+1:2*nrows+1]: i-structure index 4768 */ 4769 /*-------------------------------------------*/ 4770 nrows = len_si[proc]/2 - 1; 4771 buf_si_i = buf_si + nrows+1; 4772 buf_si[0] = nrows; 4773 buf_si_i[0] = 0; 4774 nrows = 0; 4775 for (i=owners[proc]; i<owners[proc+1]; i++) { 4776 anzi = ai[i+1] - ai[i]; 4777 if (anzi) { 4778 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 4779 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 4780 nrows++; 4781 } 4782 } 4783 PetscCallMPI(MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k)); 4784 k++; 4785 buf_si += len_si[proc]; 4786 } 4787 4788 if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv,ri_waits,status)); 4789 if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend,si_waits,status)); 4790 4791 PetscCall(PetscInfo(seqmat,"nsend: %d, nrecv: %d\n",merge->nsend,merge->nrecv)); 4792 for (i=0; i<merge->nrecv; i++) { 4793 PetscCall(PetscInfo(seqmat,"recv len_ri=%d, len_rj=%d from [%d]\n",len_ri[i],merge->len_r[i],merge->id_r[i])); 4794 } 4795 4796 PetscCall(PetscFree(len_si)); 4797 PetscCall(PetscFree(len_ri)); 4798 PetscCall(PetscFree(rj_waits)); 4799 PetscCall(PetscFree2(si_waits,sj_waits)); 4800 PetscCall(PetscFree(ri_waits)); 4801 PetscCall(PetscFree(buf_s)); 4802 PetscCall(PetscFree(status)); 4803 4804 /* compute a local seq matrix in each processor */ 4805 
/*----------------------------------------------*/ 4806 /* allocate bi array and free space for accumulating nonzero column info */ 4807 PetscCall(PetscMalloc1(m+1,&bi)); 4808 bi[0] = 0; 4809 4810 /* create and initialize a linked list */ 4811 nlnk = N+1; 4812 PetscCall(PetscLLCreate(N,N,nlnk,lnk,lnkbt)); 4813 4814 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 4815 len = ai[owners[rank+1]] - ai[owners[rank]]; 4816 PetscCall(PetscFreeSpaceGet(PetscIntMultTruncate(2,len)+1,&free_space)); 4817 4818 current_space = free_space; 4819 4820 /* determine symbolic info for each local row */ 4821 PetscCall(PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai)); 4822 4823 for (k=0; k<merge->nrecv; k++) { 4824 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4825 nrows = *buf_ri_k[k]; 4826 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 4827 nextai[k] = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure */ 4828 } 4829 4830 MatPreallocateBegin(comm,m,n,dnz,onz); 4831 len = 0; 4832 for (i=0; i<m; i++) { 4833 bnzi = 0; 4834 /* add local non-zero cols of this proc's seqmat into lnk */ 4835 arow = owners[rank] + i; 4836 anzi = ai[arow+1] - ai[arow]; 4837 aj = a->j + ai[arow]; 4838 PetscCall(PetscLLAddSorted(anzi,aj,N,&nlnk,lnk,lnkbt)); 4839 bnzi += nlnk; 4840 /* add received col data into lnk */ 4841 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 4842 if (i == *nextrow[k]) { /* i-th row */ 4843 anzi = *(nextai[k]+1) - *nextai[k]; 4844 aj = buf_rj[k] + *nextai[k]; 4845 PetscCall(PetscLLAddSorted(anzi,aj,N,&nlnk,lnk,lnkbt)); 4846 bnzi += nlnk; 4847 nextrow[k]++; nextai[k]++; 4848 } 4849 } 4850 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 4851 4852 /* if free space is not available, make more free space */ 4853 if (current_space->local_remaining<bnzi) { 4854 
PetscCall(PetscFreeSpaceGet(PetscIntSumTruncate(bnzi,current_space->total_array_size),¤t_space)); 4855 nspacedouble++; 4856 } 4857 /* copy data into free space, then initialize lnk */ 4858 PetscCall(PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt)); 4859 PetscCall(MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz)); 4860 4861 current_space->array += bnzi; 4862 current_space->local_used += bnzi; 4863 current_space->local_remaining -= bnzi; 4864 4865 bi[i+1] = bi[i] + bnzi; 4866 } 4867 4868 PetscCall(PetscFree3(buf_ri_k,nextrow,nextai)); 4869 4870 PetscCall(PetscMalloc1(bi[m]+1,&bj)); 4871 PetscCall(PetscFreeSpaceContiguous(&free_space,bj)); 4872 PetscCall(PetscLLDestroy(lnk,lnkbt)); 4873 4874 /* create symbolic parallel matrix B_mpi */ 4875 /*---------------------------------------*/ 4876 PetscCall(MatGetBlockSizes(seqmat,&bs,&cbs)); 4877 PetscCall(MatCreate(comm,&B_mpi)); 4878 if (n==PETSC_DECIDE) { 4879 PetscCall(MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N)); 4880 } else { 4881 PetscCall(MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE)); 4882 } 4883 PetscCall(MatSetBlockSizes(B_mpi,bs,cbs)); 4884 PetscCall(MatSetType(B_mpi,MATMPIAIJ)); 4885 PetscCall(MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz)); 4886 MatPreallocateEnd(dnz,onz); 4887 PetscCall(MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE)); 4888 4889 /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */ 4890 B_mpi->assembled = PETSC_FALSE; 4891 merge->bi = bi; 4892 merge->bj = bj; 4893 merge->buf_ri = buf_ri; 4894 merge->buf_rj = buf_rj; 4895 merge->coi = NULL; 4896 merge->coj = NULL; 4897 merge->owners_co = NULL; 4898 4899 PetscCall(PetscCommDestroy(&comm)); 4900 4901 /* attach the supporting struct to B_mpi for reuse */ 4902 PetscCall(PetscContainerCreate(PETSC_COMM_SELF,&container)); 4903 PetscCall(PetscContainerSetPointer(container,merge)); 4904 PetscCall(PetscContainerSetUserDestroy(container,MatDestroy_MPIAIJ_SeqsToMPI)); 4905 
PetscCall(PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container)); 4906 PetscCall(PetscContainerDestroy(&container)); 4907 *mpimat = B_mpi; 4908 4909 PetscCall(PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0)); 4910 PetscFunctionReturn(0); 4911 } 4912 4913 /*@C 4914 MatCreateMPIAIJSumSeqAIJ - Creates a MATMPIAIJ matrix by adding sequential 4915 matrices from each processor 4916 4917 Collective 4918 4919 Input Parameters: 4920 + comm - the communicators the parallel matrix will live on 4921 . seqmat - the input sequential matrices 4922 . m - number of local rows (or PETSC_DECIDE) 4923 . n - number of local columns (or PETSC_DECIDE) 4924 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4925 4926 Output Parameter: 4927 . mpimat - the parallel matrix generated 4928 4929 Level: advanced 4930 4931 Notes: 4932 The dimensions of the sequential matrix in each processor MUST be the same. 4933 The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be 4934 destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat. 
4935 @*/ 4936 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat) 4937 { 4938 PetscMPIInt size; 4939 4940 PetscFunctionBegin; 4941 PetscCallMPI(MPI_Comm_size(comm,&size)); 4942 if (size == 1) { 4943 PetscCall(PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0)); 4944 if (scall == MAT_INITIAL_MATRIX) { 4945 PetscCall(MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat)); 4946 } else { 4947 PetscCall(MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN)); 4948 } 4949 PetscCall(PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0)); 4950 PetscFunctionReturn(0); 4951 } 4952 PetscCall(PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0)); 4953 if (scall == MAT_INITIAL_MATRIX) { 4954 PetscCall(MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat)); 4955 } 4956 PetscCall(MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat)); 4957 PetscCall(PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0)); 4958 PetscFunctionReturn(0); 4959 } 4960 4961 /*@ 4962 MatMPIAIJGetLocalMat - Creates a SeqAIJ from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with 4963 mlocal rows and n columns. Where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained 4964 with MatGetSize() 4965 4966 Not Collective 4967 4968 Input Parameters: 4969 + A - the matrix 4970 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4971 4972 Output Parameter: 4973 . A_loc - the local sequential matrix generated 4974 4975 Level: developer 4976 4977 Notes: 4978 When the communicator associated with A has size 1 and MAT_INITIAL_MATRIX is requested, the matrix returned is the diagonal part of A. 4979 If MAT_REUSE_MATRIX is requested with comm size 1, MatCopy(Adiag,*A_loc,SAME_NONZERO_PATTERN) is called. 4980 This means that one can preallocate the proper sequential matrix first and then call this routine with MAT_REUSE_MATRIX to safely 4981 modify the values of the returned A_loc. 
4982 4983 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed(), MatMPIAIJGetLocalMatMerge() 4984 @*/ 4985 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc) 4986 { 4987 Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data; 4988 Mat_SeqAIJ *mat,*a,*b; 4989 PetscInt *ai,*aj,*bi,*bj,*cmap=mpimat->garray; 4990 const PetscScalar *aa,*ba,*aav,*bav; 4991 PetscScalar *ca,*cam; 4992 PetscMPIInt size; 4993 PetscInt am=A->rmap->n,i,j,k,cstart=A->cmap->rstart; 4994 PetscInt *ci,*cj,col,ncols_d,ncols_o,jo; 4995 PetscBool match; 4996 4997 PetscFunctionBegin; 4998 PetscCall(PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&match)); 4999 PetscCheck(match,PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input"); 5000 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A),&size)); 5001 if (size == 1) { 5002 if (scall == MAT_INITIAL_MATRIX) { 5003 PetscCall(PetscObjectReference((PetscObject)mpimat->A)); 5004 *A_loc = mpimat->A; 5005 } else if (scall == MAT_REUSE_MATRIX) { 5006 PetscCall(MatCopy(mpimat->A,*A_loc,SAME_NONZERO_PATTERN)); 5007 } 5008 PetscFunctionReturn(0); 5009 } 5010 5011 PetscCall(PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0)); 5012 a = (Mat_SeqAIJ*)(mpimat->A)->data; 5013 b = (Mat_SeqAIJ*)(mpimat->B)->data; 5014 ai = a->i; aj = a->j; bi = b->i; bj = b->j; 5015 PetscCall(MatSeqAIJGetArrayRead(mpimat->A,&aav)); 5016 PetscCall(MatSeqAIJGetArrayRead(mpimat->B,&bav)); 5017 aa = aav; 5018 ba = bav; 5019 if (scall == MAT_INITIAL_MATRIX) { 5020 PetscCall(PetscMalloc1(1+am,&ci)); 5021 ci[0] = 0; 5022 for (i=0; i<am; i++) { 5023 ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]); 5024 } 5025 PetscCall(PetscMalloc1(1+ci[am],&cj)); 5026 PetscCall(PetscMalloc1(1+ci[am],&ca)); 5027 k = 0; 5028 for (i=0; i<am; i++) { 5029 ncols_o = bi[i+1] - bi[i]; 5030 ncols_d = ai[i+1] - ai[i]; 5031 /* off-diagonal portion of A */ 5032 for (jo=0; jo<ncols_o; jo++) { 5033 col = cmap[*bj]; 5034 if (col >= cstart) break; 5035 cj[k] = col; 
bj++; 5036 ca[k++] = *ba++; 5037 } 5038 /* diagonal portion of A */ 5039 for (j=0; j<ncols_d; j++) { 5040 cj[k] = cstart + *aj++; 5041 ca[k++] = *aa++; 5042 } 5043 /* off-diagonal portion of A */ 5044 for (j=jo; j<ncols_o; j++) { 5045 cj[k] = cmap[*bj++]; 5046 ca[k++] = *ba++; 5047 } 5048 } 5049 /* put together the new matrix */ 5050 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc)); 5051 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5052 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5053 mat = (Mat_SeqAIJ*)(*A_loc)->data; 5054 mat->free_a = PETSC_TRUE; 5055 mat->free_ij = PETSC_TRUE; 5056 mat->nonew = 0; 5057 } else if (scall == MAT_REUSE_MATRIX) { 5058 mat =(Mat_SeqAIJ*)(*A_loc)->data; 5059 ci = mat->i; 5060 cj = mat->j; 5061 PetscCall(MatSeqAIJGetArrayWrite(*A_loc,&cam)); 5062 for (i=0; i<am; i++) { 5063 /* off-diagonal portion of A */ 5064 ncols_o = bi[i+1] - bi[i]; 5065 for (jo=0; jo<ncols_o; jo++) { 5066 col = cmap[*bj]; 5067 if (col >= cstart) break; 5068 *cam++ = *ba++; bj++; 5069 } 5070 /* diagonal portion of A */ 5071 ncols_d = ai[i+1] - ai[i]; 5072 for (j=0; j<ncols_d; j++) *cam++ = *aa++; 5073 /* off-diagonal portion of A */ 5074 for (j=jo; j<ncols_o; j++) { 5075 *cam++ = *ba++; bj++; 5076 } 5077 } 5078 PetscCall(MatSeqAIJRestoreArrayWrite(*A_loc,&cam)); 5079 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 5080 PetscCall(MatSeqAIJRestoreArrayRead(mpimat->A,&aav)); 5081 PetscCall(MatSeqAIJRestoreArrayRead(mpimat->B,&bav)); 5082 PetscCall(PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0)); 5083 PetscFunctionReturn(0); 5084 } 5085 5086 /*@ 5087 MatMPIAIJGetLocalMatMerge - Creates a SeqAIJ from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with 5088 mlocal rows and n columns. 
Where n is the sum of the number of columns of the diagonal and offdiagonal part 5089 5090 Not Collective 5091 5092 Input Parameters: 5093 + A - the matrix 5094 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5095 5096 Output Parameters: 5097 + glob - sequential IS with global indices associated with the columns of the local sequential matrix generated (can be NULL) 5098 - A_loc - the local sequential matrix generated 5099 5100 Level: developer 5101 5102 Notes: 5103 This is different from MatMPIAIJGetLocalMat() since the first columns in the returning matrix are those associated with the diagonal part, then those associated with the offdiagonal part (in its local ordering) 5104 5105 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat(), MatMPIAIJGetLocalMatCondensed() 5106 5107 @*/ 5108 PetscErrorCode MatMPIAIJGetLocalMatMerge(Mat A,MatReuse scall,IS *glob,Mat *A_loc) 5109 { 5110 Mat Ao,Ad; 5111 const PetscInt *cmap; 5112 PetscMPIInt size; 5113 PetscErrorCode (*f)(Mat,MatReuse,IS*,Mat*); 5114 5115 PetscFunctionBegin; 5116 PetscCall(MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&cmap)); 5117 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A),&size)); 5118 if (size == 1) { 5119 if (scall == MAT_INITIAL_MATRIX) { 5120 PetscCall(PetscObjectReference((PetscObject)Ad)); 5121 *A_loc = Ad; 5122 } else if (scall == MAT_REUSE_MATRIX) { 5123 PetscCall(MatCopy(Ad,*A_loc,SAME_NONZERO_PATTERN)); 5124 } 5125 if (glob) PetscCall(ISCreateStride(PetscObjectComm((PetscObject)Ad),Ad->cmap->n,Ad->cmap->rstart,1,glob)); 5126 PetscFunctionReturn(0); 5127 } 5128 PetscCall(PetscObjectQueryFunction((PetscObject)A,"MatMPIAIJGetLocalMatMerge_C",&f)); 5129 PetscCall(PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0)); 5130 if (f) { 5131 PetscCall((*f)(A,scall,glob,A_loc)); 5132 } else { 5133 Mat_SeqAIJ *a = (Mat_SeqAIJ*)Ad->data; 5134 Mat_SeqAIJ *b = (Mat_SeqAIJ*)Ao->data; 5135 Mat_SeqAIJ *c; 5136 PetscInt *ai = a->i, *aj = a->j; 5137 PetscInt *bi = b->i, *bj = b->j; 5138 PetscInt *ci,*cj; 5139 
const PetscScalar *aa,*ba; 5140 PetscScalar *ca; 5141 PetscInt i,j,am,dn,on; 5142 5143 PetscCall(MatGetLocalSize(Ad,&am,&dn)); 5144 PetscCall(MatGetLocalSize(Ao,NULL,&on)); 5145 PetscCall(MatSeqAIJGetArrayRead(Ad,&aa)); 5146 PetscCall(MatSeqAIJGetArrayRead(Ao,&ba)); 5147 if (scall == MAT_INITIAL_MATRIX) { 5148 PetscInt k; 5149 PetscCall(PetscMalloc1(1+am,&ci)); 5150 PetscCall(PetscMalloc1(ai[am]+bi[am],&cj)); 5151 PetscCall(PetscMalloc1(ai[am]+bi[am],&ca)); 5152 ci[0] = 0; 5153 for (i=0,k=0; i<am; i++) { 5154 const PetscInt ncols_o = bi[i+1] - bi[i]; 5155 const PetscInt ncols_d = ai[i+1] - ai[i]; 5156 ci[i+1] = ci[i] + ncols_o + ncols_d; 5157 /* diagonal portion of A */ 5158 for (j=0; j<ncols_d; j++,k++) { 5159 cj[k] = *aj++; 5160 ca[k] = *aa++; 5161 } 5162 /* off-diagonal portion of A */ 5163 for (j=0; j<ncols_o; j++,k++) { 5164 cj[k] = dn + *bj++; 5165 ca[k] = *ba++; 5166 } 5167 } 5168 /* put together the new matrix */ 5169 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,dn+on,ci,cj,ca,A_loc)); 5170 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5171 /* Since these are PETSc arrays, change flags to free them as necessary. 
*/ 5172 c = (Mat_SeqAIJ*)(*A_loc)->data; 5173 c->free_a = PETSC_TRUE; 5174 c->free_ij = PETSC_TRUE; 5175 c->nonew = 0; 5176 PetscCall(MatSetType(*A_loc,((PetscObject)Ad)->type_name)); 5177 } else if (scall == MAT_REUSE_MATRIX) { 5178 PetscCall(MatSeqAIJGetArrayWrite(*A_loc,&ca)); 5179 for (i=0; i<am; i++) { 5180 const PetscInt ncols_d = ai[i+1] - ai[i]; 5181 const PetscInt ncols_o = bi[i+1] - bi[i]; 5182 /* diagonal portion of A */ 5183 for (j=0; j<ncols_d; j++) *ca++ = *aa++; 5184 /* off-diagonal portion of A */ 5185 for (j=0; j<ncols_o; j++) *ca++ = *ba++; 5186 } 5187 PetscCall(MatSeqAIJRestoreArrayWrite(*A_loc,&ca)); 5188 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 5189 PetscCall(MatSeqAIJRestoreArrayRead(Ad,&aa)); 5190 PetscCall(MatSeqAIJRestoreArrayRead(Ao,&aa)); 5191 if (glob) { 5192 PetscInt cst, *gidx; 5193 5194 PetscCall(MatGetOwnershipRangeColumn(A,&cst,NULL)); 5195 PetscCall(PetscMalloc1(dn+on,&gidx)); 5196 for (i=0; i<dn; i++) gidx[i] = cst + i; 5197 for (i=0; i<on; i++) gidx[i+dn] = cmap[i]; 5198 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)Ad),dn+on,gidx,PETSC_OWN_POINTER,glob)); 5199 } 5200 } 5201 PetscCall(PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0)); 5202 PetscFunctionReturn(0); 5203 } 5204 5205 /*@C 5206 MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MATMPIAIJ matrix by taking all its local rows and NON-ZERO columns 5207 5208 Not Collective 5209 5210 Input Parameters: 5211 + A - the matrix 5212 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5213 - row, col - index sets of rows and columns to extract (or NULL) 5214 5215 Output Parameter: 5216 . 
A_loc - the local sequential matrix generated

   Level: developer

.seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()

@*/
PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
{
  Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data;
  PetscInt   i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
  IS         isrowa,iscola;
  Mat        *aloc;
  PetscBool  match;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match));
  PetscCheck(match,PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input");
  PetscCall(PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0));
  if (!row) {
    /* default row set: all locally owned rows */
    start = A->rmap->rstart; end = A->rmap->rend;
    PetscCall(ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa));
  } else {
    isrowa = *row;
  }
  if (!col) {
    /* default column set: the nonzero columns = ghost columns below the diagonal range,
       then the owned columns, then ghost columns above; garray is sorted so a single
       split at 'start' keeps the result globally sorted */
    start = A->cmap->rstart;
    cmap  = a->garray;
    nzA   = a->A->cmap->n;
    nzB   = a->B->cmap->n;
    PetscCall(PetscMalloc1(nzA+nzB, &idx));
    ncols = 0;
    for (i=0; i<nzB; i++) {
      if (cmap[i] < start) idx[ncols++] = cmap[i];
      else break;
    }
    imark = i;
    for (i=0; i<nzA; i++) idx[ncols++] = start + i;
    for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
    PetscCall(ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola));
  } else {
    iscola = *col;
  }
  if (scall != MAT_INITIAL_MATRIX) {
    /* MatCreateSubMatrices() expects an array of Mat on reuse */
    PetscCall(PetscMalloc1(1,&aloc));
    aloc[0] = *A_loc;
  }
  PetscCall(MatCreateSubMatrices(A,1,&isrowa,&iscola,scall,&aloc));
  if (!col) { /* attach global id of condensed columns */
    PetscCall(PetscObjectCompose((PetscObject)aloc[0],"_petsc_GetLocalMatCondensed_iscol",(PetscObject)iscola));
  }
  *A_loc = aloc[0];
  PetscCall(PetscFree(aloc));
  if (!row) {
    PetscCall(ISDestroy(&isrowa));
  }
  if (!col) {
    PetscCall(ISDestroy(&iscola));
  }
  PetscCall(PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0));
  PetscFunctionReturn(0);
}

/*
 * Create a sequential AIJ matrix based on row indices: a whole row is extracted once it is matched.
 * Rows could be local or remote. The routine is designed to be scalable in memory so that nothing is based
 * on a global size.
 * */
PetscErrorCode MatCreateSeqSubMatrixWithRows_Private(Mat P,IS rows,Mat *P_oth)
{
  Mat_MPIAIJ             *p=(Mat_MPIAIJ*)P->data;
  Mat_SeqAIJ             *pd=(Mat_SeqAIJ*)(p->A)->data,*po=(Mat_SeqAIJ*)(p->B)->data,*p_oth;
  PetscInt               plocalsize,nrows,*ilocal,*oilocal,i,lidx,*nrcols,*nlcols,ncol;
  PetscMPIInt            owner;
  PetscSFNode            *iremote,*oiremote;
  const PetscInt         *lrowindices;
  PetscSF                sf,osf;
  PetscInt               pcstart,*roffsets,*loffsets,*pnnz,j;
  PetscInt               ontotalcols,dntotalcols,ntotalcols,nout;
  MPI_Comm               comm;
  ISLocalToGlobalMapping mapping;
  const PetscScalar      *pd_a,*po_a;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)P,&comm));
  /* plocalsize is the number of roots
   * nrows is the number of leaves
   * */
  PetscCall(MatGetLocalSize(P,&plocalsize,NULL));
  PetscCall(ISGetLocalSize(rows,&nrows));
  PetscCall(PetscCalloc1(nrows,&iremote));
  PetscCall(ISGetIndices(rows,&lrowindices));
  for (i=0;i<nrows;i++) {
    /* Find a remote index and an owner for a row
     * The row could be local or remote
     * */
    owner = 0;
    lidx  = 0;
    PetscCall(PetscLayoutFindOwnerIndex(P->rmap,lrowindices[i],&owner,&lidx));
    iremote[i].index = lidx;
    iremote[i].rank  = owner;
  }
  /* Create SF to communicate how many nonzero columns for each row */
  PetscCall(PetscSFCreate(comm,&sf));
  /* SF will figure out the number of nonzero columns for each row, and their
   * offsets
   * */
  PetscCall(PetscSFSetGraph(sf,plocalsize,nrows,NULL,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER));
  PetscCall(PetscSFSetFromOptions(sf));
  PetscCall(PetscSFSetUp(sf));

  /* Per-root (row of P) counts of diag/off-diag nonzeros and their running offsets,
     stored interleaved as pairs (diag, off-diag) for a single MPIU_2INT broadcast */
  PetscCall(PetscCalloc1(2*(plocalsize+1),&roffsets));
  PetscCall(PetscCalloc1(2*plocalsize,&nrcols));
  PetscCall(PetscCalloc1(nrows,&pnnz));
  roffsets[0] = 0;
  roffsets[1] = 0;
  for (i=0;i<plocalsize;i++) {
    /* diag */
    nrcols[i*2+0] = pd->i[i+1] - pd->i[i];
    /* off diag */
    nrcols[i*2+1] = po->i[i+1] - po->i[i];
    /* compute offsets so that we have the relative location for each row */
    roffsets[(i+1)*2+0] = roffsets[i*2+0] + nrcols[i*2+0];
    roffsets[(i+1)*2+1] = roffsets[i*2+1] + nrcols[i*2+1];
  }
  PetscCall(PetscCalloc1(2*nrows,&nlcols));
  PetscCall(PetscCalloc1(2*nrows,&loffsets));
  /* 'r' means root, and 'l' means leaf */
  PetscCall(PetscSFBcastBegin(sf,MPIU_2INT,nrcols,nlcols,MPI_REPLACE));
  PetscCall(PetscSFBcastBegin(sf,MPIU_2INT,roffsets,loffsets,MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(sf,MPIU_2INT,nrcols,nlcols,MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(sf,MPIU_2INT,roffsets,loffsets,MPI_REPLACE));
  PetscCall(PetscSFDestroy(&sf));
  PetscCall(PetscFree(roffsets));
  PetscCall(PetscFree(nrcols));
  dntotalcols = 0;
  ontotalcols = 0;
  ncol = 0;
  for (i=0;i<nrows;i++) {
    pnnz[i] = nlcols[i*2+0] + nlcols[i*2+1];
    ncol = PetscMax(pnnz[i],ncol);
    /* diag */
    dntotalcols += nlcols[i*2+0];
    /* off diag */
    ontotalcols += nlcols[i*2+1];
  }
  /* We do not need to figure the right number of columns
   * since all the calculations will be done by going through the raw data
   * */
  PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF,nrows,ncol,0,pnnz,P_oth));
  PetscCall(MatSetUp(*P_oth));
  PetscCall(PetscFree(pnnz));
  p_oth = (Mat_SeqAIJ*) (*P_oth)->data;
  /* Two SFs: one rooted on P's diag entries, one on its off-diag entries;
     leaves are positions in P_oth's single CSR arrays */
  /* diag */
  PetscCall(PetscCalloc1(dntotalcols,&iremote));
  /* off diag */
  PetscCall(PetscCalloc1(ontotalcols,&oiremote));
  /* diag */
  PetscCall(PetscCalloc1(dntotalcols,&ilocal));
  /* off diag */
  PetscCall(PetscCalloc1(ontotalcols,&oilocal));
  dntotalcols = 0;
  ontotalcols = 0;
  ntotalcols  = 0;
  for (i=0;i<nrows;i++) {
    owner = 0;
    PetscCall(PetscLayoutFindOwnerIndex(P->rmap,lrowindices[i],&owner,NULL));
    /* Set iremote for diag matrix */
    for (j=0;j<nlcols[i*2+0];j++) {
      iremote[dntotalcols].index = loffsets[i*2+0] + j;
      iremote[dntotalcols].rank  = owner;
      /* P_oth is seqAIJ so that ilocal need to point to the first part of memory */
      ilocal[dntotalcols++] = ntotalcols++;
    }
    /* off diag */
    for (j=0;j<nlcols[i*2+1];j++) {
      oiremote[ontotalcols].index = loffsets[i*2+1] + j;
      oiremote[ontotalcols].rank  = owner;
      oilocal[ontotalcols++] = ntotalcols++;
    }
  }
  PetscCall(ISRestoreIndices(rows,&lrowindices));
  PetscCall(PetscFree(loffsets));
  PetscCall(PetscFree(nlcols));
  PetscCall(PetscSFCreate(comm,&sf));
  /* P serves as roots and P_oth is leaves
   * Diag matrix
   * */
  PetscCall(PetscSFSetGraph(sf,pd->i[plocalsize],dntotalcols,ilocal,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER));
  PetscCall(PetscSFSetFromOptions(sf));
  PetscCall(PetscSFSetUp(sf));

  PetscCall(PetscSFCreate(comm,&osf));
  /* Off diag */
  PetscCall(PetscSFSetGraph(osf,po->i[plocalsize],ontotalcols,oilocal,PETSC_OWN_POINTER,oiremote,PETSC_OWN_POINTER));
  PetscCall(PetscSFSetFromOptions(osf));
  PetscCall(PetscSFSetUp(osf));
  PetscCall(MatSeqAIJGetArrayRead(p->A,&pd_a));
  PetscCall(MatSeqAIJGetArrayRead(p->B,&po_a));
  /* We operate on the matrix internal data for saving memory */
  PetscCall(PetscSFBcastBegin(sf,MPIU_SCALAR,pd_a,p_oth->a,MPI_REPLACE));
  PetscCall(PetscSFBcastBegin(osf,MPIU_SCALAR,po_a,p_oth->a,MPI_REPLACE));
  PetscCall(MatGetOwnershipRangeColumn(P,&pcstart,NULL));
  /* Convert to global indices for diag matrix; note pd->j is temporarily mutated
     in place and restored below after the broadcast has been started */
  for (i=0;i<pd->i[plocalsize];i++) pd->j[i] += pcstart;
  PetscCall(PetscSFBcastBegin(sf,MPIU_INT,pd->j,p_oth->j,MPI_REPLACE));
  /* We want P_oth store global indices */
  PetscCall(ISLocalToGlobalMappingCreate(comm,1,p->B->cmap->n,p->garray,PETSC_COPY_VALUES,&mapping));
  /* Use memory scalable approach */
  PetscCall(ISLocalToGlobalMappingSetType(mapping,ISLOCALTOGLOBALMAPPINGHASH));
  PetscCall(ISLocalToGlobalMappingApply(mapping,po->i[plocalsize],po->j,po->j));
  PetscCall(PetscSFBcastBegin(osf,MPIU_INT,po->j,p_oth->j,MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(sf,MPIU_INT,pd->j,p_oth->j,MPI_REPLACE));
  /* Convert back to local indices */
  for (i=0;i<pd->i[plocalsize];i++) pd->j[i] -= pcstart;
  PetscCall(PetscSFBcastEnd(osf,MPIU_INT,po->j,p_oth->j,MPI_REPLACE));
  nout = 0;
  /* Undo the in-place globalization of po->j */
  PetscCall(ISGlobalToLocalMappingApply(mapping,IS_GTOLM_DROP,po->i[plocalsize],po->j,&nout,po->j));
  PetscCheck(nout == po->i[plocalsize],comm,PETSC_ERR_ARG_INCOMP,"n %" PetscInt_FMT " does not equal to nout %" PetscInt_FMT " ",po->i[plocalsize],nout);
  PetscCall(ISLocalToGlobalMappingDestroy(&mapping));
  /* Exchange values */
  PetscCall(PetscSFBcastEnd(sf,MPIU_SCALAR,pd_a,p_oth->a,MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(osf,MPIU_SCALAR,po_a,p_oth->a,MPI_REPLACE));
  PetscCall(MatSeqAIJRestoreArrayRead(p->A,&pd_a));
  PetscCall(MatSeqAIJRestoreArrayRead(p->B,&po_a));
  /* Stop PETSc from shrinking memory */
  for (i=0;i<nrows;i++) p_oth->ilen[i] = p_oth->imax[i];
  PetscCall(MatAssemblyBegin(*P_oth,MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(*P_oth,MAT_FINAL_ASSEMBLY));
  /* Attach PetscSF objects to P_oth so that we can reuse it later */
  PetscCall(PetscObjectCompose((PetscObject)*P_oth,"diagsf",(PetscObject)sf));
  PetscCall(PetscObjectCompose((PetscObject)*P_oth,"offdiagsf",(PetscObject)osf));
  PetscCall(PetscSFDestroy(&sf));
  PetscCall(PetscSFDestroy(&osf));
  PetscFunctionReturn(0);
}

/*
 * Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A
 * This supports MPIAIJ and MAIJ
 * */
PetscErrorCode MatGetBrowsOfAcols_MPIXAIJ(Mat A,Mat P,PetscInt dof,MatReuse reuse,Mat *P_oth)
{
  Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data,*p=(Mat_MPIAIJ*)P->data;
  Mat_SeqAIJ *p_oth;
  IS         rows,map;
  PetscHMapI hamp;
  PetscInt   i,htsize,*rowindices,off,*mapping,key,count;
  MPI_Comm   comm;
  PetscSF    sf,osf;
  PetscBool  has;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)A,&comm));
  PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAocols,A,P,0,0));
  /* If it is the first time, create an index set of off-diag nonzero columns of A,
   * and then create a submatrix (that often is an overlapping matrix)
   * */
  if (reuse == MAT_INITIAL_MATRIX) {
    /* Use a hash table to figure out unique keys */
    PetscCall(PetscHMapICreate(&hamp));
    PetscCall(PetscHMapIResize(hamp,a->B->cmap->n));
    PetscCall(PetscCalloc1(a->B->cmap->n,&mapping));
    count = 0;
    /* Assume that a->g is sorted, otherwise the following does not make sense */
    for (i=0;i<a->B->cmap->n;i++) {
      key  = a->garray[i]/dof;  /* dof consecutive columns collapse to one key (MAIJ support) */
      PetscCall(PetscHMapIHas(hamp,key,&has));
      if (!has) {
        mapping[i] = count;
        PetscCall(PetscHMapISet(hamp,key,count++));
      } else {
        /* Current 'i' has the same value the previous step */
        mapping[i] = count-1;
      }
    }
    PetscCall(ISCreateGeneral(comm,a->B->cmap->n,mapping,PETSC_OWN_POINTER,&map));
    PetscCall(PetscHMapIGetSize(hamp,&htsize));
    PetscCheck(htsize==count,comm,PETSC_ERR_ARG_INCOMP," Size of hash map %" PetscInt_FMT " is inconsistent with count %" PetscInt_FMT " ",htsize,count);
    PetscCall(PetscCalloc1(htsize,&rowindices));
    off = 0;
    PetscCall(PetscHMapIGetKeys(hamp,&off,rowindices));
    PetscCall(PetscHMapIDestroy(&hamp));
    PetscCall(PetscSortInt(htsize,rowindices));
    PetscCall(ISCreateGeneral(comm,htsize,rowindices,PETSC_OWN_POINTER,&rows));
    /* In case, the matrix was already created but users want to recreate the matrix */
    PetscCall(MatDestroy(P_oth));
    PetscCall(MatCreateSeqSubMatrixWithRows_Private(P,rows,P_oth));
    PetscCall(PetscObjectCompose((PetscObject)*P_oth,"aoffdiagtopothmapping",(PetscObject)map));
    PetscCall(ISDestroy(&map));
    PetscCall(ISDestroy(&rows));
  } else if (reuse == MAT_REUSE_MATRIX) {
    /* If matrix was already created, we simply update values using SF objects
     * that were attached to the matrix earlier.
     */
    const PetscScalar *pd_a,*po_a;

    PetscCall(PetscObjectQuery((PetscObject)*P_oth,"diagsf",(PetscObject*)&sf));
    PetscCall(PetscObjectQuery((PetscObject)*P_oth,"offdiagsf",(PetscObject*)&osf));
    PetscCheck(sf && osf,comm,PETSC_ERR_ARG_NULL,"Matrix is not initialized yet");
    p_oth = (Mat_SeqAIJ*) (*P_oth)->data;
    /* Update values in place */
    PetscCall(MatSeqAIJGetArrayRead(p->A,&pd_a));
    PetscCall(MatSeqAIJGetArrayRead(p->B,&po_a));
    PetscCall(PetscSFBcastBegin(sf,MPIU_SCALAR,pd_a,p_oth->a,MPI_REPLACE));
    PetscCall(PetscSFBcastBegin(osf,MPIU_SCALAR,po_a,p_oth->a,MPI_REPLACE));
    PetscCall(PetscSFBcastEnd(sf,MPIU_SCALAR,pd_a,p_oth->a,MPI_REPLACE));
    PetscCall(PetscSFBcastEnd(osf,MPIU_SCALAR,po_a,p_oth->a,MPI_REPLACE));
    PetscCall(MatSeqAIJRestoreArrayRead(p->A,&pd_a));
    PetscCall(MatSeqAIJRestoreArrayRead(p->B,&po_a));
  } else SETERRQ(comm,PETSC_ERR_ARG_UNKNOWN_TYPE,"Unknown reuse type");
  PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAocols,A,P,0,0));
  PetscFunctionReturn(0);
}

/*@C
   MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A

   Collective on Mat

   Input Parameters:
+  A - the first matrix in mpiaij format
.  B - the second matrix in mpiaij format
-  scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX

   Output Parameters:
+  rowb - On input index sets of rows of B to extract (or NULL), modified on output
.  colb - On input index sets of columns of B to extract (or NULL), modified on output
-  B_seq - the sequential matrix generated

   Level: developer

@*/
PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
{
  Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data;
  PetscInt   *idx,i,start,ncols,nzA,nzB,*cmap,imark;
  IS         isrowb,iscolb;
  Mat        *bseq=NULL;

  PetscFunctionBegin;
  /* A*B requires A's column layout to match B's row layout */
  if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
    SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
  }
  PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0));

  if (scall == MAT_INITIAL_MATRIX) {
    /* Rows of B to fetch = nonzero columns of local A, in sorted global order */
    start = A->cmap->rstart;
    cmap  = a->garray;
    nzA   = a->A->cmap->n;
    nzB   = a->B->cmap->n;
    PetscCall(PetscMalloc1(nzA+nzB, &idx));
    ncols = 0;
    for (i=0; i<nzB; i++) { /* row < local row index */
      if (cmap[i] < start) idx[ncols++] = cmap[i];
      else break;
    }
    imark = i;
    for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */
    for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
    PetscCall(ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb));
    PetscCall(ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb));
  } else {
    PetscCheck(rowb && colb,PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
    isrowb = *rowb; iscolb = *colb;
    PetscCall(PetscMalloc1(1,&bseq));
    bseq[0] = *B_seq;
  }
  PetscCall(MatCreateSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq));
  *B_seq = bseq[0];
  PetscCall(PetscFree(bseq));
  if (!rowb) {
    PetscCall(ISDestroy(&isrowb));
  } else {
    *rowb = isrowb;
  }
  if (!colb) {
    PetscCall(ISDestroy(&iscolb));
  } else {
    *colb = iscolb;
  }
  PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0));
  PetscFunctionReturn(0);
}

/*
    MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns
    of the OFF-DIAGONAL portion of local A

    Collective on Mat

   Input Parameters:
+    A,B - the matrices in mpiaij format
-    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX

   Output Parameter:
+    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
.    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
.    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
-    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N

   Developer Notes: This directly accesses information inside the VecScatter associated with the matrix-vector product
     for this matrix. This is not desirable..
5619 5620 Level: developer 5621 5622 */ 5623 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth) 5624 { 5625 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5626 Mat_SeqAIJ *b_oth; 5627 VecScatter ctx; 5628 MPI_Comm comm; 5629 const PetscMPIInt *rprocs,*sprocs; 5630 const PetscInt *srow,*rstarts,*sstarts; 5631 PetscInt *rowlen,*bufj,*bufJ,ncols = 0,aBn=a->B->cmap->n,row,*b_othi,*b_othj,*rvalues=NULL,*svalues=NULL,*cols,sbs,rbs; 5632 PetscInt i,j,k=0,l,ll,nrecvs,nsends,nrows,*rstartsj = NULL,*sstartsj,len; 5633 PetscScalar *b_otha,*bufa,*bufA,*vals = NULL; 5634 MPI_Request *reqs = NULL,*rwaits = NULL,*swaits = NULL; 5635 PetscMPIInt size,tag,rank,nreqs; 5636 5637 PetscFunctionBegin; 5638 PetscCall(PetscObjectGetComm((PetscObject)A,&comm)); 5639 PetscCallMPI(MPI_Comm_size(comm,&size)); 5640 5641 if (PetscUnlikely(A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend)) { 5642 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5643 } 5644 PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0)); 5645 PetscCallMPI(MPI_Comm_rank(comm,&rank)); 5646 5647 if (size == 1) { 5648 startsj_s = NULL; 5649 bufa_ptr = NULL; 5650 *B_oth = NULL; 5651 PetscFunctionReturn(0); 5652 } 5653 5654 ctx = a->Mvctx; 5655 tag = ((PetscObject)ctx)->tag; 5656 5657 PetscCall(VecScatterGetRemote_Private(ctx,PETSC_TRUE/*send*/,&nsends,&sstarts,&srow,&sprocs,&sbs)); 5658 /* rprocs[] must be ordered so that indices received from them are ordered in rvalues[], which is key to algorithms used in this subroutine */ 5659 PetscCall(VecScatterGetRemoteOrdered_Private(ctx,PETSC_FALSE/*recv*/,&nrecvs,&rstarts,NULL/*indices not needed*/,&rprocs,&rbs)); 5660 PetscCall(PetscMPIIntCast(nsends+nrecvs,&nreqs)); 5661 
PetscCall(PetscMalloc1(nreqs,&reqs)); 5662 rwaits = reqs; 5663 swaits = reqs + nrecvs; 5664 5665 if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 5666 if (scall == MAT_INITIAL_MATRIX) { 5667 /* i-array */ 5668 /*---------*/ 5669 /* post receives */ 5670 if (nrecvs) PetscCall(PetscMalloc1(rbs*(rstarts[nrecvs] - rstarts[0]),&rvalues)); /* rstarts can be NULL when nrecvs=0 */ 5671 for (i=0; i<nrecvs; i++) { 5672 rowlen = rvalues + rstarts[i]*rbs; 5673 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */ 5674 PetscCallMPI(MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i)); 5675 } 5676 5677 /* pack the outgoing message */ 5678 PetscCall(PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj)); 5679 5680 sstartsj[0] = 0; 5681 rstartsj[0] = 0; 5682 len = 0; /* total length of j or a array to be sent */ 5683 if (nsends) { 5684 k = sstarts[0]; /* ATTENTION: sstarts[0] and rstarts[0] are not necessarily zero */ 5685 PetscCall(PetscMalloc1(sbs*(sstarts[nsends]-sstarts[0]),&svalues)); 5686 } 5687 for (i=0; i<nsends; i++) { 5688 rowlen = svalues + (sstarts[i]-sstarts[0])*sbs; 5689 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5690 for (j=0; j<nrows; j++) { 5691 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 5692 for (l=0; l<sbs; l++) { 5693 PetscCall(MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL)); /* rowlength */ 5694 5695 rowlen[j*sbs+l] = ncols; 5696 5697 len += ncols; 5698 PetscCall(MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL)); 5699 } 5700 k++; 5701 } 5702 PetscCallMPI(MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i)); 5703 5704 sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */ 5705 } 5706 /* recvs and sends of i-array are completed */ 5707 if (nreqs) PetscCallMPI(MPI_Waitall(nreqs,reqs,MPI_STATUSES_IGNORE)); 5708 PetscCall(PetscFree(svalues)); 5709 5710 /* allocate buffers for sending j and a arrays */ 5711 PetscCall(PetscMalloc1(len+1,&bufj)); 5712 
PetscCall(PetscMalloc1(len+1,&bufa)); 5713 5714 /* create i-array of B_oth */ 5715 PetscCall(PetscMalloc1(aBn+2,&b_othi)); 5716 5717 b_othi[0] = 0; 5718 len = 0; /* total length of j or a array to be received */ 5719 k = 0; 5720 for (i=0; i<nrecvs; i++) { 5721 rowlen = rvalues + (rstarts[i]-rstarts[0])*rbs; 5722 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of rows to be received */ 5723 for (j=0; j<nrows; j++) { 5724 b_othi[k+1] = b_othi[k] + rowlen[j]; 5725 PetscCall(PetscIntSumError(rowlen[j],len,&len)); 5726 k++; 5727 } 5728 rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 5729 } 5730 PetscCall(PetscFree(rvalues)); 5731 5732 /* allocate space for j and a arrrays of B_oth */ 5733 PetscCall(PetscMalloc1(b_othi[aBn]+1,&b_othj)); 5734 PetscCall(PetscMalloc1(b_othi[aBn]+1,&b_otha)); 5735 5736 /* j-array */ 5737 /*---------*/ 5738 /* post receives of j-array */ 5739 for (i=0; i<nrecvs; i++) { 5740 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5741 PetscCallMPI(MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i)); 5742 } 5743 5744 /* pack the outgoing message j-array */ 5745 if (nsends) k = sstarts[0]; 5746 for (i=0; i<nsends; i++) { 5747 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5748 bufJ = bufj+sstartsj[i]; 5749 for (j=0; j<nrows; j++) { 5750 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5751 for (ll=0; ll<sbs; ll++) { 5752 PetscCall(MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL)); 5753 for (l=0; l<ncols; l++) { 5754 *bufJ++ = cols[l]; 5755 } 5756 PetscCall(MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL)); 5757 } 5758 } 5759 PetscCallMPI(MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i)); 5760 } 5761 5762 /* recvs and sends of j-array are completed */ 5763 if (nreqs) PetscCallMPI(MPI_Waitall(nreqs,reqs,MPI_STATUSES_IGNORE)); 5764 } else if (scall == MAT_REUSE_MATRIX) { 5765 sstartsj = *startsj_s; 5766 rstartsj = 
*startsj_r; 5767 bufa = *bufa_ptr; 5768 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5769 PetscCall(MatSeqAIJGetArrayWrite(*B_oth,&b_otha)); 5770 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not possess an object container"); 5771 5772 /* a-array */ 5773 /*---------*/ 5774 /* post receives of a-array */ 5775 for (i=0; i<nrecvs; i++) { 5776 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5777 PetscCallMPI(MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i)); 5778 } 5779 5780 /* pack the outgoing message a-array */ 5781 if (nsends) k = sstarts[0]; 5782 for (i=0; i<nsends; i++) { 5783 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5784 bufA = bufa+sstartsj[i]; 5785 for (j=0; j<nrows; j++) { 5786 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5787 for (ll=0; ll<sbs; ll++) { 5788 PetscCall(MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals)); 5789 for (l=0; l<ncols; l++) { 5790 *bufA++ = vals[l]; 5791 } 5792 PetscCall(MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals)); 5793 } 5794 } 5795 PetscCallMPI(MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i)); 5796 } 5797 /* recvs and sends of a-array are completed */ 5798 if (nreqs) PetscCallMPI(MPI_Waitall(nreqs,reqs,MPI_STATUSES_IGNORE)); 5799 PetscCall(PetscFree(reqs)); 5800 5801 if (scall == MAT_INITIAL_MATRIX) { 5802 /* put together the new matrix */ 5803 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth)); 5804 5805 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5806 /* Since these are PETSc arrays, change flags to free them as necessary. 
*/ 5807 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5808 b_oth->free_a = PETSC_TRUE; 5809 b_oth->free_ij = PETSC_TRUE; 5810 b_oth->nonew = 0; 5811 5812 PetscCall(PetscFree(bufj)); 5813 if (!startsj_s || !bufa_ptr) { 5814 PetscCall(PetscFree2(sstartsj,rstartsj)); 5815 PetscCall(PetscFree(bufa_ptr)); 5816 } else { 5817 *startsj_s = sstartsj; 5818 *startsj_r = rstartsj; 5819 *bufa_ptr = bufa; 5820 } 5821 } else if (scall == MAT_REUSE_MATRIX) { 5822 PetscCall(MatSeqAIJRestoreArrayWrite(*B_oth,&b_otha)); 5823 } 5824 5825 PetscCall(VecScatterRestoreRemote_Private(ctx,PETSC_TRUE,&nsends,&sstarts,&srow,&sprocs,&sbs)); 5826 PetscCall(VecScatterRestoreRemoteOrdered_Private(ctx,PETSC_FALSE,&nrecvs,&rstarts,NULL,&rprocs,&rbs)); 5827 PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0)); 5828 PetscFunctionReturn(0); 5829 } 5830 5831 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*); 5832 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*); 5833 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJSELL(Mat,MatType,MatReuse,Mat*); 5834 #if defined(PETSC_HAVE_MKL_SPARSE) 5835 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat,MatType,MatReuse,Mat*); 5836 #endif 5837 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIBAIJ(Mat,MatType,MatReuse,Mat*); 5838 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*); 5839 #if defined(PETSC_HAVE_ELEMENTAL) 5840 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat,MatType,MatReuse,Mat*); 5841 #endif 5842 #if defined(PETSC_HAVE_SCALAPACK) 5843 PETSC_INTERN PetscErrorCode MatConvert_AIJ_ScaLAPACK(Mat,MatType,MatReuse,Mat*); 5844 #endif 5845 #if defined(PETSC_HAVE_HYPRE) 5846 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*); 5847 #endif 5848 #if defined(PETSC_HAVE_CUDA) 5849 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCUSPARSE(Mat,MatType,MatReuse,Mat*); 5850 #endif 5851 #if defined(PETSC_HAVE_KOKKOS_KERNELS) 
PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJKokkos(Mat,MatType,MatReuse,Mat*);
#endif
PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat,MatType,MatReuse,Mat*);
PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat,MatType,MatReuse,Mat*);
PETSC_INTERN PetscErrorCode MatProductSetFromOptions_IS_XAIJ(Mat);

/*
    Computes (B'*A')' since computing B*A directly is untenable

           n                       p                          p
  [             ]       [             ]         [                 ]
m [      A      ]  *  n [       B     ]   =   m [         C       ]
  [             ]       [             ]         [                 ]

*/
static PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
{
  Mat At,Bt,Ct;

  PetscFunctionBegin;
  /* C = A*B computed as C = (B'*A')' */
  PetscCall(MatTranspose(A,MAT_INITIAL_MATRIX,&At));
  PetscCall(MatTranspose(B,MAT_INITIAL_MATRIX,&Bt));
  PetscCall(MatMatMult(Bt,At,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&Ct));
  PetscCall(MatDestroy(&At));
  PetscCall(MatDestroy(&Bt));
  PetscCall(MatTranspose(Ct,MAT_REUSE_MATRIX,&C));
  PetscCall(MatDestroy(&Ct));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat C)
{
  PetscBool cisdense;

  PetscFunctionBegin;
  PetscCheck(A->cmap->n == B->rmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %" PetscInt_FMT " != B->rmap->n %" PetscInt_FMT,A->cmap->n,B->rmap->n);
  PetscCall(MatSetSizes(C,A->rmap->n,B->cmap->n,A->rmap->N,B->cmap->N));
  PetscCall(MatSetBlockSizesFromMats(C,A,B));
  PetscCall(PetscObjectTypeCompareAny((PetscObject)C,&cisdense,MATMPIDENSE,MATMPIDENSECUDA,""));
  if (!cisdense) {
    /* result of dense*sparse is dense; inherit A's (dense) type */
    PetscCall(MatSetType(C,((PetscObject)A)->type_name));
  }
  PetscCall(MatSetUp(C));

  C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
  PetscFunctionReturn(0);
}

/* ----------------------------------------------------------------*/
static PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ_AB(Mat C)
{
  Mat_Product *product = C->product;
  Mat         A = product->A,B=product->B;

  PetscFunctionBegin;
  if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend)
    SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);

  C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIAIJ;
  C->ops->productsymbolic = MatProductSymbolic_AB;
  PetscFunctionReturn(0);
}

PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ(Mat C)
{
  Mat_Product *product = C->product;

  PetscFunctionBegin;
  /* only the AB product is supported for the MPIDense*MPIAIJ pairing */
  if (product->type == MATPRODUCT_AB) {
    PetscCall(MatProductSetFromOptions_MPIDense_MPIAIJ_AB(C));
  }
  PetscFunctionReturn(0);
}

/* std::upper_bound(): Given a sorted array, return index of the first element in range [first,last) whose value
   is greater than value, or last if there is no such element.
 */
static inline PetscErrorCode PetscSortedIntUpperBound(PetscInt *array,PetscCount first,PetscCount last,PetscInt value,PetscCount *upper)
{
  PetscCount it,step,count = last - first;

  PetscFunctionBegin;
  /* Standard binary search for the upper bound: halve the remaining range each iteration */
  while (count > 0) {
    it   = first;
    step = count / 2;
    it  += step;
    if (!(value < array[it])) { /* array[it] <= value: upper bound lies strictly after it */
      first  = ++it;
      count -= step + 1;
    } else count = step;
  }
  *upper = first;
  PetscFunctionReturn(0);
}

/* Merge two sets of sorted nonzero entries and return a CSR for the merged (sequential) matrix

  Input Parameters:

    j1,rowBegin1,rowEnd1,perm1,jmap1: describe the first set of nonzeros (Set1)
    j2,rowBegin2,rowEnd2,perm2,jmap2: describe the second set of nonzeros (Set2)

    mat: both sets' entries are on m rows, where m is the number of local rows of the matrix mat

    For Set1, j1[] contains column indices of the nonzeros.
    For the k-th row (0<=k<m), [rowBegin1[k],rowEnd1[k]) index into j1[] and point to the begin/end nonzero in row k
    respectively (note rowEnd1[k] is not necessarily equal to rowBegin1[k+1]). Indices in this range of j1[] are sorted,
    but might have repeats. jmap1[t+1] - jmap1[t] is the number of repeats for the t-th unique nonzero in Set1.

    Similar for Set2.

    This routine merges the two sets of nonzeros row by row and removes repeats.

  Output Parameters: (memories are allocated by the caller)

    i[],j[]: the CSR of the merged matrix, which has m rows.
    imap1[]: the k-th unique nonzero in Set1 (k=0,1,...) corresponds to imap1[k]-th unique nonzero in the merged matrix.
    imap2[]: similar to imap1[], but for Set2.
    Note we order nonzeros row-by-row and from left to right.
*/
static PetscErrorCode MatMergeEntries_Internal(Mat mat,const PetscInt j1[],const PetscInt j2[],const PetscCount rowBegin1[],const PetscCount rowEnd1[],
                                               const PetscCount rowBegin2[],const PetscCount rowEnd2[],const PetscCount jmap1[],const PetscCount jmap2[],
                                               PetscCount imap1[],PetscCount imap2[],PetscInt i[],PetscInt j[])
{
  PetscInt   r,m;             /* Row index of mat */
  PetscCount t,t1,t2,b1,e1,b2,e2;

  PetscFunctionBegin;
  PetscCall(MatGetLocalSize(mat,&m,NULL));
  t1 = t2 = t = 0; /* Count unique nonzeros in Set1, Set2 and the merged result respectively */
  i[0] = 0;
  for (r=0; r<m; r++) { /* Do row by row merging */
    b1 = rowBegin1[r];
    e1 = rowEnd1[r];
    b2 = rowBegin2[r];
    e2 = rowEnd2[r];
    /* Classic two-pointer merge over the sorted column indices of row r */
    while (b1 < e1 && b2 < e2) {
      if (j1[b1] == j2[b2]) { /* Same column index and hence same nonzero */
        j[t]      = j1[b1];
        imap1[t1] = t;
        imap2[t2] = t;
        b1 += jmap1[t1+1] - jmap1[t1]; /* Jump to next unique local nonzero */
        b2 += jmap2[t2+1] - jmap2[t2]; /* Jump to next unique remote nonzero */
        t1++; t2++; t++;
      } else if (j1[b1] < j2[b2]) {
        j[t]      = j1[b1];
        imap1[t1] = t;
        b1 += jmap1[t1+1] - jmap1[t1];
        t1++; t++;
      } else {
        j[t]      = j2[b2];
        imap2[t2] = t;
        b2 += jmap2[t2+1] - jmap2[t2];
        t2++; t++;
      }
    }
    /* Merge the remaining in either j1[] or j2[] */
    while (b1 < e1) {
      j[t]      = j1[b1];
      imap1[t1] = t;
      b1 += jmap1[t1+1] - jmap1[t1];
      t1++; t++;
    }
    while (b2 < e2) {
      j[t]      = j2[b2];
      imap2[t2] = t;
      b2 += jmap2[t2+1] - jmap2[t2];
      t2++; t++;
    }
    i[r+1] = t;
  }
  PetscFunctionReturn(0);
}

/* Split a set/group of local entries into two subsets: those in the diagonal block and those in the off-diagonal block

  Input Parameters:
    mat: an MPI matrix that provides row and column layout information for splitting.
Let's say its number of local rows is m. 6030 n,i[],j[],perm[]: there are n input entries, belonging to m rows. Row/col indices of the entries are stored in i[] and j[] 6031 respectively, along with a permutation array perm[]. Length of the i[],j[],perm[] arrays is n. 6032 6033 i[] is already sorted, but within a row, j[] is not sorted and might have repeats. 6034 i[] might contain negative indices at the beginning, which means the corresponding entries should be ignored in the splitting. 6035 6036 Output Parameters: 6037 j[],perm[]: the routine needs to sort j[] within each row along with perm[]. 6038 rowBegin[],rowMid[],rowEnd[]: of length m, and the memory is preallocated and zeroed by the caller. 6039 They contain indices pointing to j[]. For 0<=r<m, [rowBegin[r],rowMid[r]) point to begin/end entries of row r of the diagonal block, 6040 and [rowMid[r],rowEnd[r]) point to begin/end entries of row r of the off-diagonal block. 6041 6042 Aperm[],Ajmap[],Atot,Annz: Arrays are allocated by this routine. 6043 Aperm[Atot] stores values from perm[] for entries belonging to the diagonal block. Length of Aperm[] is Atot, though it may also count 6044 repeats (i.e., same 'i,j' pair). 6045 Ajmap[Annz+1] stores the number of repeats of each unique entry belonging to the diagonal block. More precisely, Ajmap[t+1] - Ajmap[t] 6046 is the number of repeats for the t-th unique entry in the diagonal block. Ajmap[0] is always 0. 6047 6048 Atot: number of entries belonging to the diagonal block 6049 Annz: number of unique nonzeros belonging to the diagonal block. 6050 6051 Bperm[], Bjmap[], Btot, Bnnz are similar but for the off-diagonal block. 6052 6053 Aperm[],Bperm[],Ajmap[],Bjmap[] are allocated by this routine with PetscMalloc4(). One has to free them with PetscFree4() in the exact order. 
6054 */ 6055 static PetscErrorCode MatSplitEntries_Internal(Mat mat,PetscCount n,const PetscInt i[],PetscInt j[], 6056 PetscCount perm[],PetscCount rowBegin[],PetscCount rowMid[],PetscCount rowEnd[], 6057 PetscCount *Atot_,PetscCount **Aperm_,PetscCount *Annz_,PetscCount **Ajmap_, 6058 PetscCount *Btot_,PetscCount **Bperm_,PetscCount *Bnnz_,PetscCount **Bjmap_) 6059 { 6060 PetscInt cstart,cend,rstart,rend,row,col; 6061 PetscCount Atot=0,Btot=0; /* Total number of nonzeros in the diagonal and off-diagonal blocks */ 6062 PetscCount Annz=0,Bnnz=0; /* Number of unique nonzeros in the diagonal and off-diagonal blocks */ 6063 PetscCount k,m,p,q,r,s,mid; 6064 PetscCount *Aperm,*Bperm,*Ajmap,*Bjmap; 6065 6066 PetscFunctionBegin; 6067 PetscCall(PetscLayoutGetRange(mat->rmap,&rstart,&rend)); 6068 PetscCall(PetscLayoutGetRange(mat->cmap,&cstart,&cend)); 6069 m = rend - rstart; 6070 6071 for (k=0; k<n; k++) {if (i[k]>=0) break;} /* Skip negative rows */ 6072 6073 /* Process [k,n): sort and partition each local row into diag and offdiag portions, 6074 fill rowBegin[], rowMid[], rowEnd[], and count Atot, Btot, Annz, Bnnz. 6075 */ 6076 while (k<n) { 6077 row = i[k]; 6078 /* Entries in [k,s) are in one row. 
Shift diagonal block col indices so that diag is ahead of offdiag after sorting the row */ 6079 for (s=k; s<n; s++) if (i[s] != row) break; 6080 for (p=k; p<s; p++) { 6081 if (j[p] >= cstart && j[p] < cend) j[p] -= PETSC_MAX_INT; /* Shift diag columns to range of [-PETSC_MAX_INT, -1] */ 6082 else PetscAssert((j[p] >= 0) && (j[p] <= mat->cmap->N),PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column index %" PetscInt_FMT " is out of range",j[p]); 6083 } 6084 PetscCall(PetscSortIntWithCountArray(s-k,j+k,perm+k)); 6085 PetscCall(PetscSortedIntUpperBound(j,k,s,-1,&mid)); /* Seperate [k,s) into [k,mid) for diag and [mid,s) for offdiag */ 6086 rowBegin[row-rstart] = k; 6087 rowMid[row-rstart] = mid; 6088 rowEnd[row-rstart] = s; 6089 6090 /* Count nonzeros of this diag/offdiag row, which might have repeats */ 6091 Atot += mid - k; 6092 Btot += s - mid; 6093 6094 /* Count unique nonzeros of this diag/offdiag row */ 6095 for (p=k; p<mid;) { 6096 col = j[p]; 6097 do {j[p] += PETSC_MAX_INT; p++;} while (p<mid && j[p] == col); /* Revert the modified diagonal indices */ 6098 Annz++; 6099 } 6100 6101 for (p=mid; p<s;) { 6102 col = j[p]; 6103 do {p++;} while (p<s && j[p] == col); 6104 Bnnz++; 6105 } 6106 k = s; 6107 } 6108 6109 /* Allocation according to Atot, Btot, Annz, Bnnz */ 6110 PetscCall(PetscMalloc4(Atot,&Aperm,Btot,&Bperm,Annz+1,&Ajmap,Bnnz+1,&Bjmap)); 6111 6112 /* Re-scan indices and copy diag/offdiag permuation indices to Aperm, Bperm and also fill Ajmap and Bjmap */ 6113 Ajmap[0] = Bjmap[0] = Atot = Btot = Annz = Bnnz = 0; 6114 for (r=0; r<m; r++) { 6115 k = rowBegin[r]; 6116 mid = rowMid[r]; 6117 s = rowEnd[r]; 6118 PetscCall(PetscArraycpy(Aperm+Atot,perm+k, mid-k)); 6119 PetscCall(PetscArraycpy(Bperm+Btot,perm+mid,s-mid)); 6120 Atot += mid - k; 6121 Btot += s - mid; 6122 6123 /* Scan column indices in this row and find out how many repeats each unique nonzero has */ 6124 for (p=k; p<mid;) { 6125 col = j[p]; 6126 q = p; 6127 do {p++;} while (p<mid && j[p] == col); 6128 
Ajmap[Annz+1] = Ajmap[Annz] + (p - q); 6129 Annz++; 6130 } 6131 6132 for (p=mid; p<s;) { 6133 col = j[p]; 6134 q = p; 6135 do {p++;} while (p<s && j[p] == col); 6136 Bjmap[Bnnz+1] = Bjmap[Bnnz] + (p - q); 6137 Bnnz++; 6138 } 6139 } 6140 /* Output */ 6141 *Aperm_ = Aperm; 6142 *Annz_ = Annz; 6143 *Atot_ = Atot; 6144 *Ajmap_ = Ajmap; 6145 *Bperm_ = Bperm; 6146 *Bnnz_ = Bnnz; 6147 *Btot_ = Btot; 6148 *Bjmap_ = Bjmap; 6149 PetscFunctionReturn(0); 6150 } 6151 6152 PetscErrorCode MatSetPreallocationCOO_MPIAIJ(Mat mat, PetscCount coo_n, const PetscInt coo_i[], const PetscInt coo_j[]) 6153 { 6154 MPI_Comm comm; 6155 PetscMPIInt rank,size; 6156 PetscInt m,n,M,N,rstart,rend,cstart,cend; /* Sizes, indices of row/col, therefore with type PetscInt */ 6157 PetscCount k,p,q,rem; /* Loop variables over coo arrays */ 6158 Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ*)mat->data; 6159 6160 PetscFunctionBegin; 6161 PetscCall(PetscFree(mpiaij->garray)); 6162 PetscCall(VecDestroy(&mpiaij->lvec)); 6163 #if defined(PETSC_USE_CTABLE) 6164 PetscCall(PetscTableDestroy(&mpiaij->colmap)); 6165 #else 6166 PetscCall(PetscFree(mpiaij->colmap)); 6167 #endif 6168 PetscCall(VecScatterDestroy(&mpiaij->Mvctx)); 6169 mat->assembled = PETSC_FALSE; 6170 mat->was_assembled = PETSC_FALSE; 6171 PetscCall(MatResetPreallocationCOO_MPIAIJ(mat)); 6172 6173 PetscCall(PetscObjectGetComm((PetscObject)mat,&comm)); 6174 PetscCallMPI(MPI_Comm_size(comm,&size)); 6175 PetscCallMPI(MPI_Comm_rank(comm,&rank)); 6176 PetscCall(PetscLayoutSetUp(mat->rmap)); 6177 PetscCall(PetscLayoutSetUp(mat->cmap)); 6178 PetscCall(PetscLayoutGetRange(mat->rmap,&rstart,&rend)); 6179 PetscCall(PetscLayoutGetRange(mat->cmap,&cstart,&cend)); 6180 PetscCall(MatGetLocalSize(mat,&m,&n)); 6181 PetscCall(MatGetSize(mat,&M,&N)); 6182 6183 /* ---------------------------------------------------------------------------*/ 6184 /* Sort (i,j) by row along with a permuation array, so that the to-be-ignored */ 6185 /* entries come first, then local rows, then remote 
rows. */ 6186 /* ---------------------------------------------------------------------------*/ 6187 PetscCount n1 = coo_n,*perm1; 6188 PetscInt *i1,*j1; /* Copies of input COOs along with a permutation array */ 6189 PetscCall(PetscMalloc3(n1,&i1,n1,&j1,n1,&perm1)); 6190 PetscCall(PetscArraycpy(i1,coo_i,n1)); /* Make a copy since we'll modify it */ 6191 PetscCall(PetscArraycpy(j1,coo_j,n1)); 6192 for (k=0; k<n1; k++) perm1[k] = k; 6193 6194 /* Manipulate indices so that entries with negative row or col indices will have smallest 6195 row indices, local entries will have greater but negative row indices, and remote entries 6196 will have positive row indices. 6197 */ 6198 for (k=0; k<n1; k++) { 6199 if (i1[k] < 0 || j1[k] < 0) i1[k] = PETSC_MIN_INT; /* e.g., -2^31, minimal to move them ahead */ 6200 else if (i1[k] >= rstart && i1[k] < rend) i1[k] -= PETSC_MAX_INT; /* e.g., minus 2^31-1 to shift local rows to range of [-PETSC_MAX_INT, -1] */ 6201 else PetscCheck(!mat->nooffprocentries,PETSC_COMM_SELF,PETSC_ERR_USER_INPUT,"MAT_NO_OFF_PROC_ENTRIES is set but insert to remote rows"); 6202 else if (mpiaij->donotstash) i1[k] = PETSC_MIN_INT; /* Ignore offproc entries as if they had negative indices */ 6203 } 6204 6205 /* Sort by row; after that, [0,k) have ignored entires, [k,rem) have local rows and [rem,n1) have remote rows */ 6206 PetscCall(PetscSortIntWithIntCountArrayPair(n1,i1,j1,perm1)); 6207 for (k=0; k<n1; k++) {if (i1[k] > PETSC_MIN_INT) break;} /* Advance k to the first entry we need to take care of */ 6208 PetscCall(PetscSortedIntUpperBound(i1,k,n1,rend-1-PETSC_MAX_INT,&rem)); /* rem is upper bound of the last local row */ 6209 for (; k<rem; k++) i1[k] += PETSC_MAX_INT; /* Revert row indices of local rows*/ 6210 6211 /* ---------------------------------------------------------------------------*/ 6212 /* Split local rows into diag/offdiag portions */ 6213 /* ---------------------------------------------------------------------------*/ 6214 PetscCount 
*rowBegin1,*rowMid1,*rowEnd1; 6215 PetscCount *Ajmap1,*Aperm1,*Bjmap1,*Bperm1,*Cperm1; 6216 PetscCount Annz1,Bnnz1,Atot1,Btot1; 6217 6218 PetscCall(PetscCalloc3(m,&rowBegin1,m,&rowMid1,m,&rowEnd1)); 6219 PetscCall(PetscMalloc1(n1-rem,&Cperm1)); 6220 PetscCall(MatSplitEntries_Internal(mat,rem,i1,j1,perm1,rowBegin1,rowMid1,rowEnd1,&Atot1,&Aperm1,&Annz1,&Ajmap1,&Btot1,&Bperm1,&Bnnz1,&Bjmap1)); 6221 6222 /* ---------------------------------------------------------------------------*/ 6223 /* Send remote rows to their owner */ 6224 /* ---------------------------------------------------------------------------*/ 6225 /* Find which rows should be sent to which remote ranks*/ 6226 PetscInt nsend = 0; /* Number of MPI ranks to send data to */ 6227 PetscMPIInt *sendto; /* [nsend], storing remote ranks */ 6228 PetscInt *nentries; /* [nsend], storing number of entries sent to remote ranks; Assume PetscInt is big enough for this count, and error if not */ 6229 const PetscInt *ranges; 6230 PetscInt maxNsend = size >= 128? 128 : size; /* Assume max 128 neighbors; realloc when needed */ 6231 6232 PetscCall(PetscLayoutGetRanges(mat->rmap,&ranges)); 6233 PetscCall(PetscMalloc2(maxNsend,&sendto,maxNsend,&nentries)); 6234 for (k=rem; k<n1;) { 6235 PetscMPIInt owner; 6236 PetscInt firstRow,lastRow; 6237 6238 /* Locate a row range */ 6239 firstRow = i1[k]; /* first row of this owner */ 6240 PetscCall(PetscLayoutFindOwner(mat->rmap,firstRow,&owner)); 6241 lastRow = ranges[owner+1]-1; /* last row of this owner */ 6242 6243 /* Find the first index 'p' in [k,n) with i[p] belonging to next owner */ 6244 PetscCall(PetscSortedIntUpperBound(i1,k,n1,lastRow,&p)); 6245 6246 /* All entries in [k,p) belong to this remote owner */ 6247 if (nsend >= maxNsend) { /* Double the remote ranks arrays if not long enough */ 6248 PetscMPIInt *sendto2; 6249 PetscInt *nentries2; 6250 PetscInt maxNsend2 = (maxNsend <= size/2) ? 
maxNsend*2 : size; 6251 6252 PetscCall(PetscMalloc2(maxNsend2,&sendto2,maxNsend2,&nentries2)); 6253 PetscCall(PetscArraycpy(sendto2,sendto,maxNsend)); 6254 PetscCall(PetscArraycpy(nentries2,nentries2,maxNsend+1)); 6255 PetscCall(PetscFree2(sendto,nentries2)); 6256 sendto = sendto2; 6257 nentries = nentries2; 6258 maxNsend = maxNsend2; 6259 } 6260 sendto[nsend] = owner; 6261 nentries[nsend] = p - k; 6262 PetscCall(PetscCountCast(p-k,&nentries[nsend])); 6263 nsend++; 6264 k = p; 6265 } 6266 6267 /* Build 1st SF to know offsets on remote to send data */ 6268 PetscSF sf1; 6269 PetscInt nroots = 1,nroots2 = 0; 6270 PetscInt nleaves = nsend,nleaves2 = 0; 6271 PetscInt *offsets; 6272 PetscSFNode *iremote; 6273 6274 PetscCall(PetscSFCreate(comm,&sf1)); 6275 PetscCall(PetscMalloc1(nsend,&iremote)); 6276 PetscCall(PetscMalloc1(nsend,&offsets)); 6277 for (k=0; k<nsend; k++) { 6278 iremote[k].rank = sendto[k]; 6279 iremote[k].index = 0; 6280 nleaves2 += nentries[k]; 6281 PetscCheck(nleaves2 >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Number of SF leaves is too large for PetscInt"); 6282 } 6283 PetscCall(PetscSFSetGraph(sf1,nroots,nleaves,NULL,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER)); 6284 PetscCall(PetscSFFetchAndOpWithMemTypeBegin(sf1,MPIU_INT,PETSC_MEMTYPE_HOST,&nroots2/*rootdata*/,PETSC_MEMTYPE_HOST,nentries/*leafdata*/,PETSC_MEMTYPE_HOST,offsets/*leafupdate*/,MPI_SUM)); 6285 PetscCall(PetscSFFetchAndOpEnd(sf1,MPIU_INT,&nroots2,nentries,offsets,MPI_SUM)); /* Would nroots2 overflow, we check offsets[] below */ 6286 PetscCall(PetscSFDestroy(&sf1)); 6287 PetscAssert(nleaves2 == n1-rem,PETSC_COMM_SELF,PETSC_ERR_PLIB,"nleaves2 " PetscInt_FMT " != number of remote entries " PetscCount_FMT "",nleaves2,n1-rem); 6288 6289 /* Build 2nd SF to send remote COOs to their owner */ 6290 PetscSF sf2; 6291 nroots = nroots2; 6292 nleaves = nleaves2; 6293 PetscCall(PetscSFCreate(comm,&sf2)); 6294 PetscCall(PetscSFSetFromOptions(sf2)); 6295 PetscCall(PetscMalloc1(nleaves,&iremote)); 
6296 p = 0; 6297 for (k=0; k<nsend; k++) { 6298 PetscCheck(offsets[k] >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Number of SF roots is too large for PetscInt"); 6299 for (q=0; q<nentries[k]; q++,p++) { 6300 iremote[p].rank = sendto[k]; 6301 iremote[p].index = offsets[k] + q; 6302 } 6303 } 6304 PetscCall(PetscSFSetGraph(sf2,nroots,nleaves,NULL,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER)); 6305 6306 /* sf2 only sends contiguous leafdata to contiguous rootdata. We record the permuation which will be used to fill leafdata */ 6307 PetscCall(PetscArraycpy(Cperm1,perm1+rem,n1-rem)); 6308 6309 /* Send the remote COOs to their owner */ 6310 PetscInt n2 = nroots,*i2,*j2; /* Buffers for received COOs from other ranks, along with a permutation array */ 6311 PetscCount *perm2; /* Though PetscInt is enough for remote entries, we use PetscCount here as we want to reuse MatSplitEntries_Internal() */ 6312 PetscCall(PetscMalloc3(n2,&i2,n2,&j2,n2,&perm2)); 6313 PetscCall(PetscSFReduceWithMemTypeBegin(sf2,MPIU_INT,PETSC_MEMTYPE_HOST,i1+rem,PETSC_MEMTYPE_HOST,i2,MPI_REPLACE)); 6314 PetscCall(PetscSFReduceEnd(sf2,MPIU_INT,i1+rem,i2,MPI_REPLACE)); 6315 PetscCall(PetscSFReduceWithMemTypeBegin(sf2,MPIU_INT,PETSC_MEMTYPE_HOST,j1+rem,PETSC_MEMTYPE_HOST,j2,MPI_REPLACE)); 6316 PetscCall(PetscSFReduceEnd(sf2,MPIU_INT,j1+rem,j2,MPI_REPLACE)); 6317 6318 PetscCall(PetscFree(offsets)); 6319 PetscCall(PetscFree2(sendto,nentries)); 6320 6321 /* ---------------------------------------------------------------*/ 6322 /* Sort received COOs by row along with the permutation array */ 6323 /* ---------------------------------------------------------------*/ 6324 for (k=0; k<n2; k++) perm2[k] = k; 6325 PetscCall(PetscSortIntWithIntCountArrayPair(n2,i2,j2,perm2)); 6326 6327 /* ---------------------------------------------------------------*/ 6328 /* Split received COOs into diag/offdiag portions */ 6329 /* ---------------------------------------------------------------*/ 6330 PetscCount 
*rowBegin2,*rowMid2,*rowEnd2; 6331 PetscCount *Ajmap2,*Aperm2,*Bjmap2,*Bperm2; 6332 PetscCount Annz2,Bnnz2,Atot2,Btot2; 6333 6334 PetscCall(PetscCalloc3(m,&rowBegin2,m,&rowMid2,m,&rowEnd2)); 6335 PetscCall(MatSplitEntries_Internal(mat,n2,i2,j2,perm2,rowBegin2,rowMid2,rowEnd2,&Atot2,&Aperm2,&Annz2,&Ajmap2,&Btot2,&Bperm2,&Bnnz2,&Bjmap2)); 6336 6337 /* --------------------------------------------------------------------------*/ 6338 /* Merge local COOs with received COOs: diag with diag, offdiag with offdiag */ 6339 /* --------------------------------------------------------------------------*/ 6340 PetscInt *Ai,*Bi; 6341 PetscInt *Aj,*Bj; 6342 6343 PetscCall(PetscMalloc1(m+1,&Ai)); 6344 PetscCall(PetscMalloc1(m+1,&Bi)); 6345 PetscCall(PetscMalloc1(Annz1+Annz2,&Aj)); /* Since local and remote entries might have dups, we might allocate excess memory */ 6346 PetscCall(PetscMalloc1(Bnnz1+Bnnz2,&Bj)); 6347 6348 PetscCount *Aimap1,*Bimap1,*Aimap2,*Bimap2; 6349 PetscCall(PetscMalloc4(Annz1,&Aimap1,Bnnz1,&Bimap1,Annz2,&Aimap2,Bnnz2,&Bimap2)); 6350 6351 PetscCall(MatMergeEntries_Internal(mat,j1,j2,rowBegin1,rowMid1,rowBegin2,rowMid2,Ajmap1,Ajmap2,Aimap1,Aimap2,Ai,Aj)); 6352 PetscCall(MatMergeEntries_Internal(mat,j1,j2,rowMid1, rowEnd1,rowMid2, rowEnd2,Bjmap1,Bjmap2,Bimap1,Bimap2,Bi,Bj)); 6353 PetscCall(PetscFree3(rowBegin1,rowMid1,rowEnd1)); 6354 PetscCall(PetscFree3(rowBegin2,rowMid2,rowEnd2)); 6355 PetscCall(PetscFree3(i1,j1,perm1)); 6356 PetscCall(PetscFree3(i2,j2,perm2)); 6357 6358 /* Reallocate Aj, Bj once we know actual numbers of unique nonzeros in A and B */ 6359 PetscInt Annz = Ai[m]; 6360 PetscInt Bnnz = Bi[m]; 6361 if (Annz < Annz1 + Annz2) { 6362 PetscInt *Aj_new; 6363 PetscCall(PetscMalloc1(Annz,&Aj_new)); 6364 PetscCall(PetscArraycpy(Aj_new,Aj,Annz)); 6365 PetscCall(PetscFree(Aj)); 6366 Aj = Aj_new; 6367 } 6368 6369 if (Bnnz < Bnnz1 + Bnnz2) { 6370 PetscInt *Bj_new; 6371 PetscCall(PetscMalloc1(Bnnz,&Bj_new)); 6372 PetscCall(PetscArraycpy(Bj_new,Bj,Bnnz)); 6373 
PetscCall(PetscFree(Bj)); 6374 Bj = Bj_new; 6375 } 6376 6377 /* --------------------------------------------------------------------------------*/ 6378 /* Create new submatrices for on-process and off-process coupling */ 6379 /* --------------------------------------------------------------------------------*/ 6380 PetscScalar *Aa,*Ba; 6381 MatType rtype; 6382 Mat_SeqAIJ *a,*b; 6383 PetscCall(PetscCalloc1(Annz,&Aa)); /* Zero matrix on device */ 6384 PetscCall(PetscCalloc1(Bnnz,&Ba)); 6385 /* make Aj[] local, i.e, based off the start column of the diagonal portion */ 6386 if (cstart) {for (k=0; k<Annz; k++) Aj[k] -= cstart;} 6387 PetscCall(MatDestroy(&mpiaij->A)); 6388 PetscCall(MatDestroy(&mpiaij->B)); 6389 PetscCall(MatGetRootType_Private(mat,&rtype)); 6390 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,Ai,Aj,Aa,&mpiaij->A)); 6391 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,mat->cmap->N,Bi,Bj,Ba,&mpiaij->B)); 6392 PetscCall(MatSetUpMultiply_MPIAIJ(mat)); 6393 6394 a = (Mat_SeqAIJ*)mpiaij->A->data; 6395 b = (Mat_SeqAIJ*)mpiaij->B->data; 6396 a->singlemalloc = b->singlemalloc = PETSC_FALSE; /* Let newmat own Ai,Aj,Aa,Bi,Bj,Ba */ 6397 a->free_a = b->free_a = PETSC_TRUE; 6398 a->free_ij = b->free_ij = PETSC_TRUE; 6399 6400 /* conversion must happen AFTER multiply setup */ 6401 PetscCall(MatConvert(mpiaij->A,rtype,MAT_INPLACE_MATRIX,&mpiaij->A)); 6402 PetscCall(MatConvert(mpiaij->B,rtype,MAT_INPLACE_MATRIX,&mpiaij->B)); 6403 PetscCall(VecDestroy(&mpiaij->lvec)); 6404 PetscCall(MatCreateVecs(mpiaij->B,&mpiaij->lvec,NULL)); 6405 PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)mpiaij->lvec)); 6406 6407 mpiaij->coo_n = coo_n; 6408 mpiaij->coo_sf = sf2; 6409 mpiaij->sendlen = nleaves; 6410 mpiaij->recvlen = nroots; 6411 6412 mpiaij->Annz1 = Annz1; 6413 mpiaij->Annz2 = Annz2; 6414 mpiaij->Bnnz1 = Bnnz1; 6415 mpiaij->Bnnz2 = Bnnz2; 6416 6417 mpiaij->Atot1 = Atot1; 6418 mpiaij->Atot2 = Atot2; 6419 mpiaij->Btot1 = Btot1; 6420 mpiaij->Btot2 = 
Btot2; 6421 6422 mpiaij->Aimap1 = Aimap1; 6423 mpiaij->Aimap2 = Aimap2; 6424 mpiaij->Bimap1 = Bimap1; 6425 mpiaij->Bimap2 = Bimap2; 6426 6427 mpiaij->Ajmap1 = Ajmap1; 6428 mpiaij->Ajmap2 = Ajmap2; 6429 mpiaij->Bjmap1 = Bjmap1; 6430 mpiaij->Bjmap2 = Bjmap2; 6431 6432 mpiaij->Aperm1 = Aperm1; 6433 mpiaij->Aperm2 = Aperm2; 6434 mpiaij->Bperm1 = Bperm1; 6435 mpiaij->Bperm2 = Bperm2; 6436 6437 mpiaij->Cperm1 = Cperm1; 6438 6439 /* Allocate in preallocation. If not used, it has zero cost on host */ 6440 PetscCall(PetscMalloc2(mpiaij->sendlen,&mpiaij->sendbuf,mpiaij->recvlen,&mpiaij->recvbuf)); 6441 PetscFunctionReturn(0); 6442 } 6443 6444 static PetscErrorCode MatSetValuesCOO_MPIAIJ(Mat mat,const PetscScalar v[],InsertMode imode) 6445 { 6446 Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ*)mat->data; 6447 Mat A = mpiaij->A,B = mpiaij->B; 6448 PetscCount Annz1 = mpiaij->Annz1,Annz2 = mpiaij->Annz2,Bnnz1 = mpiaij->Bnnz1,Bnnz2 = mpiaij->Bnnz2; 6449 PetscScalar *Aa,*Ba; 6450 PetscScalar *sendbuf = mpiaij->sendbuf; 6451 PetscScalar *recvbuf = mpiaij->recvbuf; 6452 const PetscCount *Ajmap1 = mpiaij->Ajmap1,*Ajmap2 = mpiaij->Ajmap2,*Aimap1 = mpiaij->Aimap1,*Aimap2 = mpiaij->Aimap2; 6453 const PetscCount *Bjmap1 = mpiaij->Bjmap1,*Bjmap2 = mpiaij->Bjmap2,*Bimap1 = mpiaij->Bimap1,*Bimap2 = mpiaij->Bimap2; 6454 const PetscCount *Aperm1 = mpiaij->Aperm1,*Aperm2 = mpiaij->Aperm2,*Bperm1 = mpiaij->Bperm1,*Bperm2 = mpiaij->Bperm2; 6455 const PetscCount *Cperm1 = mpiaij->Cperm1; 6456 6457 PetscFunctionBegin; 6458 PetscCall(MatSeqAIJGetArray(A,&Aa)); /* Might read and write matrix values */ 6459 PetscCall(MatSeqAIJGetArray(B,&Ba)); 6460 if (imode == INSERT_VALUES) { 6461 PetscCall(PetscMemzero(Aa,((Mat_SeqAIJ*)A->data)->nz*sizeof(PetscScalar))); 6462 PetscCall(PetscMemzero(Ba,((Mat_SeqAIJ*)B->data)->nz*sizeof(PetscScalar))); 6463 } 6464 6465 /* Pack entries to be sent to remote */ 6466 for (PetscCount i=0; i<mpiaij->sendlen; i++) sendbuf[i] = v[Cperm1[i]]; 6467 6468 /* Send remote entries to their 
owner and overlap the communication with local computation */ 6469 PetscCall(PetscSFReduceWithMemTypeBegin(mpiaij->coo_sf,MPIU_SCALAR,PETSC_MEMTYPE_HOST,sendbuf,PETSC_MEMTYPE_HOST,recvbuf,MPI_REPLACE)); 6470 /* Add local entries to A and B */ 6471 for (PetscCount i=0; i<Annz1; i++) { 6472 for (PetscCount k=Ajmap1[i]; k<Ajmap1[i+1]; k++) Aa[Aimap1[i]] += v[Aperm1[k]]; 6473 } 6474 for (PetscCount i=0; i<Bnnz1; i++) { 6475 for (PetscCount k=Bjmap1[i]; k<Bjmap1[i+1]; k++) Ba[Bimap1[i]] += v[Bperm1[k]]; 6476 } 6477 PetscCall(PetscSFReduceEnd(mpiaij->coo_sf,MPIU_SCALAR,sendbuf,recvbuf,MPI_REPLACE)); 6478 6479 /* Add received remote entries to A and B */ 6480 for (PetscCount i=0; i<Annz2; i++) { 6481 for (PetscCount k=Ajmap2[i]; k<Ajmap2[i+1]; k++) Aa[Aimap2[i]] += recvbuf[Aperm2[k]]; 6482 } 6483 for (PetscCount i=0; i<Bnnz2; i++) { 6484 for (PetscCount k=Bjmap2[i]; k<Bjmap2[i+1]; k++) Ba[Bimap2[i]] += recvbuf[Bperm2[k]]; 6485 } 6486 PetscCall(MatSeqAIJRestoreArray(A,&Aa)); 6487 PetscCall(MatSeqAIJRestoreArray(B,&Ba)); 6488 PetscFunctionReturn(0); 6489 } 6490 6491 /* ----------------------------------------------------------------*/ 6492 6493 /*MC 6494 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 6495 6496 Options Database Keys: 6497 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 6498 6499 Level: beginner 6500 6501 Notes: 6502 MatSetValues() may be called for this matrix type with a NULL argument for the numerical values, 6503 in this case the values associated with the rows and columns one passes in are set to zero 6504 in the matrix 6505 6506 MatSetOptions(,MAT_STRUCTURE_ONLY,PETSC_TRUE) may be called for this matrix type. 
    In this case no
    space is allocated for the nonzero entries and any entries passed with MatSetValues() are ignored

.seealso: MatCreateAIJ()
M*/

/* Constructor for the MPIAIJ matrix type: allocates the Mat_MPIAIJ data, installs the
   function table, creates the stash used for off-process entries, and registers all the
   composed methods (preallocation, conversions, products, COO assembly). */
PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
{
  Mat_MPIAIJ  *b;
  PetscMPIInt size;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B),&size));

  PetscCall(PetscNewLog(B,&b));
  B->data       = (void*)b;
  PetscCall(PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps)));
  B->assembled  = PETSC_FALSE;
  B->insertmode = NOT_SET_VALUES;
  b->size       = size;

  PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank));

  /* build cache for off array entries formed */
  PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash));

  b->donotstash  = PETSC_FALSE;
  b->colmap      = NULL;
  b->garray      = NULL;
  b->roworiented = PETSC_TRUE;

  /* stuff used for matrix vector multiply */
  b->lvec  = NULL;
  b->Mvctx = NULL;

  /* stuff for MatGetRow() */
  b->rowindices   = NULL;
  b->rowvalues    = NULL;
  b->getrowactive = PETSC_FALSE;

  /* flexible pointer used in CUSPARSE classes */
  b->spptr = NULL;

  /* Register composed methods; the "_C" names are the public lookup keys used by
     PetscObjectQueryFunction() throughout PETSc */
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetUseScalableIncreaseOverlap_C",MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatResetPreallocation_C",MatResetPreallocation_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijsell_C",MatConvert_MPIAIJ_MPIAIJSELL));
#if defined(PETSC_HAVE_CUDA)
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcusparse_C",MatConvert_MPIAIJ_MPIAIJCUSPARSE));
#endif
#if defined(PETSC_HAVE_KOKKOS_KERNELS)
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijkokkos_C",MatConvert_MPIAIJ_MPIAIJKokkos));
#endif
#if defined(PETSC_HAVE_MKL_SPARSE)
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijmkl_C",MatConvert_MPIAIJ_MPIAIJMKL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpibaij_C",MatConvert_MPIAIJ_MPIBAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpidense_C",MatConvert_MPIAIJ_MPIDense));
#if defined(PETSC_HAVE_ELEMENTAL)
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_elemental_C",MatConvert_MPIAIJ_Elemental));
#endif
#if defined(PETSC_HAVE_SCALAPACK)
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_scalapack_C",MatConvert_AIJ_ScaLAPACK));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_is_C",MatConvert_XAIJ_IS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisell_C",MatConvert_MPIAIJ_MPISELL));
#if defined(PETSC_HAVE_HYPRE)
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_hypre_C",MatConvert_AIJ_HYPRE));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_transpose_mpiaij_mpiaij_C",MatProductSetFromOptions_Transpose_AIJ_AIJ));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_is_mpiaij_C",MatProductSetFromOptions_IS_XAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaij_mpiaij_C",MatProductSetFromOptions_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatSetPreallocationCOO_C",MatSetPreallocationCOO_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatSetValuesCOO_C",MatSetValuesCOO_MPIAIJ));
  PetscCall(PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ));
  PetscFunctionReturn(0);
}

/*@C
     MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal"
         and "off-diagonal" part of the matrix in CSR format.

   Collective

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (Cannot be PETSC_DECIDE)
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
       calculated if N is given) For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.  i - row indices for "diagonal" portion of matrix; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
.  j - column indices, which must be local, i.e., based off the start column of the diagonal portion
.
a - matrix values 6609 . oi - row indices for "off-diagonal" portion of matrix; that is oi[0] = 0, oi[row] = oi[row-1] + number of elements in that row of the matrix 6610 . oj - column indices, which must be global, representing global columns in the MPIAIJ matrix 6611 - oa - matrix values 6612 6613 Output Parameter: 6614 . mat - the matrix 6615 6616 Level: advanced 6617 6618 Notes: 6619 The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user 6620 must free the arrays once the matrix has been destroyed and not before. 6621 6622 The i and j indices are 0 based 6623 6624 See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix 6625 6626 This sets local rows and cannot be used to set off-processor values. 6627 6628 Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a 6629 legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does 6630 not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because 6631 the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to 6632 keep track of the underlying array. Use MatSetOption(A,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all 6633 communication if it is known that only local entries will be set. 
.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
          MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays()
@*/
PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
{
  Mat_MPIAIJ *maij;

  PetscFunctionBegin;
  /* m must be known up front: the split CSR arrays are laid out per local row */
  PetscCheck(m >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
  PetscCheckFalse(i[0],PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
  PetscCheckFalse(oi[0],PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
  PetscCall(MatCreate(comm,mat));
  PetscCall(MatSetSizes(*mat,m,n,M,N));
  PetscCall(MatSetType(*mat,MATMPIAIJ));
  maij = (Mat_MPIAIJ*) (*mat)->data;

  /* the user arrays are wrapped (not copied), so no preallocation pass is needed */
  (*mat)->preallocated = PETSC_TRUE;

  PetscCall(PetscLayoutSetUp((*mat)->rmap));
  PetscCall(PetscLayoutSetUp((*mat)->cmap));

  /* wrap the "diagonal" block (local column ids) and the "off-diagonal" block (global column ids) */
  PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A));
  PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B));

  /* only local rows were provided, so assembly needs no off-process communication */
  PetscCall(MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE));
  PetscCall(MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY));
  PetscCall(MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE));
  PetscCall(MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE));
  PetscFunctionReturn(0);
}

/* Scratch data kept by the device-capable ("BACKEND") MatProduct implementation for MPIAIJ,
   attached to C->product->data by MatProductSymbolic_MPIAIJBACKEND() */
typedef struct {
  Mat       *mp;    /* intermediate products */
  PetscBool *mptmp; /* is the intermediate product temporary ?
 */
  PetscInt cp; /* number of intermediate products */

  /* support for MatGetBrowsOfAoCols_MPIAIJ for P_oth */
  PetscInt    *startsj_s,*startsj_r;
  PetscScalar *bufa;
  Mat         P_oth;

  /* may take advantage of merging product->B */
  Mat Bloc; /* B-local by merging diag and off-diag */

  /* cusparse does not have support to split between symbolic and numeric phases.
     When api_user is true, we don't need to update the numerical values
     of the temporary storage */
  PetscBool reusesym;

  /* support for COO values insertion */
  PetscScalar  *coo_v,*coo_w; /* store on-process and off-process COO scalars, and used as MPI recv/send buffers respectively */
  PetscInt     **own;         /* own[i] points to address of on-process COO indices for Mat mp[i] */
  PetscInt     **off;         /* off[i] points to address of off-process COO indices for Mat mp[i] */
  PetscBool    hasoffproc;    /* if true, have off-process values insertion (i.e. AtB or PtAP) */
  PetscSF      sf;            /* used for non-local values insertion and memory malloc */
  PetscMemType mtype;         /* memory space in which the COO buffers live (set from C's matrix type) */

  /* customization */
  PetscBool abmerge;    /* for AB: merge P's diag and off-diag blocks before multiplying by A_diag */
  PetscBool P_oth_bind; /* bind P_oth to the CPU */
} MatMatMPIAIJBACKEND;

/* Destroy callback for the product data attached by MatProductSymbolic_MPIAIJBACKEND() */
PetscErrorCode MatDestroy_MatMatMPIAIJBACKEND(void *data)
{
  MatMatMPIAIJBACKEND *mmdata = (MatMatMPIAIJBACKEND*)data;
  PetscInt            i;

  PetscFunctionBegin;
  PetscCall(PetscFree2(mmdata->startsj_s,mmdata->startsj_r));
  PetscCall(PetscFree(mmdata->bufa));
  /* coo_v/coo_w were allocated with PetscSFMalloc() in the memory space recorded in mtype */
  PetscCall(PetscSFFree(mmdata->sf,mmdata->mtype,mmdata->coo_v));
  PetscCall(PetscSFFree(mmdata->sf,mmdata->mtype,mmdata->coo_w));
  PetscCall(MatDestroy(&mmdata->P_oth));
  PetscCall(MatDestroy(&mmdata->Bloc));
  PetscCall(PetscSFDestroy(&mmdata->sf));
  for (i = 0; i < mmdata->cp; i++) {
    PetscCall(MatDestroy(&mmdata->mp[i]));
  }
  PetscCall(PetscFree2(mmdata->mp,mmdata->mptmp));
  /* own[0]/off[0] hold the single shared index arrays that own[p]/off[p] segment */
  PetscCall(PetscFree(mmdata->own[0]));
  PetscCall(PetscFree(mmdata->own));
  PetscCall(PetscFree(mmdata->off[0]));
  PetscCall(PetscFree(mmdata->off));
  PetscCall(PetscFree(mmdata));
  PetscFunctionReturn(0);
}

/* Copy selected n entries with indices in idx[] of A to v[].
   If idx is NULL, copy the whole data array of A to v[]
 */
static PetscErrorCode MatSeqAIJCopySubArray(Mat A, PetscInt n, const PetscInt idx[], PetscScalar v[])
{
  PetscErrorCode (*f)(Mat,PetscInt,const PetscInt[],PetscScalar[]);

  PetscFunctionBegin;
  /* prefer a type-specific (possibly device-side) implementation when one is composed on A */
  PetscCall(PetscObjectQueryFunction((PetscObject)A,"MatSeqAIJCopySubArray_C",&f));
  if (f) {
    PetscCall((*f)(A,n,idx,v));
  } else {
    const PetscScalar *vv;

    PetscCall(MatSeqAIJGetArrayRead(A,&vv));
    if (n && idx) {
      PetscScalar    *w  = v;
      const PetscInt *oi = idx;
      PetscInt       j;

      for (j = 0; j < n; j++) *w++ = vv[*oi++];
    } else {
      PetscCall(PetscArraycpy(v,vv,n));
    }
    PetscCall(MatSeqAIJRestoreArrayRead(A,&vv));
  }
  PetscFunctionReturn(0);
}

/* Numeric phase: rerun the intermediate sequential products, then insert their values into C via COO */
static PetscErrorCode MatProductNumeric_MPIAIJBACKEND(Mat C)
{
  MatMatMPIAIJBACKEND *mmdata;
  PetscInt            i,n_d,n_o;

  PetscFunctionBegin;
  MatCheckProduct(C,1);
  PetscCheck(C->product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty");
  mmdata = (MatMatMPIAIJBACKEND*)C->product->data;
  if (!mmdata->reusesym) { /* update temporary matrices */
    if (mmdata->P_oth) {
      PetscCall(MatGetBrowsOfAoCols_MPIAIJ(C->product->A,C->product->B,MAT_REUSE_MATRIX,&mmdata->startsj_s,&mmdata->startsj_r,&mmdata->bufa,&mmdata->P_oth));
    }
    if (mmdata->Bloc) {
      PetscCall(MatMPIAIJGetLocalMatMerge(C->product->B,MAT_REUSE_MATRIX,NULL,&mmdata->Bloc));
    }
  }
  /* symbolic-phase values are only reusable the first time through */
  mmdata->reusesym = PETSC_FALSE;

  /* run the numeric phase of every intermediate (sequential) product */
  for (i = 0; i < mmdata->cp; i++) {
    PetscCheck(mmdata->mp[i]->ops->productnumeric,PetscObjectComm((PetscObject)mmdata->mp[i]),PETSC_ERR_PLIB,"Missing numeric op for %s",MatProductTypes[mmdata->mp[i]->product->type]);
    PetscCall((*mmdata->mp[i]->ops->productnumeric)(mmdata->mp[i]));
  }
  /* copy computed values into the on-process (coo_v) and off-process (coo_w) COO buffers */
  for (i = 0, n_d = 0, n_o = 0; i < mmdata->cp; i++) {
    PetscInt noff = mmdata->off[i+1] - mmdata->off[i];

    if (mmdata->mptmp[i]) continue; /* temporary products feed later products and are not inserted */
    if (noff) {
      PetscInt nown = mmdata->own[i+1] - mmdata->own[i];

      PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i],noff,mmdata->off[i],mmdata->coo_w + n_o));
      PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i],nown,mmdata->own[i],mmdata->coo_v + n_d));
      n_o += noff;
      n_d += nown;
    } else {
      Mat_SeqAIJ *mm = (Mat_SeqAIJ*)mmdata->mp[i]->data;

      PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i],mm->nz,NULL,mmdata->coo_v + n_d));
      n_d += mm->nz;
    }
  }
  if (mmdata->hasoffproc) { /* offprocess insertion */
    PetscCall(PetscSFGatherBegin(mmdata->sf,MPIU_SCALAR,mmdata->coo_w,mmdata->coo_v+n_d));
    PetscCall(PetscSFGatherEnd(mmdata->sf,MPIU_SCALAR,mmdata->coo_w,mmdata->coo_v+n_d));
  }
  PetscCall(MatSetValuesCOO(C,mmdata->coo_v,INSERT_VALUES));
  PetscFunctionReturn(0);
}

/* Support for Pt * A, A * P, or Pt * A * P */
#define MAX_NUMBER_INTERMEDIATE 4
PetscErrorCode MatProductSymbolic_MPIAIJBACKEND(Mat C)
{
  Mat_Product            *product = C->product;
  Mat                    A,P,mp[MAX_NUMBER_INTERMEDIATE]; /* A, P and a series of intermediate matrices */
  Mat_MPIAIJ             *a,*p;
  MatMatMPIAIJBACKEND    *mmdata;
  ISLocalToGlobalMapping P_oth_l2g = NULL;
  IS                     glob = NULL;
  const char             *prefix;
  char                   pprefix[256];
  const PetscInt         *globidx,*P_oth_idx;
  PetscInt               i,j,cp,m,n,M,N,*coo_i,*coo_j;
  PetscCount             ncoo,ncoo_d,ncoo_o,ncoo_oown;
  PetscInt               cmapt[MAX_NUMBER_INTERMEDIATE],rmapt[MAX_NUMBER_INTERMEDIATE]; /* col/row map type for each Mat in mp[].
 */
  /* type-0: consecutive, start from 0; type-1: consecutive with */
  /* a base offset; type-2: sparse with a local to global map table */
  const PetscInt *cmapa[MAX_NUMBER_INTERMEDIATE],*rmapa[MAX_NUMBER_INTERMEDIATE]; /* col/row local to global map array (table) for type-2 map type */

  MatProductType ptype;
  PetscBool      mptmp[MAX_NUMBER_INTERMEDIATE],hasoffproc = PETSC_FALSE,iscuda,iskokk;
  PetscMPIInt    size;

  PetscFunctionBegin;
  MatCheckProduct(C,1);
  PetscCheck(!product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data not empty");
  ptype = product->type;
  /* treat A^t*B as A*B when A is symmetric, and record that the symmetry was exploited */
  if (product->A->symmetric && ptype == MATPRODUCT_AtB) {
    ptype                                          = MATPRODUCT_AB;
    product->symbolic_used_the_fact_A_is_symmetric = PETSC_TRUE;
  }
  /* determine C's sizes and whether computed values must be scattered to other processes */
  switch (ptype) {
  case MATPRODUCT_AB:
    A          = product->A;
    P          = product->B;
    m          = A->rmap->n;
    n          = P->cmap->n;
    M          = A->rmap->N;
    N          = P->cmap->N;
    hasoffproc = PETSC_FALSE; /* will not scatter mat product values to other processes */
    break;
  case MATPRODUCT_AtB:
    P          = product->A;
    A          = product->B;
    m          = P->cmap->n;
    n          = A->cmap->n;
    M          = P->cmap->N;
    N          = A->cmap->N;
    hasoffproc = PETSC_TRUE;
    break;
  case MATPRODUCT_PtAP:
    A          = product->A;
    P          = product->B;
    m          = P->cmap->n;
    n          = P->cmap->n;
    M          = P->cmap->N;
    N          = P->cmap->N;
    hasoffproc = PETSC_TRUE;
    break;
  default:
    SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Not for product type %s",MatProductTypes[ptype]);
  }
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)C),&size));
  if (size == 1) hasoffproc = PETSC_FALSE;

  /* defaults */
  for (i=0;i<MAX_NUMBER_INTERMEDIATE;i++) {
    mp[i]    = NULL;
    mptmp[i] = PETSC_FALSE;
    rmapt[i] = -1;
    cmapt[i] = -1;
    rmapa[i] = NULL;
    cmapa[i] = NULL;
  }

  /* customization */
  PetscCall(PetscNew(&mmdata));
  mmdata->reusesym = product->api_user;
  if (ptype == MATPRODUCT_AB) {
    if (product->api_user) {
      PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatMatMult","Mat");
      PetscCall(PetscOptionsBool("-matmatmult_backend_mergeB","Merge product->B local matrices","MatMatMult",mmdata->abmerge,&mmdata->abmerge,NULL));
      PetscCall(PetscOptionsBool("-matmatmult_backend_pothbind","Bind P_oth to CPU","MatBindToCPU",mmdata->P_oth_bind,&mmdata->P_oth_bind,NULL));
      PetscOptionsEnd();
    } else {
      PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_AB","Mat");
      PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_mergeB","Merge product->B local matrices","MatMatMult",mmdata->abmerge,&mmdata->abmerge,NULL));
      PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_pothbind","Bind P_oth to CPU","MatBindToCPU",mmdata->P_oth_bind,&mmdata->P_oth_bind,NULL));
      PetscOptionsEnd();
    }
  } else if (ptype == MATPRODUCT_PtAP) {
    if (product->api_user) {
      PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatPtAP","Mat");
      PetscCall(PetscOptionsBool("-matptap_backend_pothbind","Bind P_oth to CPU","MatBindToCPU",mmdata->P_oth_bind,&mmdata->P_oth_bind,NULL));
      PetscOptionsEnd();
    } else {
      PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_PtAP","Mat");
      PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_pothbind","Bind P_oth to CPU","MatBindToCPU",mmdata->P_oth_bind,&mmdata->P_oth_bind,NULL));
      PetscOptionsEnd();
    }
  }
  a = (Mat_MPIAIJ*)A->data;
  p = (Mat_MPIAIJ*)P->data;
  PetscCall(MatSetSizes(C,m,n,M,N));
  PetscCall(PetscLayoutSetUp(C->rmap));
  PetscCall(PetscLayoutSetUp(C->cmap));
  PetscCall(MatSetType(C,((PetscObject)A)->type_name));
  PetscCall(MatGetOptionsPrefix(C,&prefix));

  /* build the list of intermediate sequential products mp[0..cp-1] */
  cp = 0;
  switch (ptype) {
  case MATPRODUCT_AB: /* A * P */
    PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A,P,MAT_INITIAL_MATRIX,&mmdata->startsj_s,&mmdata->startsj_r,&mmdata->bufa,&mmdata->P_oth));

    /* A_diag * P_local (merged or not) */
    if (mmdata->abmerge) { /* P's diagonal and off-diag blocks are merged to one matrix, then multiplied by A_diag */
      /* P is product->B */
      PetscCall(MatMPIAIJGetLocalMatMerge(P,MAT_INITIAL_MATRIX,&glob,&mmdata->Bloc));
      PetscCall(MatProductCreate(a->A,mmdata->Bloc,NULL,&mp[cp]));
      PetscCall(MatProductSetType(mp[cp],MATPRODUCT_AB));
      PetscCall(MatProductSetFill(mp[cp],product->fill));
      PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp));
      PetscCall(MatSetOptionsPrefix(mp[cp],prefix));
      PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix));
      mp[cp]->product->api_user = product->api_user;
      PetscCall(MatProductSetFromOptions(mp[cp]));
      PetscCheck(mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
      PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
      PetscCall(ISGetIndices(glob,&globidx));
      rmapt[cp] = 1;
      cmapt[cp] = 2;
      cmapa[cp] = globidx;
      mptmp[cp] = PETSC_FALSE;
      cp++;
    } else { /* A_diag * P_diag and A_diag * P_off */
      PetscCall(MatProductCreate(a->A,p->A,NULL,&mp[cp]));
      PetscCall(MatProductSetType(mp[cp],MATPRODUCT_AB));
      PetscCall(MatProductSetFill(mp[cp],product->fill));
      PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp));
      PetscCall(MatSetOptionsPrefix(mp[cp],prefix));
      PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix));
      mp[cp]->product->api_user = product->api_user;
      PetscCall(MatProductSetFromOptions(mp[cp]));
      PetscCheck(mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
      PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
      rmapt[cp] = 1;
      cmapt[cp] = 1;
      mptmp[cp] = PETSC_FALSE;
      cp++;
      PetscCall(MatProductCreate(a->A,p->B,NULL,&mp[cp]));
      PetscCall(MatProductSetType(mp[cp],MATPRODUCT_AB));
      PetscCall(MatProductSetFill(mp[cp],product->fill));
      PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp));
      PetscCall(MatSetOptionsPrefix(mp[cp],prefix));
      PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix));
      mp[cp]->product->api_user = product->api_user;
      PetscCall(MatProductSetFromOptions(mp[cp]));
      PetscCheck(mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
      PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
      rmapt[cp] = 1;
      cmapt[cp] = 2;
      cmapa[cp] = p->garray;
      mptmp[cp] = PETSC_FALSE;
      cp++;
    }

    /* A_off * P_other */
    if (mmdata->P_oth) {
      PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth,&P_oth_l2g)); /* make P_oth use local col ids */
      PetscCall(ISLocalToGlobalMappingGetIndices(P_oth_l2g,&P_oth_idx));
      PetscCall(MatSetType(mmdata->P_oth,((PetscObject)(a->B))->type_name));
      PetscCall(MatBindToCPU(mmdata->P_oth,mmdata->P_oth_bind));
      PetscCall(MatProductCreate(a->B,mmdata->P_oth,NULL,&mp[cp]));
      PetscCall(MatProductSetType(mp[cp],MATPRODUCT_AB));
      PetscCall(MatProductSetFill(mp[cp],product->fill));
      PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp));
      PetscCall(MatSetOptionsPrefix(mp[cp],prefix));
      PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix));
      mp[cp]->product->api_user = product->api_user;
      PetscCall(MatProductSetFromOptions(mp[cp]));
      PetscCheck(mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
      PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
      rmapt[cp] = 1;
      cmapt[cp] = 2;
      cmapa[cp] = P_oth_idx;
      mptmp[cp] = PETSC_FALSE;
      cp++;
    }
    break;

  case MATPRODUCT_AtB: /* (P^t * A): P_diag * A_loc + P_off * A_loc */
    /* A is product->B */
    PetscCall(MatMPIAIJGetLocalMatMerge(A,MAT_INITIAL_MATRIX,&glob,&mmdata->Bloc));
    if (A == P) { /* when A==P, we can take advantage of the already merged mmdata->Bloc */
      PetscCall(MatProductCreate(mmdata->Bloc,mmdata->Bloc,NULL,&mp[cp]));
      PetscCall(MatProductSetType(mp[cp],MATPRODUCT_AtB));
      PetscCall(MatProductSetFill(mp[cp],product->fill));
      PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp));
      PetscCall(MatSetOptionsPrefix(mp[cp],prefix));
      PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix));
      mp[cp]->product->api_user = product->api_user;
      PetscCall(MatProductSetFromOptions(mp[cp]));
      PetscCheck(mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
      PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
      PetscCall(ISGetIndices(glob,&globidx));
      rmapt[cp] = 2;
      rmapa[cp] = globidx;
      cmapt[cp] = 2;
      cmapa[cp] = globidx;
      mptmp[cp] = PETSC_FALSE;
      cp++;
    } else {
      PetscCall(MatProductCreate(p->A,mmdata->Bloc,NULL,&mp[cp]));
      PetscCall(MatProductSetType(mp[cp],MATPRODUCT_AtB));
      PetscCall(MatProductSetFill(mp[cp],product->fill));
      PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp));
      PetscCall(MatSetOptionsPrefix(mp[cp],prefix));
      PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix));
      mp[cp]->product->api_user = product->api_user;
      PetscCall(MatProductSetFromOptions(mp[cp]));
      PetscCheck(mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
      PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
      PetscCall(ISGetIndices(glob,&globidx));
      rmapt[cp] = 1;
      cmapt[cp] = 2;
      cmapa[cp] = globidx;
      mptmp[cp] = PETSC_FALSE;
      cp++;
      PetscCall(MatProductCreate(p->B,mmdata->Bloc,NULL,&mp[cp]));
      PetscCall(MatProductSetType(mp[cp],MATPRODUCT_AtB));
      PetscCall(MatProductSetFill(mp[cp],product->fill));
      PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp));
      PetscCall(MatSetOptionsPrefix(mp[cp],prefix));
      PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix));
      mp[cp]->product->api_user = product->api_user;
      PetscCall(MatProductSetFromOptions(mp[cp]));
      PetscCheck(mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
      PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
      rmapt[cp] = 2;
      rmapa[cp] = p->garray;
      cmapt[cp] = 2;
      cmapa[cp] = globidx;
      mptmp[cp] = PETSC_FALSE;
      cp++;
    }
    break;
  case MATPRODUCT_PtAP:
    PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A,P,MAT_INITIAL_MATRIX,&mmdata->startsj_s,&mmdata->startsj_r,&mmdata->bufa,&mmdata->P_oth));
    /* P is product->B */
    PetscCall(MatMPIAIJGetLocalMatMerge(P,MAT_INITIAL_MATRIX,&glob,&mmdata->Bloc));
    PetscCall(MatProductCreate(a->A,mmdata->Bloc,NULL,&mp[cp]));
    PetscCall(MatProductSetType(mp[cp],MATPRODUCT_PtAP));
    PetscCall(MatProductSetFill(mp[cp],product->fill));
    PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp));
    PetscCall(MatSetOptionsPrefix(mp[cp],prefix));
    PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix));
    mp[cp]->product->api_user = product->api_user;
    PetscCall(MatProductSetFromOptions(mp[cp]));
    PetscCheck(mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
    PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
    PetscCall(ISGetIndices(glob,&globidx));
    rmapt[cp] = 2;
    rmapa[cp] = globidx;
    cmapt[cp] = 2;
    cmapa[cp] = globidx;
    mptmp[cp] = PETSC_FALSE;
    cp++;
    if (mmdata->P_oth) {
      PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth,&P_oth_l2g));
      PetscCall(ISLocalToGlobalMappingGetIndices(P_oth_l2g,&P_oth_idx));
      PetscCall(MatSetType(mmdata->P_oth,((PetscObject)(a->B))->type_name));
      PetscCall(MatBindToCPU(mmdata->P_oth,mmdata->P_oth_bind));
      PetscCall(MatProductCreate(a->B,mmdata->P_oth,NULL,&mp[cp]));
      PetscCall(MatProductSetType(mp[cp],MATPRODUCT_AB));
      PetscCall(MatProductSetFill(mp[cp],product->fill));
      PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp));
      PetscCall(MatSetOptionsPrefix(mp[cp],prefix));
      PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix));
      mp[cp]->product->api_user = product->api_user;
      PetscCall(MatProductSetFromOptions(mp[cp]));
      PetscCheck(mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
      PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
      mptmp[cp] = PETSC_TRUE; /* A_off * P_oth only feeds the next product below */
      cp++;
      PetscCall(MatProductCreate(mmdata->Bloc,mp[1],NULL,&mp[cp]));
      PetscCall(MatProductSetType(mp[cp],MATPRODUCT_AtB));
      PetscCall(MatProductSetFill(mp[cp],product->fill));
      PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp));
      PetscCall(MatSetOptionsPrefix(mp[cp],prefix));
      PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix));
      mp[cp]->product->api_user = product->api_user;
      PetscCall(MatProductSetFromOptions(mp[cp]));
      PetscCheck(mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
      PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
      rmapt[cp] = 2;
      rmapa[cp] = globidx;
      cmapt[cp] = 2;
      cmapa[cp] = P_oth_idx;
      mptmp[cp] = PETSC_FALSE;
      cp++;
    }
    break;
  default:
    SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Not for product type %s",MatProductTypes[ptype]);
  }
  /* sanity check */
  if (size > 1) for (i = 0; i < cp; i++) PetscCheckFalse(rmapt[i] == 2 && !hasoffproc,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected offproc map type for product %" PetscInt_FMT,i);

  PetscCall(PetscMalloc2(cp,&mmdata->mp,cp,&mmdata->mptmp));
  for (i = 0; i < cp; i++) {
    mmdata->mp[i]    = mp[i];
    mmdata->mptmp[i] = mptmp[i];
  }
  mmdata->cp             = cp;
  C->product->data       = mmdata;
  C->product->destroy    = MatDestroy_MatMatMPIAIJBACKEND;
  C->ops->productnumeric = MatProductNumeric_MPIAIJBACKEND;

  /* memory type */
  mmdata->mtype = PETSC_MEMTYPE_HOST;
  PetscCall(PetscObjectTypeCompareAny((PetscObject)C,&iscuda,MATSEQAIJCUSPARSE,MATMPIAIJCUSPARSE,""));
  PetscCall(PetscObjectTypeCompareAny((PetscObject)C,&iskokk,MATSEQAIJKOKKOS,MATMPIAIJKOKKOS,""));
  if (iscuda) mmdata->mtype = PETSC_MEMTYPE_CUDA;
  else if (iskokk) mmdata->mtype = PETSC_MEMTYPE_KOKKOS;

  /* prepare coo coordinates for values insertion */

  /* count total nonzeros of those intermediate seqaij Mats
     ncoo_d:    # of nonzeros of matrices that do not have offproc entries
     ncoo_o:    # of nonzeros (of matrices that might have offproc entries) that will be inserted to remote procs
     ncoo_oown: # of nonzeros (of matrices that might have offproc entries) that will be inserted locally
  */
  for (cp = 0, ncoo_d = 0, ncoo_o = 0, ncoo_oown = 0; cp < mmdata->cp; cp++) {
    Mat_SeqAIJ *mm = (Mat_SeqAIJ*)mp[cp]->data;
    if (mptmp[cp]) continue;
    if (rmapt[cp] == 2 && hasoffproc) { /* the rows need to be scattered to all processes (might include self) */
      const PetscInt *rmap = rmapa[cp];
      const PetscInt mr    = mp[cp]->rmap->n;
      const PetscInt rs    = C->rmap->rstart;
      const PetscInt re    = C->rmap->rend;
      const PetscInt *ii   = mm->i;
      for (i = 0; i < mr; i++) {
        const PetscInt gr = rmap[i];
        const PetscInt nz = ii[i+1] - ii[i];
        if (gr < rs || gr >= re) ncoo_o += nz; /* this row is offproc */
        else ncoo_oown += nz;                  /* this row is local */
      }
    } else ncoo_d += mm->nz;
  }

  /*
    ncoo: total number of nonzeros (including those inserted by remote procs) belonging to this proc

    ncoo = ncoo_d + ncoo_oown + ncoo2, where ncoo2 is the number of nonzeros inserted to me by other procs.

    off[0] points to a big index array, which is shared by off[1,2,...]. Similarly, for own[0].

    off[p]: points to the segment for matrix mp[p], storing location of nonzeros that mp[p] will insert to others
    own[p]: points to the segment for matrix mp[p], storing location of nonzeros that mp[p] will insert locally
    so, off[p+1]-off[p] is the number of nonzeros that mp[p] will send to others.

    coo_i/j/v[]: [ncoo] row/col/val of nonzeros belonging to this proc.
      Ex. coo_i[]: the beginning part (of size ncoo_d + ncoo_oown) stores i of local nonzeros, and the remaining part stores i of nonzeros I will receive.
  */
  PetscCall(PetscCalloc1(mmdata->cp+1,&mmdata->off)); /* +1 to make a csr-like data structure */
  PetscCall(PetscCalloc1(mmdata->cp+1,&mmdata->own));

  /* gather (i,j) of nonzeros inserted by remote procs */
  if (hasoffproc) {
    PetscSF  msf;
    PetscInt ncoo2,*coo_i2,*coo_j2;

    PetscCall(PetscMalloc1(ncoo_o,&mmdata->off[0]));
    PetscCall(PetscMalloc1(ncoo_oown,&mmdata->own[0]));
    PetscCall(PetscMalloc2(ncoo_o,&coo_i,ncoo_o,&coo_j)); /* to collect (i,j) of entries to be sent to others */

    for (cp = 0, ncoo_o = 0; cp < mmdata->cp; cp++) {
      Mat_SeqAIJ *mm     = (Mat_SeqAIJ*)mp[cp]->data;
      PetscInt   *idxoff = mmdata->off[cp];
      PetscInt   *idxown = mmdata->own[cp];
      if (!mptmp[cp] && rmapt[cp] == 2) { /* row map is sparse */
        const PetscInt *rmap = rmapa[cp];
        const PetscInt *cmap = cmapa[cp];
        const PetscInt *ii   = mm->i;
        PetscInt       *coi  = coo_i + ncoo_o;
        PetscInt       *coj  = coo_j + ncoo_o;
        const PetscInt mr    = mp[cp]->rmap->n;
        const PetscInt rs    = C->rmap->rstart;
        const PetscInt re    = C->rmap->rend;
        const PetscInt cs    = C->cmap->rstart;
        for (i = 0; i < mr; i++) {
          const PetscInt *jj = mm->j + ii[i];
          const PetscInt gr  = rmap[i];
          const PetscInt nz  = ii[i+1] - ii[i];
          if (gr < rs || gr >= re) { /* this is an offproc row */
            for (j = ii[i]; j < ii[i+1]; j++) {
              *coi++    = gr;
              *idxoff++ = j;
            }
            if (!cmapt[cp]) { /* already global */
              for (j = 0; j < nz; j++) *coj++ = jj[j];
            } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */
              for (j = 0; j < nz; j++) *coj++ = jj[j] + cs;
            } else { /* offdiag */
              for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]];
            }
            ncoo_o += nz;
          } else { /* this is a local row */
            for (j = ii[i]; j < ii[i+1]; j++) *idxown++ = j;
          }
        }
      }
      mmdata->off[cp + 1] = idxoff;
      mmdata->own[cp + 1] = idxown;
    }

    PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)C),&mmdata->sf));
    PetscCall(PetscSFSetGraphLayout(mmdata->sf,C->rmap,ncoo_o/*nleaves*/,NULL/*ilocal*/,PETSC_OWN_POINTER,coo_i));
    PetscCall(PetscSFGetMultiSF(mmdata->sf,&msf));
    PetscCall(PetscSFGetGraph(msf,&ncoo2/*nroots*/,NULL,NULL,NULL));
    ncoo = ncoo_d + ncoo_oown + ncoo2;
    PetscCall(PetscMalloc2(ncoo,&coo_i2,ncoo,&coo_j2));
    PetscCall(PetscSFGatherBegin(mmdata->sf,MPIU_INT,coo_i,coo_i2 + ncoo_d + ncoo_oown)); /* put (i,j) of remote nonzeros at back */
    PetscCall(PetscSFGatherEnd(mmdata->sf,MPIU_INT,coo_i,coo_i2 + ncoo_d + ncoo_oown));
    PetscCall(PetscSFGatherBegin(mmdata->sf,MPIU_INT,coo_j,coo_j2 + ncoo_d + ncoo_oown));
    PetscCall(PetscSFGatherEnd(mmdata->sf,MPIU_INT,coo_j,coo_j2 + ncoo_d + ncoo_oown));
    PetscCall(PetscFree2(coo_i,coo_j));
    /* allocate MPI send buffer to collect nonzero values to be sent to remote procs */
    PetscCall(PetscSFMalloc(mmdata->sf,mmdata->mtype,ncoo_o*sizeof(PetscScalar),(void**)&mmdata->coo_w));
    coo_i = coo_i2;
    coo_j = coo_j2;
  } else { /* no offproc values insertion */
    ncoo = ncoo_d;
    PetscCall(PetscMalloc2(ncoo,&coo_i,ncoo,&coo_j));

    /* an empty SF is still created so that numeric-phase/destroy code can treat both cases uniformly */
    PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)C),&mmdata->sf));
    PetscCall(PetscSFSetGraph(mmdata->sf,0,0,NULL,PETSC_OWN_POINTER,NULL,PETSC_OWN_POINTER));
    PetscCall(PetscSFSetUp(mmdata->sf));
  }
  mmdata->hasoffproc = hasoffproc;

  /* gather (i,j) of nonzeros inserted locally */
  for (cp = 0, ncoo_d = 0; cp < mmdata->cp; cp++) {
    Mat_SeqAIJ     *mm   = (Mat_SeqAIJ*)mp[cp]->data;
    PetscInt       *coi  = coo_i + ncoo_d;
    PetscInt       *coj  = coo_j + ncoo_d;
    const PetscInt *jj   = mm->j;
    const PetscInt *ii   = mm->i;
    const PetscInt *cmap = cmapa[cp];
    const PetscInt *rmap = rmapa[cp];
    const PetscInt mr    = mp[cp]->rmap->n;
    const PetscInt rs    = C->rmap->rstart;
    const PetscInt re    = C->rmap->rend;
    const PetscInt cs    = C->cmap->rstart;

    if (mptmp[cp]) continue;
    if (rmapt[cp] == 1) { /* consecutive rows */
      /* fill coo_i */
      for (i = 0; i < mr; i++) {
        const PetscInt gr = i + rs;
        for (j = ii[i]; j < ii[i+1]; j++) coi[j] = gr;
      }
      /* fill coo_j */
      if (!cmapt[cp]) { /* type-0, already global */
        PetscCall(PetscArraycpy(coj,jj,mm->nz));
      } else if (cmapt[cp] == 1) { /* type-1, local to global for consecutive columns of C */
        for (j = 0; j < mm->nz; j++) coj[j] = jj[j] + cs; /* lid + col start */
      } else { /* type-2, local to global for sparse columns */
        for (j = 0; j < mm->nz; j++) coj[j] = cmap[jj[j]];
      }
      ncoo_d += mm->nz;
    } else if (rmapt[cp] == 2) { /* sparse rows */
      for (i = 0; i < mr; i++) {
        const PetscInt *jj = mm->j + ii[i];
        const PetscInt gr  = rmap[i];
        const PetscInt nz  = ii[i+1] - ii[i];
        if (gr >= rs && gr < re) { /* local rows */
          for (j = ii[i]; j < ii[i+1]; j++) *coi++ = gr;
          if (!cmapt[cp]) { /* type-0, already global */
            for (j = 0; j < nz; j++) *coj++ = jj[j];
          } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */
            for (j = 0; j < nz; j++) *coj++ = jj[j] + cs;
          } else { /* type-2, local to global for sparse columns */
            for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]];
          }
          ncoo_d += nz;
        }
      }
    }
  }
  if (glob) {
    PetscCall(ISRestoreIndices(glob,&globidx));
  }
  PetscCall(ISDestroy(&glob));
  if (P_oth_l2g) {
    PetscCall(ISLocalToGlobalMappingRestoreIndices(P_oth_l2g,&P_oth_idx));
  }
  PetscCall(ISLocalToGlobalMappingDestroy(&P_oth_l2g));
  /* allocate an array to store all nonzeros (inserted locally or remotely) belonging to this proc */
  PetscCall(PetscSFMalloc(mmdata->sf,mmdata->mtype,ncoo*sizeof(PetscScalar),(void**)&mmdata->coo_v));

  /* preallocate with COO data */
  PetscCall(MatSetPreallocationCOO(C,ncoo,coo_i,coo_j));
PetscCall(PetscFree2(coo_i,coo_j)); 7306 PetscFunctionReturn(0); 7307 } 7308 7309 PetscErrorCode MatProductSetFromOptions_MPIAIJBACKEND(Mat mat) 7310 { 7311 Mat_Product *product = mat->product; 7312 #if defined(PETSC_HAVE_DEVICE) 7313 PetscBool match = PETSC_FALSE; 7314 PetscBool usecpu = PETSC_FALSE; 7315 #else 7316 PetscBool match = PETSC_TRUE; 7317 #endif 7318 7319 PetscFunctionBegin; 7320 MatCheckProduct(mat,1); 7321 #if defined(PETSC_HAVE_DEVICE) 7322 if (!product->A->boundtocpu && !product->B->boundtocpu) { 7323 PetscCall(PetscObjectTypeCompare((PetscObject)product->B,((PetscObject)product->A)->type_name,&match)); 7324 } 7325 if (match) { /* we can always fallback to the CPU if requested */ 7326 switch (product->type) { 7327 case MATPRODUCT_AB: 7328 if (product->api_user) { 7329 PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatMatMult","Mat"); 7330 PetscCall(PetscOptionsBool("-matmatmult_backend_cpu","Use CPU code","MatMatMult",usecpu,&usecpu,NULL)); 7331 PetscOptionsEnd(); 7332 } else { 7333 PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatProduct_AB","Mat"); 7334 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu","Use CPU code","MatMatMult",usecpu,&usecpu,NULL)); 7335 PetscOptionsEnd(); 7336 } 7337 break; 7338 case MATPRODUCT_AtB: 7339 if (product->api_user) { 7340 PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatTransposeMatMult","Mat"); 7341 PetscCall(PetscOptionsBool("-mattransposematmult_backend_cpu","Use CPU code","MatTransposeMatMult",usecpu,&usecpu,NULL)); 7342 PetscOptionsEnd(); 7343 } else { 7344 PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatProduct_AtB","Mat"); 7345 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu","Use CPU code","MatTransposeMatMult",usecpu,&usecpu,NULL)); 7346 PetscOptionsEnd(); 7347 } 7348 break; 7349 case MATPRODUCT_PtAP: 7350 if (product->api_user) { 7351 
PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatPtAP","Mat"); 7352 PetscCall(PetscOptionsBool("-matptap_backend_cpu","Use CPU code","MatPtAP",usecpu,&usecpu,NULL)); 7353 PetscOptionsEnd(); 7354 } else { 7355 PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatProduct_PtAP","Mat"); 7356 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu","Use CPU code","MatPtAP",usecpu,&usecpu,NULL)); 7357 PetscOptionsEnd(); 7358 } 7359 break; 7360 default: 7361 break; 7362 } 7363 match = (PetscBool)!usecpu; 7364 } 7365 #endif 7366 if (match) { 7367 switch (product->type) { 7368 case MATPRODUCT_AB: 7369 case MATPRODUCT_AtB: 7370 case MATPRODUCT_PtAP: 7371 mat->ops->productsymbolic = MatProductSymbolic_MPIAIJBACKEND; 7372 break; 7373 default: 7374 break; 7375 } 7376 } 7377 /* fallback to MPIAIJ ops */ 7378 if (!mat->ops->productsymbolic) PetscCall(MatProductSetFromOptions_MPIAIJ(mat)); 7379 PetscFunctionReturn(0); 7380 } 7381 7382 /* 7383 Special version for direct calls from Fortran 7384 */ 7385 #include <petsc/private/fortranimpl.h> 7386 7387 /* Change these macros so can be used in void function */ 7388 /* Identical to PetscCallVoid, except it assigns to *_ierr */ 7389 #undef PetscCall 7390 #define PetscCall(...) do { \ 7391 PetscErrorCode ierr_msv_mpiaij = __VA_ARGS__; \ 7392 if (PetscUnlikely(ierr_msv_mpiaij)) { \ 7393 *_ierr = PetscError(PETSC_COMM_SELF,__LINE__,PETSC_FUNCTION_NAME,__FILE__,ierr_msv_mpiaij,PETSC_ERROR_REPEAT," "); \ 7394 return; \ 7395 } \ 7396 } while (0) 7397 7398 #undef SETERRQ 7399 #define SETERRQ(comm,ierr,...) 
do { \ 7400 *_ierr = PetscError(comm,__LINE__,PETSC_FUNCTION_NAME,__FILE__,ierr,PETSC_ERROR_INITIAL,__VA_ARGS__); \ 7401 return; \ 7402 } while (0) 7403 7404 #if defined(PETSC_HAVE_FORTRAN_CAPS) 7405 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ 7406 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 7407 #define matsetvaluesmpiaij_ matsetvaluesmpiaij 7408 #else 7409 #endif 7410 PETSC_EXTERN void matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr) 7411 { 7412 Mat mat = *mmat; 7413 PetscInt m = *mm, n = *mn; 7414 InsertMode addv = *maddv; 7415 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 7416 PetscScalar value; 7417 7418 MatCheckPreallocated(mat,1); 7419 if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv; 7420 else PetscCheck(mat->insertmode == addv,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 7421 { 7422 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 7423 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 7424 PetscBool roworiented = aij->roworiented; 7425 7426 /* Some Variables required in the macro */ 7427 Mat A = aij->A; 7428 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 7429 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 7430 MatScalar *aa; 7431 PetscBool ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE); 7432 Mat B = aij->B; 7433 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 7434 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 7435 MatScalar *ba; 7436 /* This variable below is only for the PETSC_HAVE_VIENNACL or PETSC_HAVE_CUDA cases, but we define it in all cases because we 7437 * cannot use "#if defined" inside a macro. 
*/ 7438 PETSC_UNUSED PetscBool inserted = PETSC_FALSE; 7439 7440 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 7441 PetscInt nonew = a->nonew; 7442 MatScalar *ap1,*ap2; 7443 7444 PetscFunctionBegin; 7445 PetscCall(MatSeqAIJGetArray(A,&aa)); 7446 PetscCall(MatSeqAIJGetArray(B,&ba)); 7447 for (i=0; i<m; i++) { 7448 if (im[i] < 0) continue; 7449 PetscCheck(im[i] < mat->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT,im[i],mat->rmap->N-1); 7450 if (im[i] >= rstart && im[i] < rend) { 7451 row = im[i] - rstart; 7452 lastcol1 = -1; 7453 rp1 = aj + ai[row]; 7454 ap1 = aa + ai[row]; 7455 rmax1 = aimax[row]; 7456 nrow1 = ailen[row]; 7457 low1 = 0; 7458 high1 = nrow1; 7459 lastcol2 = -1; 7460 rp2 = bj + bi[row]; 7461 ap2 = ba + bi[row]; 7462 rmax2 = bimax[row]; 7463 nrow2 = bilen[row]; 7464 low2 = 0; 7465 high2 = nrow2; 7466 7467 for (j=0; j<n; j++) { 7468 if (roworiented) value = v[i*n+j]; 7469 else value = v[i+j*m]; 7470 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue; 7471 if (in[j] >= cstart && in[j] < cend) { 7472 col = in[j] - cstart; 7473 MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]); 7474 } else if (in[j] < 0) continue; 7475 else if (PetscUnlikelyDebug(in[j] >= mat->cmap->N)) { 7476 /* extra brace on SETERRQ() is required for --with-errorchecking=0 - due to the next 'else' clause */ 7477 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 7478 } else { 7479 if (mat->was_assembled) { 7480 if (!aij->colmap) { 7481 PetscCall(MatCreateColmap_MPIAIJ_Private(mat)); 7482 } 7483 #if defined(PETSC_USE_CTABLE) 7484 PetscCall(PetscTableFind(aij->colmap,in[j]+1,&col)); 7485 col--; 7486 #else 7487 col = aij->colmap[in[j]] - 1; 7488 #endif 7489 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 7490 PetscCall(MatDisAssemble_MPIAIJ(mat)); 7491 col = in[j]; 7492 
/* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 7493 B = aij->B; 7494 b = (Mat_SeqAIJ*)B->data; 7495 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 7496 rp2 = bj + bi[row]; 7497 ap2 = ba + bi[row]; 7498 rmax2 = bimax[row]; 7499 nrow2 = bilen[row]; 7500 low2 = 0; 7501 high2 = nrow2; 7502 bm = aij->B->rmap->n; 7503 ba = b->a; 7504 inserted = PETSC_FALSE; 7505 } 7506 } else col = in[j]; 7507 MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]); 7508 } 7509 } 7510 } else if (!aij->donotstash) { 7511 if (roworiented) { 7512 PetscCall(MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)))); 7513 } else { 7514 PetscCall(MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)))); 7515 } 7516 } 7517 } 7518 PetscCall(MatSeqAIJRestoreArray(A,&aa)); 7519 PetscCall(MatSeqAIJRestoreArray(B,&ba)); 7520 } 7521 PetscFunctionReturnVoid(); 7522 } 7523 /* Undefining these here since they were redefined from their original definition above! No 7524 * other PETSc functions should be defined past this point, as it is impossible to recover the 7525 * original definitions */ 7526 #undef PetscCall 7527 #undef SETERRQ 7528