1 #include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/ 2 #include <petsc/private/vecimpl.h> 3 #include <petsc/private/sfimpl.h> 4 #include <petsc/private/isimpl.h> 5 #include <petscblaslapack.h> 6 #include <petscsf.h> 7 #include <petsc/private/hashmapi.h> 8 9 PetscErrorCode MatGetRowIJ_MPIAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *m,const PetscInt *ia[],const PetscInt *ja[],PetscBool *done) 10 { 11 Mat B; 12 13 PetscFunctionBegin; 14 PetscCall(MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&B)); 15 PetscCall(PetscObjectCompose((PetscObject)A,"MatGetRowIJ_MPIAIJ",(PetscObject)B)); 16 PetscCall(MatGetRowIJ(B,oshift,symmetric,inodecompressed,m,ia,ja,done)); 17 PetscCall(MatDestroy(&B)); 18 PetscFunctionReturn(0); 19 } 20 21 PetscErrorCode MatRestoreRowIJ_MPIAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *m,const PetscInt *ia[],const PetscInt *ja[],PetscBool *done) 22 { 23 Mat B; 24 25 PetscFunctionBegin; 26 PetscCall(PetscObjectQuery((PetscObject)A,"MatGetRowIJ_MPIAIJ",(PetscObject*)&B)); 27 PetscCall(MatRestoreRowIJ(B,oshift,symmetric,inodecompressed,m,ia,ja,done)); 28 PetscCall(PetscObjectCompose((PetscObject)A,"MatGetRowIJ_MPIAIJ",NULL)); 29 PetscFunctionReturn(0); 30 } 31 32 /*MC 33 MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices. 34 35 This matrix type is identical to MATSEQAIJ when constructed with a single process communicator, 36 and MATMPIAIJ otherwise. As a result, for single process communicators, 37 MatSeqAIJSetPreallocation is supported, and similarly MatMPIAIJSetPreallocation() is supported 38 for communicators controlling multiple processes. It is recommended that you call both of 39 the above preallocation routines for simplicity. 40 41 Options Database Keys: 42 . 
-mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions() 43 44 Developer Notes: 45 Subclasses include MATAIJCUSPARSE, MATAIJPERM, MATAIJSELL, MATAIJMKL, MATAIJCRL, and also automatically switches over to use inodes when 46 enough exist. 47 48 Level: beginner 49 50 .seealso: `MatCreateAIJ()`, `MatCreateSeqAIJ()`, `MATSEQAIJ`, `MATMPIAIJ` 51 M*/ 52 53 /*MC 54 MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices. 55 56 This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator, 57 and MATMPIAIJCRL otherwise. As a result, for single process communicators, 58 MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported 59 for communicators controlling multiple processes. It is recommended that you call both of 60 the above preallocation routines for simplicity. 61 62 Options Database Keys: 63 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions() 64 65 Level: beginner 66 67 .seealso: `MatCreateMPIAIJCRL`, `MATSEQAIJCRL`, `MATMPIAIJCRL`, `MATSEQAIJCRL`, `MATMPIAIJCRL` 68 M*/ 69 70 static PetscErrorCode MatBindToCPU_MPIAIJ(Mat A,PetscBool flg) 71 { 72 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 73 74 PetscFunctionBegin; 75 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_VIENNACL) 76 A->boundtocpu = flg; 77 #endif 78 if (a->A) PetscCall(MatBindToCPU(a->A,flg)); 79 if (a->B) PetscCall(MatBindToCPU(a->B,flg)); 80 81 /* In addition to binding the diagonal and off-diagonal matrices, bind the local vectors used for matrix-vector products. 82 * This maybe seems a little odd for a MatBindToCPU() call to do, but it makes no sense for the binding of these vectors 83 * to differ from the parent matrix. 
*/ 84 if (a->lvec) PetscCall(VecBindToCPU(a->lvec,flg)); 85 if (a->diag) PetscCall(VecBindToCPU(a->diag,flg)); 86 87 PetscFunctionReturn(0); 88 } 89 90 PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs) 91 { 92 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)M->data; 93 94 PetscFunctionBegin; 95 if (mat->A) { 96 PetscCall(MatSetBlockSizes(mat->A,rbs,cbs)); 97 PetscCall(MatSetBlockSizes(mat->B,rbs,1)); 98 } 99 PetscFunctionReturn(0); 100 } 101 102 PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows) 103 { 104 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)M->data; 105 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data; 106 Mat_SeqAIJ *b = (Mat_SeqAIJ*)mat->B->data; 107 const PetscInt *ia,*ib; 108 const MatScalar *aa,*bb,*aav,*bav; 109 PetscInt na,nb,i,j,*rows,cnt=0,n0rows; 110 PetscInt m = M->rmap->n,rstart = M->rmap->rstart; 111 112 PetscFunctionBegin; 113 *keptrows = NULL; 114 115 ia = a->i; 116 ib = b->i; 117 PetscCall(MatSeqAIJGetArrayRead(mat->A,&aav)); 118 PetscCall(MatSeqAIJGetArrayRead(mat->B,&bav)); 119 for (i=0; i<m; i++) { 120 na = ia[i+1] - ia[i]; 121 nb = ib[i+1] - ib[i]; 122 if (!na && !nb) { 123 cnt++; 124 goto ok1; 125 } 126 aa = aav + ia[i]; 127 for (j=0; j<na; j++) { 128 if (aa[j] != 0.0) goto ok1; 129 } 130 bb = bav + ib[i]; 131 for (j=0; j <nb; j++) { 132 if (bb[j] != 0.0) goto ok1; 133 } 134 cnt++; 135 ok1:; 136 } 137 PetscCall(MPIU_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M))); 138 if (!n0rows) { 139 PetscCall(MatSeqAIJRestoreArrayRead(mat->A,&aav)); 140 PetscCall(MatSeqAIJRestoreArrayRead(mat->B,&bav)); 141 PetscFunctionReturn(0); 142 } 143 PetscCall(PetscMalloc1(M->rmap->n-cnt,&rows)); 144 cnt = 0; 145 for (i=0; i<m; i++) { 146 na = ia[i+1] - ia[i]; 147 nb = ib[i+1] - ib[i]; 148 if (!na && !nb) continue; 149 aa = aav + ia[i]; 150 for (j=0; j<na;j++) { 151 if (aa[j] != 0.0) { 152 rows[cnt++] = rstart + i; 153 goto ok2; 154 } 155 } 156 bb = bav + ib[i]; 157 for (j=0; j<nb; j++) { 158 if (bb[j] != 0.0) { 159 rows[cnt++] = 
rstart + i; 160 goto ok2; 161 } 162 } 163 ok2:; 164 } 165 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows)); 166 PetscCall(MatSeqAIJRestoreArrayRead(mat->A,&aav)); 167 PetscCall(MatSeqAIJRestoreArrayRead(mat->B,&bav)); 168 PetscFunctionReturn(0); 169 } 170 171 PetscErrorCode MatDiagonalSet_MPIAIJ(Mat Y,Vec D,InsertMode is) 172 { 173 Mat_MPIAIJ *aij = (Mat_MPIAIJ*) Y->data; 174 PetscBool cong; 175 176 PetscFunctionBegin; 177 PetscCall(MatHasCongruentLayouts(Y,&cong)); 178 if (Y->assembled && cong) { 179 PetscCall(MatDiagonalSet(aij->A,D,is)); 180 } else { 181 PetscCall(MatDiagonalSet_Default(Y,D,is)); 182 } 183 PetscFunctionReturn(0); 184 } 185 186 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows) 187 { 188 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)M->data; 189 PetscInt i,rstart,nrows,*rows; 190 191 PetscFunctionBegin; 192 *zrows = NULL; 193 PetscCall(MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows)); 194 PetscCall(MatGetOwnershipRange(M,&rstart,NULL)); 195 for (i=0; i<nrows; i++) rows[i] += rstart; 196 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows)); 197 PetscFunctionReturn(0); 198 } 199 200 PetscErrorCode MatGetColumnReductions_MPIAIJ(Mat A,PetscInt type,PetscReal *reductions) 201 { 202 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)A->data; 203 PetscInt i,m,n,*garray = aij->garray; 204 Mat_SeqAIJ *a_aij = (Mat_SeqAIJ*) aij->A->data; 205 Mat_SeqAIJ *b_aij = (Mat_SeqAIJ*) aij->B->data; 206 PetscReal *work; 207 const PetscScalar *dummy; 208 209 PetscFunctionBegin; 210 PetscCall(MatGetSize(A,&m,&n)); 211 PetscCall(PetscCalloc1(n,&work)); 212 PetscCall(MatSeqAIJGetArrayRead(aij->A,&dummy)); 213 PetscCall(MatSeqAIJRestoreArrayRead(aij->A,&dummy)); 214 PetscCall(MatSeqAIJGetArrayRead(aij->B,&dummy)); 215 PetscCall(MatSeqAIJRestoreArrayRead(aij->B,&dummy)); 216 if (type == NORM_2) { 217 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 218 work[A->cmap->rstart + a_aij->j[i]] += 
PetscAbsScalar(a_aij->a[i]*a_aij->a[i]); 219 } 220 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 221 work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]); 222 } 223 } else if (type == NORM_1) { 224 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 225 work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]); 226 } 227 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 228 work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]); 229 } 230 } else if (type == NORM_INFINITY) { 231 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 232 work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]); 233 } 234 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 235 work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]); 236 } 237 } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) { 238 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 239 work[A->cmap->rstart + a_aij->j[i]] += PetscRealPart(a_aij->a[i]); 240 } 241 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 242 work[garray[b_aij->j[i]]] += PetscRealPart(b_aij->a[i]); 243 } 244 } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) { 245 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 246 work[A->cmap->rstart + a_aij->j[i]] += PetscImaginaryPart(a_aij->a[i]); 247 } 248 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 249 work[garray[b_aij->j[i]]] += PetscImaginaryPart(b_aij->a[i]); 250 } 251 } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown reduction type"); 252 if (type == NORM_INFINITY) { 253 PetscCall(MPIU_Allreduce(work,reductions,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A))); 254 } else { 255 PetscCall(MPIU_Allreduce(work,reductions,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A))); 256 } 257 PetscCall(PetscFree(work)); 258 if (type == NORM_2) { 259 for (i=0; i<n; i++) reductions[i] = PetscSqrtReal(reductions[i]); 260 } else if (type 
== REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) { 261 for (i=0; i<n; i++) reductions[i] /= m; 262 } 263 PetscFunctionReturn(0); 264 } 265 266 PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A,IS *is) 267 { 268 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 269 IS sis,gis; 270 const PetscInt *isis,*igis; 271 PetscInt n,*iis,nsis,ngis,rstart,i; 272 273 PetscFunctionBegin; 274 PetscCall(MatFindOffBlockDiagonalEntries(a->A,&sis)); 275 PetscCall(MatFindNonzeroRows(a->B,&gis)); 276 PetscCall(ISGetSize(gis,&ngis)); 277 PetscCall(ISGetSize(sis,&nsis)); 278 PetscCall(ISGetIndices(sis,&isis)); 279 PetscCall(ISGetIndices(gis,&igis)); 280 281 PetscCall(PetscMalloc1(ngis+nsis,&iis)); 282 PetscCall(PetscArraycpy(iis,igis,ngis)); 283 PetscCall(PetscArraycpy(iis+ngis,isis,nsis)); 284 n = ngis + nsis; 285 PetscCall(PetscSortRemoveDupsInt(&n,iis)); 286 PetscCall(MatGetOwnershipRange(A,&rstart,NULL)); 287 for (i=0; i<n; i++) iis[i] += rstart; 288 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)A),n,iis,PETSC_OWN_POINTER,is)); 289 290 PetscCall(ISRestoreIndices(sis,&isis)); 291 PetscCall(ISRestoreIndices(gis,&igis)); 292 PetscCall(ISDestroy(&sis)); 293 PetscCall(ISDestroy(&gis)); 294 PetscFunctionReturn(0); 295 } 296 297 /* 298 Local utility routine that creates a mapping from the global column 299 number to the local number in the off-diagonal part of the local 300 storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at 301 a slightly higher hash table cost; without it it is not scalable (each processor 302 has an order N integer array but is fast to access. 
303 */ 304 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat) 305 { 306 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 307 PetscInt n = aij->B->cmap->n,i; 308 309 PetscFunctionBegin; 310 PetscCheck(!n || aij->garray,PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray"); 311 #if defined(PETSC_USE_CTABLE) 312 PetscCall(PetscTableCreate(n,mat->cmap->N+1,&aij->colmap)); 313 for (i=0; i<n; i++) { 314 PetscCall(PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES)); 315 } 316 #else 317 PetscCall(PetscCalloc1(mat->cmap->N+1,&aij->colmap)); 318 PetscCall(PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt))); 319 for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1; 320 #endif 321 PetscFunctionReturn(0); 322 } 323 324 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv,orow,ocol) \ 325 { \ 326 if (col <= lastcol1) low1 = 0; \ 327 else high1 = nrow1; \ 328 lastcol1 = col;\ 329 while (high1-low1 > 5) { \ 330 t = (low1+high1)/2; \ 331 if (rp1[t] > col) high1 = t; \ 332 else low1 = t; \ 333 } \ 334 for (_i=low1; _i<high1; _i++) { \ 335 if (rp1[_i] > col) break; \ 336 if (rp1[_i] == col) { \ 337 if (addv == ADD_VALUES) { \ 338 ap1[_i] += value; \ 339 /* Not sure LogFlops will slow dow the code or not */ \ 340 (void)PetscLogFlops(1.0); \ 341 } \ 342 else ap1[_i] = value; \ 343 goto a_noinsert; \ 344 } \ 345 } \ 346 if (value == 0.0 && ignorezeroentries && row != col) {low1 = 0; high1 = nrow1;goto a_noinsert;} \ 347 if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \ 348 PetscCheck(nonew != -1,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \ 349 MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \ 350 N = nrow1++ - 1; a->nz++; high1++; \ 351 /* shift up all the later entries in this row */ \ 352 PetscCall(PetscArraymove(rp1+_i+1,rp1+_i,N-_i+1));\ 353 
PetscCall(PetscArraymove(ap1+_i+1,ap1+_i,N-_i+1));\ 354 rp1[_i] = col; \ 355 ap1[_i] = value; \ 356 A->nonzerostate++;\ 357 a_noinsert: ; \ 358 ailen[row] = nrow1; \ 359 } 360 361 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv,orow,ocol) \ 362 { \ 363 if (col <= lastcol2) low2 = 0; \ 364 else high2 = nrow2; \ 365 lastcol2 = col; \ 366 while (high2-low2 > 5) { \ 367 t = (low2+high2)/2; \ 368 if (rp2[t] > col) high2 = t; \ 369 else low2 = t; \ 370 } \ 371 for (_i=low2; _i<high2; _i++) { \ 372 if (rp2[_i] > col) break; \ 373 if (rp2[_i] == col) { \ 374 if (addv == ADD_VALUES) { \ 375 ap2[_i] += value; \ 376 (void)PetscLogFlops(1.0); \ 377 } \ 378 else ap2[_i] = value; \ 379 goto b_noinsert; \ 380 } \ 381 } \ 382 if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 383 if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 384 PetscCheck(nonew != -1,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \ 385 MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \ 386 N = nrow2++ - 1; b->nz++; high2++; \ 387 /* shift up all the later entries in this row */ \ 388 PetscCall(PetscArraymove(rp2+_i+1,rp2+_i,N-_i+1));\ 389 PetscCall(PetscArraymove(ap2+_i+1,ap2+_i,N-_i+1));\ 390 rp2[_i] = col; \ 391 ap2[_i] = value; \ 392 B->nonzerostate++; \ 393 b_noinsert: ; \ 394 bilen[row] = nrow2; \ 395 } 396 397 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[]) 398 { 399 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 400 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data; 401 PetscInt l,*garray = mat->garray,diag; 402 PetscScalar *aa,*ba; 403 404 PetscFunctionBegin; 405 /* code only works for square matrices A */ 406 407 /* find size of row to the left of the diagonal part */ 408 PetscCall(MatGetOwnershipRange(A,&diag,NULL)); 409 row = row - diag; 410 for (l=0; 
l<b->i[row+1]-b->i[row]; l++) { 411 if (garray[b->j[b->i[row]+l]] > diag) break; 412 } 413 if (l) { 414 PetscCall(MatSeqAIJGetArray(mat->B,&ba)); 415 PetscCall(PetscArraycpy(ba+b->i[row],v,l)); 416 PetscCall(MatSeqAIJRestoreArray(mat->B,&ba)); 417 } 418 419 /* diagonal part */ 420 if (a->i[row+1]-a->i[row]) { 421 PetscCall(MatSeqAIJGetArray(mat->A,&aa)); 422 PetscCall(PetscArraycpy(aa+a->i[row],v+l,(a->i[row+1]-a->i[row]))); 423 PetscCall(MatSeqAIJRestoreArray(mat->A,&aa)); 424 } 425 426 /* right of diagonal part */ 427 if (b->i[row+1]-b->i[row]-l) { 428 PetscCall(MatSeqAIJGetArray(mat->B,&ba)); 429 PetscCall(PetscArraycpy(ba+b->i[row]+l,v+l+a->i[row+1]-a->i[row],b->i[row+1]-b->i[row]-l)); 430 PetscCall(MatSeqAIJRestoreArray(mat->B,&ba)); 431 } 432 PetscFunctionReturn(0); 433 } 434 435 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 436 { 437 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 438 PetscScalar value = 0.0; 439 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 440 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 441 PetscBool roworiented = aij->roworiented; 442 443 /* Some Variables required in the macro */ 444 Mat A = aij->A; 445 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 446 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 447 PetscBool ignorezeroentries = a->ignorezeroentries; 448 Mat B = aij->B; 449 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 450 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 451 MatScalar *aa,*ba; 452 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 453 PetscInt nonew; 454 MatScalar *ap1,*ap2; 455 456 PetscFunctionBegin; 457 PetscCall(MatSeqAIJGetArray(A,&aa)); 458 PetscCall(MatSeqAIJGetArray(B,&ba)); 459 for (i=0; i<m; i++) { 460 if (im[i] < 0) continue; 461 PetscCheck(im[i] < 
mat->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT,im[i],mat->rmap->N-1); 462 if (im[i] >= rstart && im[i] < rend) { 463 row = im[i] - rstart; 464 lastcol1 = -1; 465 rp1 = aj + ai[row]; 466 ap1 = aa + ai[row]; 467 rmax1 = aimax[row]; 468 nrow1 = ailen[row]; 469 low1 = 0; 470 high1 = nrow1; 471 lastcol2 = -1; 472 rp2 = bj + bi[row]; 473 ap2 = ba + bi[row]; 474 rmax2 = bimax[row]; 475 nrow2 = bilen[row]; 476 low2 = 0; 477 high2 = nrow2; 478 479 for (j=0; j<n; j++) { 480 if (v) value = roworiented ? v[i*n+j] : v[i+j*m]; 481 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue; 482 if (in[j] >= cstart && in[j] < cend) { 483 col = in[j] - cstart; 484 nonew = a->nonew; 485 MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]); 486 } else if (in[j] < 0) { 487 continue; 488 } else { 489 PetscCheck(in[j] < mat->cmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT,in[j],mat->cmap->N-1); 490 if (mat->was_assembled) { 491 if (!aij->colmap) { 492 PetscCall(MatCreateColmap_MPIAIJ_Private(mat)); 493 } 494 #if defined(PETSC_USE_CTABLE) 495 PetscCall(PetscTableFind(aij->colmap,in[j]+1,&col)); /* map global col ids to local ones */ 496 col--; 497 #else 498 col = aij->colmap[in[j]] - 1; 499 #endif 500 if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) { /* col < 0 means in[j] is a new col for B */ 501 PetscCall(MatDisAssemble_MPIAIJ(mat)); /* Change aij->B from reduced/local format to expanded/global format */ 502 col = in[j]; 503 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 504 B = aij->B; 505 b = (Mat_SeqAIJ*)B->data; 506 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a; 507 rp2 = bj + bi[row]; 508 ap2 = ba + bi[row]; 509 rmax2 = bimax[row]; 510 nrow2 = bilen[row]; 511 low2 = 0; 512 high2 = nrow2; 513 bm = aij->B->rmap->n; 514 ba = b->a; 515 } else if (col < 0 && 
!(ignorezeroentries && value == 0.0)) { 516 if (1 == ((Mat_SeqAIJ*)(aij->B->data))->nonew) { 517 PetscCall(PetscInfo(mat,"Skipping of insertion of new nonzero location in off-diagonal portion of matrix %g(%" PetscInt_FMT ",%" PetscInt_FMT ")\n",(double)PetscRealPart(value),im[i],in[j])); 518 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", im[i], in[j]); 519 } 520 } else col = in[j]; 521 nonew = b->nonew; 522 MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]); 523 } 524 } 525 } else { 526 PetscCheck(!mat->nooffprocentries,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 527 if (!aij->donotstash) { 528 mat->assembled = PETSC_FALSE; 529 if (roworiented) { 530 PetscCall(MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)))); 531 } else { 532 PetscCall(MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)))); 533 } 534 } 535 } 536 } 537 PetscCall(MatSeqAIJRestoreArray(A,&aa)); /* aa, bb might have been free'd due to reallocation above. But we don't access them here */ 538 PetscCall(MatSeqAIJRestoreArray(B,&ba)); 539 PetscFunctionReturn(0); 540 } 541 542 /* 543 This function sets the j and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix. 544 The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like). 545 No off-processor parts off the matrix are allowed here and mat->was_assembled has to be PETSC_FALSE. 
546 */ 547 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat_Symbolic(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[]) 548 { 549 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 550 Mat A = aij->A; /* diagonal part of the matrix */ 551 Mat B = aij->B; /* offdiagonal part of the matrix */ 552 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 553 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 554 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,col; 555 PetscInt *ailen = a->ilen,*aj = a->j; 556 PetscInt *bilen = b->ilen,*bj = b->j; 557 PetscInt am = aij->A->rmap->n,j; 558 PetscInt diag_so_far = 0,dnz; 559 PetscInt offd_so_far = 0,onz; 560 561 PetscFunctionBegin; 562 /* Iterate over all rows of the matrix */ 563 for (j=0; j<am; j++) { 564 dnz = onz = 0; 565 /* Iterate over all non-zero columns of the current row */ 566 for (col=mat_i[j]; col<mat_i[j+1]; col++) { 567 /* If column is in the diagonal */ 568 if (mat_j[col] >= cstart && mat_j[col] < cend) { 569 aj[diag_so_far++] = mat_j[col] - cstart; 570 dnz++; 571 } else { /* off-diagonal entries */ 572 bj[offd_so_far++] = mat_j[col]; 573 onz++; 574 } 575 } 576 ailen[j] = dnz; 577 bilen[j] = onz; 578 } 579 PetscFunctionReturn(0); 580 } 581 582 /* 583 This function sets the local j, a and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix. 584 The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like). 585 No off-processor parts off the matrix are allowed here, they are set at a later point by MatSetValues_MPIAIJ. 586 Also, mat->was_assembled has to be false, otherwise the statement aj[rowstart_diag+dnz_row] = mat_j[col] - cstart; 587 would not be true and the more complex MatSetValues_MPIAIJ has to be used. 
588 */ 589 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[],const PetscScalar mat_a[]) 590 { 591 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 592 Mat A = aij->A; /* diagonal part of the matrix */ 593 Mat B = aij->B; /* offdiagonal part of the matrix */ 594 Mat_SeqAIJ *aijd =(Mat_SeqAIJ*)(aij->A)->data,*aijo=(Mat_SeqAIJ*)(aij->B)->data; 595 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 596 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 597 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend; 598 PetscInt *ailen = a->ilen,*aj = a->j; 599 PetscInt *bilen = b->ilen,*bj = b->j; 600 PetscInt am = aij->A->rmap->n,j; 601 PetscInt *full_diag_i=aijd->i,*full_offd_i=aijo->i; /* These variables can also include non-local elements, which are set at a later point. */ 602 PetscInt col,dnz_row,onz_row,rowstart_diag,rowstart_offd; 603 PetscScalar *aa = a->a,*ba = b->a; 604 605 PetscFunctionBegin; 606 /* Iterate over all rows of the matrix */ 607 for (j=0; j<am; j++) { 608 dnz_row = onz_row = 0; 609 rowstart_offd = full_offd_i[j]; 610 rowstart_diag = full_diag_i[j]; 611 /* Iterate over all non-zero columns of the current row */ 612 for (col=mat_i[j]; col<mat_i[j+1]; col++) { 613 /* If column is in the diagonal */ 614 if (mat_j[col] >= cstart && mat_j[col] < cend) { 615 aj[rowstart_diag+dnz_row] = mat_j[col] - cstart; 616 aa[rowstart_diag+dnz_row] = mat_a[col]; 617 dnz_row++; 618 } else { /* off-diagonal entries */ 619 bj[rowstart_offd+onz_row] = mat_j[col]; 620 ba[rowstart_offd+onz_row] = mat_a[col]; 621 onz_row++; 622 } 623 } 624 ailen[j] = dnz_row; 625 bilen[j] = onz_row; 626 } 627 PetscFunctionReturn(0); 628 } 629 630 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 631 { 632 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 633 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 634 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 635 
636 PetscFunctionBegin; 637 for (i=0; i<m; i++) { 638 if (idxm[i] < 0) continue; /* negative row */ 639 PetscCheck(idxm[i] < mat->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT,idxm[i],mat->rmap->N-1); 640 if (idxm[i] >= rstart && idxm[i] < rend) { 641 row = idxm[i] - rstart; 642 for (j=0; j<n; j++) { 643 if (idxn[j] < 0) continue; /* negative column */ 644 PetscCheck(idxn[j] < mat->cmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT,idxn[j],mat->cmap->N-1); 645 if (idxn[j] >= cstart && idxn[j] < cend) { 646 col = idxn[j] - cstart; 647 PetscCall(MatGetValues(aij->A,1,&row,1,&col,v+i*n+j)); 648 } else { 649 if (!aij->colmap) { 650 PetscCall(MatCreateColmap_MPIAIJ_Private(mat)); 651 } 652 #if defined(PETSC_USE_CTABLE) 653 PetscCall(PetscTableFind(aij->colmap,idxn[j]+1,&col)); 654 col--; 655 #else 656 col = aij->colmap[idxn[j]] - 1; 657 #endif 658 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 659 else { 660 PetscCall(MatGetValues(aij->B,1,&row,1,&col,v+i*n+j)); 661 } 662 } 663 } 664 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 665 } 666 PetscFunctionReturn(0); 667 } 668 669 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 670 { 671 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 672 PetscInt nstash,reallocs; 673 674 PetscFunctionBegin; 675 if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0); 676 677 PetscCall(MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range)); 678 PetscCall(MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs)); 679 PetscCall(PetscInfo(aij->A,"Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n",nstash,reallocs)); 680 PetscFunctionReturn(0); 681 } 682 683 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 684 { 685 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 686 PetscMPIInt n; 687 PetscInt 
i,j,rstart,ncols,flg; 688 PetscInt *row,*col; 689 PetscBool other_disassembled; 690 PetscScalar *val; 691 692 /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */ 693 694 PetscFunctionBegin; 695 if (!aij->donotstash && !mat->nooffprocentries) { 696 while (1) { 697 PetscCall(MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg)); 698 if (!flg) break; 699 700 for (i=0; i<n;) { 701 /* Now identify the consecutive vals belonging to the same row */ 702 for (j=i,rstart=row[j]; j<n; j++) { 703 if (row[j] != rstart) break; 704 } 705 if (j < n) ncols = j-i; 706 else ncols = n-i; 707 /* Now assemble all these values with a single function call */ 708 PetscCall(MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode)); 709 i = j; 710 } 711 } 712 PetscCall(MatStashScatterEnd_Private(&mat->stash)); 713 } 714 #if defined(PETSC_HAVE_DEVICE) 715 if (mat->offloadmask == PETSC_OFFLOAD_CPU) aij->A->offloadmask = PETSC_OFFLOAD_CPU; 716 /* We call MatBindToCPU() on aij->A and aij->B here, because if MatBindToCPU_MPIAIJ() is called before assembly, it cannot bind these. */ 717 if (mat->boundtocpu) { 718 PetscCall(MatBindToCPU(aij->A,PETSC_TRUE)); 719 PetscCall(MatBindToCPU(aij->B,PETSC_TRUE)); 720 } 721 #endif 722 PetscCall(MatAssemblyBegin(aij->A,mode)); 723 PetscCall(MatAssemblyEnd(aij->A,mode)); 724 725 /* determine if any processor has disassembled, if so we must 726 also disassemble ourself, in order that we may reassemble. 
*/
  /*
     If the nonzero structure of the off-diagonal submatrix B cannot change then we know that
     no processor disassembled, thus we can skip this stuff
  */
  if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
    /* MPI_PROD of the logicals: other_disassembled is true only if every rank was previously assembled */
    PetscCall(MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat)));
    if (mat->was_assembled && !other_disassembled) { /* mat on this rank has reduced off-diag B with local col ids, but globally it does not */
      PetscCall(MatDisAssemble_MPIAIJ(mat));
    }
  }
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    /* first assembly: build the column map, lvec and Mvctx used by matrix-vector products */
    PetscCall(MatSetUpMultiply_MPIAIJ(mat));
  }
  /* off-diagonal block rows are typically short, so inodes are not useful there */
  PetscCall(MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE));
#if defined(PETSC_HAVE_DEVICE)
  if (mat->offloadmask == PETSC_OFFLOAD_CPU && aij->B->offloadmask != PETSC_OFFLOAD_UNALLOCATED) aij->B->offloadmask = PETSC_OFFLOAD_CPU;
#endif
  PetscCall(MatAssemblyBegin(aij->B,mode));
  PetscCall(MatAssemblyEnd(aij->B,mode));

  /* scratch space used by MatGetRow(); invalidated by assembly */
  PetscCall(PetscFree2(aij->rowvalues,aij->rowindices));

  aij->rowvalues = NULL;

  /* cached diagonal is stale after assembly */
  PetscCall(VecDestroy(&aij->diag));

  /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
  if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
    PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
    PetscCall(MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat)));
  }
#if defined(PETSC_HAVE_DEVICE)
  mat->offloadmask = PETSC_OFFLOAD_BOTH;
#endif
  PetscFunctionReturn(0);
}

/* Zero all stored entries (diagonal block A and off-diagonal block B) while keeping the nonzero pattern */
PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
{
  Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  PetscCall(MatZeroEntries(l->A));
  PetscCall(MatZeroEntries(l->B));
  PetscFunctionReturn(0);
}

/*
   Zero the given global rows of the parallel matrix, optionally placing diag on the
   diagonal of each zeroed row and fixing the right-hand side b from x for those rows.
   rows[] may reference rows owned by any rank; only locally owned rows are processed here
   (MatZeroRowsMapLocal_Private maps the global list to local indices).
*/
PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIAIJ       *mat = (Mat_MPIAIJ *) A->data;
  PetscObjectState  sA, sB;                       /* nonzero states before zeroing, to detect pattern changes */
  PetscInt         *lrows;
  PetscInt          r, len;
  PetscBool         cong, lch, gch;

  PetscFunctionBegin;
  /* get locally owned rows */
  PetscCall(MatZeroRowsMapLocal_Private(A,N,rows,&len,&lrows));
  PetscCall(MatHasCongruentLayouts(A,&cong));
  /* fix right hand side if needed */
  if (x && b) {
    const PetscScalar *xx;
    PetscScalar       *bb;

    PetscCheck(cong,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Need matching row/col layout");
    PetscCall(VecGetArrayRead(x, &xx));
    PetscCall(VecGetArray(b, &bb));
    for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
    PetscCall(VecRestoreArrayRead(x, &xx));
    PetscCall(VecRestoreArray(b, &bb));
  }

  sA = mat->A->nonzerostate;
  sB = mat->B->nonzerostate;

  if (diag != 0.0 && cong) {
    /* congruent layouts: the diagonal entry lives in the local diagonal block A */
    PetscCall(MatZeroRows(mat->A, len, lrows, diag, NULL, NULL));
    PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL));
  } else if (diag != 0.0) { /* non-square or non congruent layouts -> if keepnonzeropattern is false, we allow for new insertion */
    Mat_SeqAIJ *aijA = (Mat_SeqAIJ*)mat->A->data;
    Mat_SeqAIJ *aijB = (Mat_SeqAIJ*)mat->B->data;
    PetscInt    nnwA, nnwB;   /* saved nonew flags, restored after insertion */
    PetscBool   nnzA, nnzB;

    nnwA = aijA->nonew;
    nnwB = aijB->nonew;
    nnzA = aijA->keepnonzeropattern;
    nnzB = aijB->keepnonzeropattern;
    if (!nnzA) {
      PetscCall(PetscInfo(mat->A,"Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on diagonal block.\n"));
      aijA->nonew = 0;
    }
    if (!nnzB) {
      PetscCall(PetscInfo(mat->B,"Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on off-diagonal block.\n"));
      aijB->nonew = 0;
    }
    /* Must zero here before the next loop */
    PetscCall(MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL));
    PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL));
    for (r = 0; r < len; ++r) {
      const PetscInt row = lrows[r] + A->rmap->rstart;
      if (row >= A->cmap->N) continue;  /* row beyond the column range has no diagonal entry */
      PetscCall(MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES));
    }
    aijA->nonew = nnwA;
    aijB->nonew = nnwB;
  } else {
    PetscCall(MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL));
    PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL));
  }
  PetscCall(PetscFree(lrows));
  PetscCall(MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY));

  /* reduce nonzerostate: bump the global state if any rank changed its local pattern */
  lch = (PetscBool)(sA != mat->A->nonzerostate || sB != mat->B->nonzerostate);
  PetscCall(MPIU_Allreduce(&lch,&gch,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)A)));
  if (gch) A->nonzerostate++;
  PetscFunctionReturn(0);
}

/*
   Zero the given global rows AND the corresponding columns, optionally placing diag on
   the diagonal and adjusting b so the eliminated unknowns keep the values given in x.
   rows[] may reference rows owned by any rank; a PetscSF communicates them to the owners.
*/
PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
  PetscMPIInt        n = A->rmap->n;
  PetscInt           i,j,r,m,len = 0;
  PetscInt          *lrows,*owners = A->rmap->range;
  PetscMPIInt        p = 0;
  PetscSFNode       *rrows;
  PetscSF            sf;
  const PetscScalar *xx;
  PetscScalar       *bb,*mask,*aij_a;
  Vec                xmask,lmask;
  Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
  const PetscInt    *aj, *ii,*ridx;
  PetscScalar       *aa;

  PetscFunctionBegin;
  /* Create SF where leaves are input rows and roots are owned rows */
  PetscCall(PetscMalloc1(n, &lrows));
  for (r = 0; r < n; ++r) lrows[r] = -1;  /* -1 marks "not requested" until the SF reduction below */
  PetscCall(PetscMalloc1(N, &rrows));
  for (r = 0; r < N; ++r) {
    const PetscInt idx = rows[r];
    PetscCheck(idx >= 0 && A->rmap->N > idx,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %" PetscInt_FMT " out of range [0,%" PetscInt_FMT ")",idx,A->rmap->N);
    if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
      PetscCall(PetscLayoutFindOwner(A->rmap,idx,&p));
    }
    rrows[r].rank  = p;
    rrows[r].index = rows[r] - owners[p];
  }

  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject) A), &sf));
  PetscCall(PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER));
  /* Collect flags for rows to be zeroed: owners end up with lrows[r] >= 0 for requested rows */
  PetscCall(PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR));
  PetscCall(PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR));
  PetscCall(PetscSFDestroy(&sf));
  /* Compress and put in row numbers */
  for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
  /* zero diagonal part of matrix */
  PetscCall(MatZeroRowsColumns(l->A,len,lrows,diag,x,b));
  /* handle off diagonal part of matrix: build a ghosted 0/1 mask of zeroed rows */
  PetscCall(MatCreateVecs(A,&xmask,NULL));
  PetscCall(VecDuplicate(l->lvec,&lmask));
  PetscCall(VecGetArray(xmask,&bb));
  for (i=0; i<len; i++) bb[lrows[i]] = 1;
  PetscCall(VecRestoreArray(xmask,&bb));
  PetscCall(VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD));
  PetscCall(VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD));
  PetscCall(VecDestroy(&xmask));
  if (x && b) { /* this code is buggy when the row and column layout don't match */
    PetscBool cong;

    PetscCall(MatHasCongruentLayouts(A,&cong));
    PetscCheck(cong,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Need matching row/col layout");
    /* ghost the x values so eliminated off-process columns can be folded into b below */
    PetscCall(VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD));
    PetscCall(VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD));
    PetscCall(VecGetArrayRead(l->lvec,&xx));
    PetscCall(VecGetArray(b,&bb));
  }
  PetscCall(VecGetArray(lmask,&mask));
  /* remove zeroed rows of off diagonal matrix */
  PetscCall(MatSeqAIJGetArray(l->B,&aij_a));
  ii = aij->i;
  for (i=0; i<len; i++) {
    PetscCall(PetscArrayzero(aij_a + ii[lrows[i]],ii[lrows[i]+1] - ii[lrows[i]]));
  }
  /* loop over all elements of off process part of matrix zeroing removed columns */
  if (aij->compressedrow.use) {
    m    = aij->compressedrow.nrows;
    ii   = aij->compressedrow.i;
    ridx = aij->compressedrow.rindex;
    for (i=0; i<m; i++) {
      n  = ii[i+1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij_a + ii[i];

      for (j=0; j<n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          /* column *aj was zeroed on its owner; move its contribution into b before zeroing */
          if (b) bb[*ridx] -= *aa*xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
      ridx++;
    }
  } else { /* do not use compressed row format */
    m = l->B->rmap->n;
    for (i=0; i<m; i++) {
      n  = ii[i+1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij_a + ii[i];
      for (j=0; j<n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          if (b) bb[i] -= *aa*xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
    }
  }
  if (x && b) {
    PetscCall(VecRestoreArray(b,&bb));
    PetscCall(VecRestoreArrayRead(l->lvec,&xx));
  }
  PetscCall(MatSeqAIJRestoreArray(l->B,&aij_a));
  PetscCall(VecRestoreArray(lmask,&mask));
  PetscCall(VecDestroy(&lmask));
  PetscCall(PetscFree(lrows));

  /* only change matrix nonzero state if pattern was allowed to be changed */
  if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) {
    PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
    PetscCall(MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A)));
  }
  PetscFunctionReturn(0);
}

/* yy = A*xx: overlaps the scatter of ghost values with the diagonal-block multiply */
PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
  PetscInt    nt;
  VecScatter  Mvctx = a->Mvctx;

  PetscFunctionBegin;
  PetscCall(VecGetLocalSize(xx,&nt));
  PetscCheck(nt == A->cmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%" PetscInt_FMT ") and xx (%" PetscInt_FMT ")",A->cmap->n,nt);
  PetscCall(VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD));
  PetscCall((*a->A->ops->mult)(a->A,xx,yy));                /* local work while the scatter is in flight */
  PetscCall(VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD));
  PetscCall((*a->B->ops->multadd)(a->B,a->lvec,yy,yy));     /* add off-diagonal contribution */
  PetscFunctionReturn(0);
}

/* xx = (diagonal block of A)*bb, purely local */
PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  PetscCall(MatMultDiagonalBlock(a->A,bb,xx));
  PetscFunctionReturn(0);
}

/* zz = yy + A*xx, same overlap structure as MatMult_MPIAIJ */
PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
  VecScatter  Mvctx = a->Mvctx;

  PetscFunctionBegin;
  PetscCall(VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD));
  PetscCall((*a->A->ops->multadd)(a->A,xx,yy,zz));
  PetscCall(VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD));
  PetscCall((*a->B->ops->multadd)(a->B,a->lvec,zz,zz));
  PetscFunctionReturn(0);
}

/* yy = A^T*xx: local transpose products, then a reverse scatter adds the ghost contributions */
PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  /* do nondiagonal part */
  PetscCall((*a->B->ops->multtranspose)(a->B,xx,a->lvec));
  /* do local part */
  PetscCall((*a->A->ops->multtranspose)(a->A,xx,yy));
  /* add partial results together */
  PetscCall(VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE));
  PetscCall(VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE));
  PetscFunctionReturn(0);
}

/*
   Test whether Bmat == Amat^T (within tol). Cheap collective test on the diagonal
   blocks first; only if that passes is the expensive off-diagonal comparison done
   via submatrix extraction.
*/
PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool *f)
{
  MPI_Comm    comm;
  Mat_MPIAIJ *Aij = (Mat_MPIAIJ*) Amat->data, *Bij;
  Mat         Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
  IS          Me,Notme;
  PetscInt    M,N,first,last,*notme,i;
  PetscBool   lf;
  PetscMPIInt size;

  PetscFunctionBegin;
  /* Easy test: symmetric diagonal block */
  Bij = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A;
  PetscCall(MatIsTranspose(Adia,Bdia,tol,&lf));
  PetscCall(MPIU_Allreduce(&lf,f,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)Amat)));
  if (!*f) PetscFunctionReturn(0);
  PetscCall(PetscObjectGetComm((PetscObject)Amat,&comm));
  PetscCallMPI(MPI_Comm_size(comm,&size));
  if (size == 1) PetscFunctionReturn(0);  /* sequential: the diagonal block is the whole matrix */

  /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. */
  PetscCall(MatGetSize(Amat,&M,&N));
  PetscCall(MatGetOwnershipRange(Amat,&first,&last));
  /* notme = all global rows/cols NOT owned by this rank */
  PetscCall(PetscMalloc1(N-last+first,&notme));
  for (i=0; i<first; i++) notme[i] = i;
  for (i=last; i<M; i++) notme[i-last+first] = i;
  PetscCall(ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme));
  PetscCall(ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me));
  PetscCall(MatCreateSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs));
  Aoff = Aoffs[0];
  /* note the transposed index sets: B(Notme,Me) should equal A(Me,Notme)^T */
  PetscCall(MatCreateSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs));
  Boff = Boffs[0];
  PetscCall(MatIsTranspose(Aoff,Boff,tol,f));
  PetscCall(MatDestroyMatrices(1,&Aoffs));
  PetscCall(MatDestroyMatrices(1,&Boffs));
  PetscCall(ISDestroy(&Me));
  PetscCall(ISDestroy(&Notme));
  PetscCall(PetscFree(notme));
  PetscFunctionReturn(0);
}

/* A is symmetric iff A == A^T; reuse the transpose test with Amat == Bmat */
PetscErrorCode MatIsSymmetric_MPIAIJ(Mat A,PetscReal tol,PetscBool *f)
{
  PetscFunctionBegin;
  PetscCall(MatIsTranspose_MPIAIJ(A,A,tol,f));
  PetscFunctionReturn(0);
}

/* zz = yy + A^T*xx, same structure as MatMultTranspose_MPIAIJ */
PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  /* do nondiagonal part */
  PetscCall((*a->B->ops->multtranspose)(a->B,xx,a->lvec));
  /* do local part */
  PetscCall((*a->A->ops->multtransposeadd)(a->A,xx,yy,zz));
  /* add partial results together */
  PetscCall(VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE));
  PetscCall(VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE));
  PetscFunctionReturn(0);
}

/*
   This only works correctly for square matrices where the subblock A->A is the
   diagonal block
*/
PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  PetscCheck(A->rmap->N == A->cmap->N,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
  PetscCheck(A->rmap->rstart == A->cmap->rstart && A->rmap->rend == A->cmap->rend,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
  PetscCall(MatGetDiagonal(a->A,v));
  PetscFunctionReturn(0);
}

/* A = aa*A: scale both the diagonal and off-diagonal blocks */
PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  PetscCall(MatScale(a->A,aa));
  PetscCall(MatScale(a->B,aa));
  PetscFunctionReturn(0);
}

/* Free COO stuff; must match allocation methods in MatSetPreallocationCOO_MPIAIJ() */
PETSC_INTERN PetscErrorCode MatResetPreallocationCOO_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;

  PetscFunctionBegin;
  PetscCall(PetscSFDestroy(&aij->coo_sf));
  /* perm1/jmap1 describe locally-contributed COO entries */
  PetscCall(PetscFree(aij->Aperm1));
  PetscCall(PetscFree(aij->Bperm1));
  PetscCall(PetscFree(aij->Ajmap1));
  PetscCall(PetscFree(aij->Bjmap1));

  /* imap2/perm2/jmap2 describe entries received from other ranks */
  PetscCall(PetscFree(aij->Aimap2));
  PetscCall(PetscFree(aij->Bimap2));
  PetscCall(PetscFree(aij->Aperm2));
  PetscCall(PetscFree(aij->Bperm2));
  PetscCall(PetscFree(aij->Ajmap2));
  PetscCall(PetscFree(aij->Bjmap2));

  PetscCall(PetscFree2(aij->sendbuf,aij->recvbuf));
  PetscCall(PetscFree(aij->Cperm1));
  PetscFunctionReturn(0);
}

/* Destroy all storage of the parallel AIJ matrix and unregister its composed methods */
PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT,mat->rmap->N,mat->cmap->N);
#endif
  PetscCall(MatStashDestroy_Private(&mat->stash));
  PetscCall(VecDestroy(&aij->diag));
  PetscCall(MatDestroy(&aij->A));
  PetscCall(MatDestroy(&aij->B));
#if defined(PETSC_USE_CTABLE)
  PetscCall(PetscTableDestroy(&aij->colmap));
#else
  PetscCall(PetscFree(aij->colmap));
#endif
  PetscCall(PetscFree(aij->garray));
  PetscCall(VecDestroy(&aij->lvec));
  PetscCall(VecScatterDestroy(&aij->Mvctx));
  PetscCall(PetscFree2(aij->rowvalues,aij->rowindices));
  PetscCall(PetscFree(aij->ld));

  /* Free COO */
  PetscCall(MatResetPreallocationCOO_MPIAIJ(mat));

  PetscCall(PetscFree(mat->data));

  /* may be created by MatCreateMPIAIJSumSeqAIJSymbolic */
  PetscCall(PetscObjectCompose((PetscObject)mat,"MatMergeSeqsToMPI",NULL));

  PetscCall(PetscObjectChangeTypeName((PetscObject)mat,NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatResetPreallocation_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpibaij_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL));
#if defined(PETSC_HAVE_CUDA)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijcusparse_C",NULL));
#endif
#if defined(PETSC_HAVE_KOKKOS_KERNELS)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijkokkos_C",NULL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpidense_C",NULL));
#if defined(PETSC_HAVE_ELEMENTAL)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_elemental_C",NULL));
#endif
#if defined(PETSC_HAVE_SCALAPACK)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_scalapack_C",NULL));
#endif
#if defined(PETSC_HAVE_HYPRE)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_hypre_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_transpose_mpiaij_mpiaij_C",NULL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_is_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_is_mpiaij_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaij_mpiaij_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetUseScalableIncreaseOverlap_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijperm_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijsell_C",NULL));
#if defined(PETSC_HAVE_MKL_SPARSE)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijmkl_C",NULL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijcrl_C",NULL));
  /* NOTE(review): "MatConvert_mpiaij_is_C" is composed to NULL twice in this list; the
     repeat is harmless but looks like a leftover duplicate — confirm against upstream */
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_is_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisell_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatSetPreallocationCOO_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatSetValuesCOO_C",NULL));
  PetscFunctionReturn(0);
}

PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer
viewer) 1208 { 1209 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1210 Mat_SeqAIJ *A = (Mat_SeqAIJ*)aij->A->data; 1211 Mat_SeqAIJ *B = (Mat_SeqAIJ*)aij->B->data; 1212 const PetscInt *garray = aij->garray; 1213 const PetscScalar *aa,*ba; 1214 PetscInt header[4],M,N,m,rs,cs,nz,cnt,i,ja,jb; 1215 PetscInt *rowlens; 1216 PetscInt *colidxs; 1217 PetscScalar *matvals; 1218 1219 PetscFunctionBegin; 1220 PetscCall(PetscViewerSetUp(viewer)); 1221 1222 M = mat->rmap->N; 1223 N = mat->cmap->N; 1224 m = mat->rmap->n; 1225 rs = mat->rmap->rstart; 1226 cs = mat->cmap->rstart; 1227 nz = A->nz + B->nz; 1228 1229 /* write matrix header */ 1230 header[0] = MAT_FILE_CLASSID; 1231 header[1] = M; header[2] = N; header[3] = nz; 1232 PetscCallMPI(MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat))); 1233 PetscCall(PetscViewerBinaryWrite(viewer,header,4,PETSC_INT)); 1234 1235 /* fill in and store row lengths */ 1236 PetscCall(PetscMalloc1(m,&rowlens)); 1237 for (i=0; i<m; i++) rowlens[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]; 1238 PetscCall(PetscViewerBinaryWriteAll(viewer,rowlens,m,rs,M,PETSC_INT)); 1239 PetscCall(PetscFree(rowlens)); 1240 1241 /* fill in and store column indices */ 1242 PetscCall(PetscMalloc1(nz,&colidxs)); 1243 for (cnt=0, i=0; i<m; i++) { 1244 for (jb=B->i[i]; jb<B->i[i+1]; jb++) { 1245 if (garray[B->j[jb]] > cs) break; 1246 colidxs[cnt++] = garray[B->j[jb]]; 1247 } 1248 for (ja=A->i[i]; ja<A->i[i+1]; ja++) 1249 colidxs[cnt++] = A->j[ja] + cs; 1250 for (; jb<B->i[i+1]; jb++) 1251 colidxs[cnt++] = garray[B->j[jb]]; 1252 } 1253 PetscCheck(cnt == nz,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt_FMT,cnt,nz); 1254 PetscCall(PetscViewerBinaryWriteAll(viewer,colidxs,nz,PETSC_DETERMINE,PETSC_DETERMINE,PETSC_INT)); 1255 PetscCall(PetscFree(colidxs)); 1256 1257 /* fill in and store nonzero values */ 1258 PetscCall(MatSeqAIJGetArrayRead(aij->A,&aa)); 1259 PetscCall(MatSeqAIJGetArrayRead(aij->B,&ba)); 
1260 PetscCall(PetscMalloc1(nz,&matvals)); 1261 for (cnt=0, i=0; i<m; i++) { 1262 for (jb=B->i[i]; jb<B->i[i+1]; jb++) { 1263 if (garray[B->j[jb]] > cs) break; 1264 matvals[cnt++] = ba[jb]; 1265 } 1266 for (ja=A->i[i]; ja<A->i[i+1]; ja++) 1267 matvals[cnt++] = aa[ja]; 1268 for (; jb<B->i[i+1]; jb++) 1269 matvals[cnt++] = ba[jb]; 1270 } 1271 PetscCall(MatSeqAIJRestoreArrayRead(aij->A,&aa)); 1272 PetscCall(MatSeqAIJRestoreArrayRead(aij->B,&ba)); 1273 PetscCheck(cnt == nz,PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt_FMT,cnt,nz); 1274 PetscCall(PetscViewerBinaryWriteAll(viewer,matvals,nz,PETSC_DETERMINE,PETSC_DETERMINE,PETSC_SCALAR)); 1275 PetscCall(PetscFree(matvals)); 1276 1277 /* write block size option to the viewer's .info file */ 1278 PetscCall(MatView_Binary_BlockSizes(mat,viewer)); 1279 PetscFunctionReturn(0); 1280 } 1281 1282 #include <petscdraw.h> 1283 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 1284 { 1285 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1286 PetscMPIInt rank = aij->rank,size = aij->size; 1287 PetscBool isdraw,iascii,isbinary; 1288 PetscViewer sviewer; 1289 PetscViewerFormat format; 1290 1291 PetscFunctionBegin; 1292 PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw)); 1293 PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii)); 1294 PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary)); 1295 if (iascii) { 1296 PetscCall(PetscViewerGetFormat(viewer,&format)); 1297 if (format == PETSC_VIEWER_LOAD_BALANCE) { 1298 PetscInt i,nmax = 0,nmin = PETSC_MAX_INT,navg = 0,*nz,nzlocal = ((Mat_SeqAIJ*) (aij->A->data))->nz + ((Mat_SeqAIJ*) (aij->B->data))->nz; 1299 PetscCall(PetscMalloc1(size,&nz)); 1300 PetscCallMPI(MPI_Allgather(&nzlocal,1,MPIU_INT,nz,1,MPIU_INT,PetscObjectComm((PetscObject)mat))); 1301 for (i=0; i<(PetscInt)size; i++) { 1302 nmax = PetscMax(nmax,nz[i]); 1303 nmin = 
PetscMin(nmin,nz[i]); 1304 navg += nz[i]; 1305 } 1306 PetscCall(PetscFree(nz)); 1307 navg = navg/size; 1308 PetscCall(PetscViewerASCIIPrintf(viewer,"Load Balance - Nonzeros: Min %" PetscInt_FMT " avg %" PetscInt_FMT " max %" PetscInt_FMT "\n",nmin,navg,nmax)); 1309 PetscFunctionReturn(0); 1310 } 1311 PetscCall(PetscViewerGetFormat(viewer,&format)); 1312 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1313 MatInfo info; 1314 PetscInt *inodes=NULL; 1315 1316 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank)); 1317 PetscCall(MatGetInfo(mat,MAT_LOCAL,&info)); 1318 PetscCall(MatInodeGetInodeSizes(aij->A,NULL,&inodes,NULL)); 1319 PetscCall(PetscViewerASCIIPushSynchronized(viewer)); 1320 if (!inodes) { 1321 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %g, not using I-node routines\n", 1322 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory)); 1323 } else { 1324 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %g, using I-node routines\n", 1325 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory)); 1326 } 1327 PetscCall(MatGetInfo(aij->A,MAT_LOCAL,&info)); 1328 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %" PetscInt_FMT " \n",rank,(PetscInt)info.nz_used)); 1329 PetscCall(MatGetInfo(aij->B,MAT_LOCAL,&info)); 1330 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %" PetscInt_FMT " \n",rank,(PetscInt)info.nz_used)); 1331 PetscCall(PetscViewerFlush(viewer)); 1332 PetscCall(PetscViewerASCIIPopSynchronized(viewer)); 1333 PetscCall(PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n")); 1334 PetscCall(VecScatterView(aij->Mvctx,viewer)); 1335 PetscFunctionReturn(0); 1336 } else if (format == 
PETSC_VIEWER_ASCII_INFO) { 1337 PetscInt inodecount,inodelimit,*inodes; 1338 PetscCall(MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit)); 1339 if (inodes) { 1340 PetscCall(PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %" PetscInt_FMT " nodes, limit used is %" PetscInt_FMT "\n",inodecount,inodelimit)); 1341 } else { 1342 PetscCall(PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n")); 1343 } 1344 PetscFunctionReturn(0); 1345 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1346 PetscFunctionReturn(0); 1347 } 1348 } else if (isbinary) { 1349 if (size == 1) { 1350 PetscCall(PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name)); 1351 PetscCall(MatView(aij->A,viewer)); 1352 } else { 1353 PetscCall(MatView_MPIAIJ_Binary(mat,viewer)); 1354 } 1355 PetscFunctionReturn(0); 1356 } else if (iascii && size == 1) { 1357 PetscCall(PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name)); 1358 PetscCall(MatView(aij->A,viewer)); 1359 PetscFunctionReturn(0); 1360 } else if (isdraw) { 1361 PetscDraw draw; 1362 PetscBool isnull; 1363 PetscCall(PetscViewerDrawGetDraw(viewer,0,&draw)); 1364 PetscCall(PetscDrawIsNull(draw,&isnull)); 1365 if (isnull) PetscFunctionReturn(0); 1366 } 1367 1368 { /* assemble the entire matrix onto first processor */ 1369 Mat A = NULL, Av; 1370 IS isrow,iscol; 1371 1372 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->rmap->N : 0,0,1,&isrow)); 1373 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->cmap->N : 0,0,1,&iscol)); 1374 PetscCall(MatCreateSubMatrix(mat,isrow,iscol,MAT_INITIAL_MATRIX,&A)); 1375 PetscCall(MatMPIAIJGetSeqAIJ(A,&Av,NULL,NULL)); 1376 /* The commented code uses MatCreateSubMatrices instead */ 1377 /* 1378 Mat *AA, A = NULL, Av; 1379 IS isrow,iscol; 1380 1381 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? 
mat->rmap->N : 0,0,1,&isrow)); 1382 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->cmap->N : 0,0,1,&iscol)); 1383 PetscCall(MatCreateSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&AA)); 1384 if (rank == 0) { 1385 PetscCall(PetscObjectReference((PetscObject)AA[0])); 1386 A = AA[0]; 1387 Av = AA[0]; 1388 } 1389 PetscCall(MatDestroySubMatrices(1,&AA)); 1390 */ 1391 PetscCall(ISDestroy(&iscol)); 1392 PetscCall(ISDestroy(&isrow)); 1393 /* 1394 Everyone has to call to draw the matrix since the graphics waits are 1395 synchronized across all processors that share the PetscDraw object 1396 */ 1397 PetscCall(PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer)); 1398 if (rank == 0) { 1399 if (((PetscObject)mat)->name) { 1400 PetscCall(PetscObjectSetName((PetscObject)Av,((PetscObject)mat)->name)); 1401 } 1402 PetscCall(MatView_SeqAIJ(Av,sviewer)); 1403 } 1404 PetscCall(PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer)); 1405 PetscCall(PetscViewerFlush(viewer)); 1406 PetscCall(MatDestroy(&A)); 1407 } 1408 PetscFunctionReturn(0); 1409 } 1410 1411 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer) 1412 { 1413 PetscBool iascii,isdraw,issocket,isbinary; 1414 1415 PetscFunctionBegin; 1416 PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii)); 1417 PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw)); 1418 PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary)); 1419 PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket)); 1420 if (iascii || isdraw || isbinary || issocket) { 1421 PetscCall(MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer)); 1422 } 1423 PetscFunctionReturn(0); 1424 } 1425 1426 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1427 { 1428 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1429 Vec bb1 = NULL; 1430 PetscBool hasop; 1431 
1432 PetscFunctionBegin; 1433 if (flag == SOR_APPLY_UPPER) { 1434 PetscCall((*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx)); 1435 PetscFunctionReturn(0); 1436 } 1437 1438 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) { 1439 PetscCall(VecDuplicate(bb,&bb1)); 1440 } 1441 1442 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 1443 if (flag & SOR_ZERO_INITIAL_GUESS) { 1444 PetscCall((*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx)); 1445 its--; 1446 } 1447 1448 while (its--) { 1449 PetscCall(VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD)); 1450 PetscCall(VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD)); 1451 1452 /* update rhs: bb1 = bb - B*x */ 1453 PetscCall(VecScale(mat->lvec,-1.0)); 1454 PetscCall((*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1)); 1455 1456 /* local sweep */ 1457 PetscCall((*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx)); 1458 } 1459 } else if (flag & SOR_LOCAL_FORWARD_SWEEP) { 1460 if (flag & SOR_ZERO_INITIAL_GUESS) { 1461 PetscCall((*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx)); 1462 its--; 1463 } 1464 while (its--) { 1465 PetscCall(VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD)); 1466 PetscCall(VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD)); 1467 1468 /* update rhs: bb1 = bb - B*x */ 1469 PetscCall(VecScale(mat->lvec,-1.0)); 1470 PetscCall((*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1)); 1471 1472 /* local sweep */ 1473 PetscCall((*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx)); 1474 } 1475 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) { 1476 if (flag & SOR_ZERO_INITIAL_GUESS) { 1477 PetscCall((*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx)); 1478 its--; 1479 } 1480 while (its--) { 1481 PetscCall(VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD)); 1482 
PetscCall(VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD)); 1483 1484 /* update rhs: bb1 = bb - B*x */ 1485 PetscCall(VecScale(mat->lvec,-1.0)); 1486 PetscCall((*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1)); 1487 1488 /* local sweep */ 1489 PetscCall((*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx)); 1490 } 1491 } else if (flag & SOR_EISENSTAT) { 1492 Vec xx1; 1493 1494 PetscCall(VecDuplicate(bb,&xx1)); 1495 PetscCall((*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx)); 1496 1497 PetscCall(VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD)); 1498 PetscCall(VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD)); 1499 if (!mat->diag) { 1500 PetscCall(MatCreateVecs(matin,&mat->diag,NULL)); 1501 PetscCall(MatGetDiagonal(matin,mat->diag)); 1502 } 1503 PetscCall(MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop)); 1504 if (hasop) { 1505 PetscCall(MatMultDiagonalBlock(matin,xx,bb1)); 1506 } else { 1507 PetscCall(VecPointwiseMult(bb1,mat->diag,xx)); 1508 } 1509 PetscCall(VecAYPX(bb1,(omega-2.0)/omega,bb)); 1510 1511 PetscCall(MatMultAdd(mat->B,mat->lvec,bb1,bb1)); 1512 1513 /* local sweep */ 1514 PetscCall((*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1)); 1515 PetscCall(VecAXPY(xx,1.0,xx1)); 1516 PetscCall(VecDestroy(&xx1)); 1517 } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported"); 1518 1519 PetscCall(VecDestroy(&bb1)); 1520 1521 matin->factorerrortype = mat->A->factorerrortype; 1522 PetscFunctionReturn(0); 1523 } 1524 1525 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B) 1526 { 1527 Mat aA,aB,Aperm; 1528 const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj; 1529 PetscScalar *aa,*ba; 1530 PetscInt i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest; 1531 PetscSF rowsf,sf; 
1532 IS parcolp = NULL; 1533 PetscBool done; 1534 1535 PetscFunctionBegin; 1536 PetscCall(MatGetLocalSize(A,&m,&n)); 1537 PetscCall(ISGetIndices(rowp,&rwant)); 1538 PetscCall(ISGetIndices(colp,&cwant)); 1539 PetscCall(PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest)); 1540 1541 /* Invert row permutation to find out where my rows should go */ 1542 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf)); 1543 PetscCall(PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant)); 1544 PetscCall(PetscSFSetFromOptions(rowsf)); 1545 for (i=0; i<m; i++) work[i] = A->rmap->rstart + i; 1546 PetscCall(PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPI_REPLACE)); 1547 PetscCall(PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPI_REPLACE)); 1548 1549 /* Invert column permutation to find out where my columns should go */ 1550 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A),&sf)); 1551 PetscCall(PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant)); 1552 PetscCall(PetscSFSetFromOptions(sf)); 1553 for (i=0; i<n; i++) work[i] = A->cmap->rstart + i; 1554 PetscCall(PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPI_REPLACE)); 1555 PetscCall(PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPI_REPLACE)); 1556 PetscCall(PetscSFDestroy(&sf)); 1557 1558 PetscCall(ISRestoreIndices(rowp,&rwant)); 1559 PetscCall(ISRestoreIndices(colp,&cwant)); 1560 PetscCall(MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols)); 1561 1562 /* Find out where my gcols should go */ 1563 PetscCall(MatGetSize(aB,NULL,&ng)); 1564 PetscCall(PetscMalloc1(ng,&gcdest)); 1565 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A),&sf)); 1566 PetscCall(PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols)); 1567 PetscCall(PetscSFSetFromOptions(sf)); 1568 PetscCall(PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest,MPI_REPLACE)); 1569 PetscCall(PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest,MPI_REPLACE)); 1570 PetscCall(PetscSFDestroy(&sf)); 1571 1572 
PetscCall(PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz)); 1573 PetscCall(MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done)); 1574 PetscCall(MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done)); 1575 for (i=0; i<m; i++) { 1576 PetscInt row = rdest[i]; 1577 PetscMPIInt rowner; 1578 PetscCall(PetscLayoutFindOwner(A->rmap,row,&rowner)); 1579 for (j=ai[i]; j<ai[i+1]; j++) { 1580 PetscInt col = cdest[aj[j]]; 1581 PetscMPIInt cowner; 1582 PetscCall(PetscLayoutFindOwner(A->cmap,col,&cowner)); /* Could build an index for the columns to eliminate this search */ 1583 if (rowner == cowner) dnnz[i]++; 1584 else onnz[i]++; 1585 } 1586 for (j=bi[i]; j<bi[i+1]; j++) { 1587 PetscInt col = gcdest[bj[j]]; 1588 PetscMPIInt cowner; 1589 PetscCall(PetscLayoutFindOwner(A->cmap,col,&cowner)); 1590 if (rowner == cowner) dnnz[i]++; 1591 else onnz[i]++; 1592 } 1593 } 1594 PetscCall(PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz,MPI_REPLACE)); 1595 PetscCall(PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz,MPI_REPLACE)); 1596 PetscCall(PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz,MPI_REPLACE)); 1597 PetscCall(PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz,MPI_REPLACE)); 1598 PetscCall(PetscSFDestroy(&rowsf)); 1599 1600 PetscCall(MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm)); 1601 PetscCall(MatSeqAIJGetArray(aA,&aa)); 1602 PetscCall(MatSeqAIJGetArray(aB,&ba)); 1603 for (i=0; i<m; i++) { 1604 PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */ 1605 PetscInt j0,rowlen; 1606 rowlen = ai[i+1] - ai[i]; 1607 for (j0=j=0; j<rowlen; j0=j) { /* rowlen could be larger than number of rows m, so sum in batches */ 1608 for (; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]]; 1609 PetscCall(MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES)); 1610 } 1611 rowlen = bi[i+1] - bi[i]; 1612 for (j0=j=0; j<rowlen; j0=j) { 1613 for (; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = 
gcdest[bj[bi[i]+j]];
      PetscCall(MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES));
    }
  }
  PetscCall(MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY));
  /* Release the row structure and value arrays of the local (diagonal/off-diagonal) parts */
  PetscCall(MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done));
  PetscCall(MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done));
  PetscCall(MatSeqAIJRestoreArray(aA,&aa));
  PetscCall(MatSeqAIJRestoreArray(aB,&ba));
  PetscCall(PetscFree4(dnnz,onnz,tdnnz,tonnz));
  PetscCall(PetscFree3(work,rdest,cdest));
  PetscCall(PetscFree(gcdest));
  if (parcolp) PetscCall(ISDestroy(&colp)); /* colp was created locally above; only then do we own it */
  *B = Aperm;
  PetscFunctionReturn(0);
}

/*
   Returns the number of ghost columns of the off-diagonal part B and (optionally) a
   pointer to garray, the map from local B column numbers to global column numbers.
   The returned array is owned by the matrix; callers must not free it.
*/
PetscErrorCode MatGetGhosts_MPIAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[])
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;

  PetscFunctionBegin;
  /* The number of columns of B equals the number of ghost (off-process) columns referenced locally */
  PetscCall(MatGetSize(aij->B,NULL,nghosts));
  if (ghosts) *ghosts = aij->garray;
  PetscFunctionReturn(0);
}

/*
   Collects matrix statistics (nonzero counts, memory, mallocs) by summing the
   diagonal (A) and off-diagonal (B) local parts, then reducing over the
   communicator according to 'flag' (local, global max, or global sum).
*/
PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
  Mat            A = mat->A,B = mat->B;
  PetscLogDouble isend[5],irecv[5];

  PetscFunctionBegin;
  info->block_size = 1.0;
  PetscCall(MatGetInfo(A,MAT_LOCAL,info));

  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;

  PetscCall(MatGetInfo(B,MAT_LOCAL,info));

  /* Accumulate B's statistics on top of A's */
  isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
  isend[3] += info->memory;  isend[4] += info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
PetscCall(MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_MAX,PetscObjectComm((PetscObject)matin)));

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    PetscCall(MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_SUM,PetscObjectComm((PetscObject)matin)));

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(0);
}

/*
   Dispatches a MatOption to the appropriate place: most structural options are
   forwarded to both local parts (A and B); some are stored in the MPIAIJ
   context; symmetry-related flags are handled by the generic MatSetOption().
*/
PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_UNUSED_NONZERO_LOCATION_ERR:
  case MAT_KEEP_NONZERO_PATTERN:
  case MAT_NEW_NONZERO_LOCATION_ERR:
  case MAT_USE_INODES:
  case MAT_IGNORE_ZERO_ENTRIES:
  case MAT_FORM_EXPLICIT_TRANSPOSE:
    /* These options only make sense once the nonzero structure exists */
    MatCheckPreallocated(A,1);
    PetscCall(MatSetOption(a->A,op,flg));
    PetscCall(MatSetOption(a->B,op,flg));
    break;
  case MAT_ROW_ORIENTED:
    MatCheckPreallocated(A,1);
    a->roworiented = flg;

    PetscCall(MatSetOption(a->A,op,flg));
    PetscCall(MatSetOption(a->B,op,flg));
    break;
  case MAT_FORCE_DIAGONAL_ENTRIES:
  case MAT_SORTED_FULL:
    PetscCall(PetscInfo(A,"Option %s ignored\n",MatOptions[op]));
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = flg;
    break;
  /* Symmetry flags are handled directly by MatSetOption() and they don't affect preallocation */
  case MAT_SPD:
  case MAT_SYMMETRIC:
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_SYMMETRY_ETERNAL:
  case MAT_STRUCTURAL_SYMMETRY_ETERNAL:
  case MAT_SPD_ETERNAL:
    /* if the diagonal matrix is square it inherits some of the properties above */
    break;
  case MAT_SUBMAT_SINGLEIS:
    A->submat_singleis = flg;
    break;
  case MAT_STRUCTURE_ONLY:
    /* The option is handled directly by MatSetOption() */
    break;
  default:
    SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
  }
  PetscFunctionReturn(0);
}

/*
   Returns one locally owned row of the parallel matrix by merging the row of the
   diagonal part A (columns shifted by cstart) with the row of the off-diagonal
   part B (columns mapped through garray), keeping global column numbers sorted.
   Scratch arrays mat->rowvalues/rowindices are sized once for the longest row.
   Must be followed by MatRestoreRow_MPIAIJ(); only one row may be active at a time.
*/
PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIAIJ  *mat = (Mat_MPIAIJ*)matin->data;
  PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p;
  PetscInt    i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
  PetscInt    nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
  PetscInt    *cmap,*idx_p;

  PetscFunctionBegin;
  PetscCheck(!mat->getrowactive,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
    PetscInt   max = 1,tmp;
    for (i=0; i<matin->rmap->n; i++) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
      if (max < tmp) max = tmp;
    }
    PetscCall(PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices));
  }

  PetscCheck(row >= rstart && row < rend,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
  lrow = row - rstart;

  /* Only request the pieces (values/columns) the caller asked for */
  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = NULL; pvB = NULL;}
  if (!idx) {pcA = NULL; if (!v) pcB = NULL;}
  PetscCall((*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA));
  PetscCall((*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB));
  nztot = nzA + nzB;

  cmap = mat->garray;
  if (v || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      PetscInt imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        /* B entries with global column < cstart come first */
        for (i=0; i<nzB; i++) {
          if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
        for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          /* imark was already computed in the values pass */
          for (i=0; i<imark; i++) {
            idx_p[i] = cmap[cworkB[i]];
          }
        } else {
          for (i=0; i<nzB; i++) {
            if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
            else break;
          }
          imark = i;
        }
        for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
        for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
      }
    } else {
      if (idx) *idx = NULL;
      if (v)   *v   = NULL;
    }
  }
  *nz = nztot;
  PetscCall((*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA));
  PetscCall((*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB));
  PetscFunctionReturn(0);
}

PetscErrorCode MatRestoreRow_MPIAIJ(Mat
mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;

  PetscFunctionBegin;
  /* Pair of MatGetRow_MPIAIJ(); the scratch buffers are kept for reuse, only the flag is reset */
  PetscCheck(aij->getrowactive,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
  aij->getrowactive = PETSC_FALSE;
  PetscFunctionReturn(0);
}

/*
   Computes the Frobenius, 1- (max column sum), or infinity (max row sum) norm of
   the parallel matrix by combining the local diagonal (A) and off-diagonal (B)
   parts and reducing across the communicator.  The 2-norm is not supported.
*/
PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
{
  Mat_MPIAIJ      *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ      *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
  PetscInt        i,j,cstart = mat->cmap->rstart;
  PetscReal       sum = 0.0;
  const MatScalar *v,*amata,*bmata;

  PetscFunctionBegin;
  if (aij->size == 1) {
    /* single process: the diagonal block is the whole matrix */
    PetscCall(MatNorm(aij->A,type,norm));
  } else {
    PetscCall(MatSeqAIJGetArrayRead(aij->A,&amata));
    PetscCall(MatSeqAIJGetArrayRead(aij->B,&bmata));
    if (type == NORM_FROBENIUS) {
      /* sum of |a_ij|^2 over both local parts, then global sum and sqrt */
      v = amata;
      for (i=0; i<amat->nz; i++) {
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
      }
      v = bmata;
      for (i=0; i<bmat->nz; i++) {
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
      }
      PetscCall(MPIU_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat)));
      *norm = PetscSqrtReal(*norm);
      PetscCall(PetscLogFlops(2.0*amat->nz+2.0*bmat->nz));
    } else if (type == NORM_1) { /* max column norm */
      /* accumulate |a_ij| per GLOBAL column, reduce, then take the max entry */
      PetscReal *tmp,*tmp2;
      PetscInt  *jj,*garray = aij->garray;
      PetscCall(PetscCalloc1(mat->cmap->N+1,&tmp));
      PetscCall(PetscMalloc1(mat->cmap->N+1,&tmp2));
      *norm = 0.0;
      v = amata; jj = amat->j;
      for (j=0; j<amat->nz; j++) {
        tmp[cstart + *jj++] += PetscAbsScalar(*v); v++;
      }
      v = bmata; jj = bmat->j;
      for (j=0; j<bmat->nz; j++) {
        tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
      }
      PetscCall(MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat)));
      for (j=0; j<mat->cmap->N; j++) {
        if (tmp2[j] > *norm) *norm = tmp2[j];
      }
      PetscCall(PetscFree(tmp));
      PetscCall(PetscFree(tmp2));
      PetscCall(PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0)));
    } else if (type == NORM_INFINITY) { /* max row norm */
      /* rows are not split across processes, so a local row max then global max suffices */
      PetscReal ntemp = 0.0;
      for (j=0; j<aij->A->rmap->n; j++) {
        v = amata + amat->i[j];
        sum = 0.0;
        for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
          sum += PetscAbsScalar(*v); v++;
        }
        v = bmata + bmat->i[j];
        for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
          sum += PetscAbsScalar(*v); v++;
        }
        if (sum > ntemp) ntemp = sum;
      }
      PetscCall(MPIU_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat)));
      PetscCall(PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0)));
    } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm");
    PetscCall(MatSeqAIJRestoreArrayRead(aij->A,&amata));
    PetscCall(MatSeqAIJRestoreArrayRead(aij->B,&bmata));
  }
  PetscFunctionReturn(0);
}

/*
   Transposes the parallel matrix.  The diagonal block is transposed locally;
   the off-diagonal block is redistributed with MatSetValues().  Preallocation
   of the result is computed via a PetscSF reduction of the column counts.
*/
PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
{
  Mat_MPIAIJ      *a =(Mat_MPIAIJ*)A->data,*b;
  Mat_SeqAIJ      *Aloc =(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data,*sub_B_diag;
  PetscInt        M = A->rmap->N,N=A->cmap->N,ma,na,mb,nb,row,*cols,*cols_tmp,*B_diag_ilen,i,ncol,A_diag_ncol;
  const PetscInt  *ai,*aj,*bi,*bj,*B_diag_i;
  Mat             B,A_diag,*B_diag;
  const MatScalar *pbv,*bv;

  PetscFunctionBegin;
  ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n;
  ai = Aloc->i; aj = Aloc->j;
  bi = Bloc->i; bj = Bloc->j;
  if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
    PetscInt             *d_nnz,*g_nnz,*o_nnz;
    PetscSFNode          *oloc;
    PETSC_UNUSED PetscSF sf;

    PetscCall(PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc));
    /* compute d_nnz for preallocation */
    PetscCall(PetscArrayzero(d_nnz,na));
    for (i=0; i<ai[ma]; i++) d_nnz[aj[i]]++;
    /* compute local off-diagonal
contributions */
    PetscCall(PetscArrayzero(g_nnz,nb));
    for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++;
    /* map those to global */
    PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A),&sf));
    PetscCall(PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray));
    PetscCall(PetscSFSetFromOptions(sf));
    PetscCall(PetscArrayzero(o_nnz,na));
    PetscCall(PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM));
    PetscCall(PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM));
    PetscCall(PetscSFDestroy(&sf));

    PetscCall(MatCreate(PetscObjectComm((PetscObject)A),&B));
    PetscCall(MatSetSizes(B,A->cmap->n,A->rmap->n,N,M)); /* row/column layouts swapped */
    PetscCall(MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs)));
    PetscCall(MatSetType(B,((PetscObject)A)->type_name));
    PetscCall(MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz));
    PetscCall(PetscFree4(d_nnz,o_nnz,g_nnz,oloc));
  } else {
    B = *matout;
    PetscCall(MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE));
  }

  b           = (Mat_MPIAIJ*)B->data;
  A_diag      = a->A;
  B_diag      = &b->A;
  sub_B_diag  = (Mat_SeqAIJ*)(*B_diag)->data;
  A_diag_ncol = A_diag->cmap->N;
  B_diag_ilen = sub_B_diag->ilen;
  B_diag_i    = sub_B_diag->i;

  /* Set ilen for diagonal of B */
  for (i=0; i<A_diag_ncol; i++) {
    B_diag_ilen[i] = B_diag_i[i+1] - B_diag_i[i];
  }

  /* Transpose the diagonal part of the matrix. In contrast to the offdiagonal part, this can be done
     very quickly (=without using MatSetValues), because all writes are local. */
  PetscCall(MatTranspose(A_diag,MAT_REUSE_MATRIX,B_diag));

  /* copy over the B part */
  PetscCall(PetscMalloc1(bi[mb],&cols));
  PetscCall(MatSeqAIJGetArrayRead(a->B,&bv));
  pbv = bv;
  row = A->rmap->rstart;
  /* translate compressed B column indices to global column numbers */
  for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]];
  cols_tmp = cols;
  for (i=0; i<mb; i++) {
    ncol = bi[i+1]-bi[i];
    /* insert row i of B as COLUMN 'row' of the transpose (1 column, ncol rows) */
    PetscCall(MatSetValues(B,ncol,cols_tmp,1,&row,pbv,INSERT_VALUES));
    row++;
    pbv += ncol; cols_tmp += ncol;
  }
  PetscCall(PetscFree(cols));
  PetscCall(MatSeqAIJRestoreArrayRead(a->B,&bv));

  PetscCall(MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY));
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
    *matout = B;
  } else {
    /* MAT_INPLACE_MATRIX: fold the result back into A */
    PetscCall(MatHeaderMerge(A,&B));
  }
  PetscFunctionReturn(0);
}

/*
   Scales the matrix rows by ll and columns by rr (either may be NULL).  The
   right-scaling of the off-diagonal part needs the ghosted values of rr, which
   are obtained with the matrix-vector scatter Mvctx.
*/
PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
  Mat        a = aij->A,b = aij->B;
  PetscInt   s1,s2,s3;

  PetscFunctionBegin;
  PetscCall(MatGetLocalSize(mat,&s2,&s3));
  if (rr) {
    PetscCall(VecGetLocalSize(rr,&s1));
    PetscCheck(s1==s3,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
    /* Overlap communication with computation.
*/
    PetscCall(VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD));
  }
  if (ll) {
    PetscCall(VecGetLocalSize(ll,&s1));
    PetscCheck(s1==s2,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
    /* left scaling of B is purely local (rows are not distributed) */
    PetscCall((*b->ops->diagonalscale)(b,ll,NULL));
  }
  /* scale the diagonal block */
  PetscCall((*a->ops->diagonalscale)(a,ll,rr));

  if (rr) {
    /* Do a scatter end and then right scale the off-diagonal block */
    PetscCall(VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD));
    PetscCall((*b->ops->diagonalscale)(b,NULL,aij->lvec));
  }
  PetscFunctionReturn(0);
}

/*
   Marks the matrix as unfactored; only the diagonal block carries factorization state.
*/
PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  PetscCall(MatSetUnfactored(a->A));
  PetscFunctionReturn(0);
}

/*
   Tests two parallel matrices for equality: the local diagonal and off-diagonal
   parts are compared, then the per-process results are AND-reduced so every
   process returns the same answer.
*/
PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool *flag)
{
  Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
  Mat        a,b,c,d;
  PetscBool  flg;

  PetscFunctionBegin;
  a = matA->A; b = matA->B;
  c = matB->A; d = matB->B;

  PetscCall(MatEqual(a,c,&flg));
  if (flg) {
    PetscCall(MatEqual(b,d,&flg));
  }
  PetscCall(MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A)));
  PetscFunctionReturn(0);
}

/*
   Copies A into B.  The fast path copies the two local parts directly; it is
   only valid when both matrices share the nonzero pattern and the same copy
   implementation, otherwise the generic MatCopy_Basic() is used.
*/
PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
  Mat_MPIAIJ *b = (Mat_MPIAIJ*)B->data;

  PetscFunctionBegin;
  /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
  if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
    /* because of the column compression in the off-processor part of the matrix a->B,
       the number of columns in a->B and b->B may be different, hence we cannot call
       the MatCopy() directly on the two parts. If need be, we can provide a more
       efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
       then copying the submatrices */
    PetscCall(MatCopy_Basic(A,B,str));
  } else {
    PetscCall(MatCopy(a->A,b->A,str));
    PetscCall(MatCopy(a->B,b->B,str));
  }
  PetscCall(PetscObjectStateIncrease((PetscObject)B));
  PetscFunctionReturn(0);
}

/*
   Default MatSetUp(): preallocates with default (heuristic) nonzero counts.
*/
PetscErrorCode MatSetUp_MPIAIJ(Mat A)
{
  PetscFunctionBegin;
  PetscCall(MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,NULL,PETSC_DEFAULT,NULL));
  PetscFunctionReturn(0);
}

/*
   Computes the number of nonzeros per row needed for preallocation when X and Y
   have different nonzero structure.
*/
PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz)
{
  PetscInt i,j,k,nzx,nzy;

  PetscFunctionBegin;
  /* Set the number of nonzeros in the new matrix: per row, count the union of the
     column sets of X and Y after mapping both to global numbering (merge of two
     sorted lists, duplicates counted once) */
  for (i=0; i<m; i++) {
    const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i];
    nzx = xi[i+1] - xi[i];
    nzy = yi[i+1] - yi[i];
    nnz[i] = 0;
    for (j=0,k=0; j<nzx; j++) {                                   /* Point in X */
      for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */
      if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++;             /* Skip duplicate */
      nnz[i]++;
    }
    for (; k<nzy; k++) nnz[i]++;
  }
  PetscFunctionReturn(0);
}

/* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz)
{
  PetscInt   m = Y->rmap->N;
  Mat_SeqAIJ *x = (Mat_SeqAIJ*)X->data;
  Mat_SeqAIJ *y = (Mat_SeqAIJ*)Y->data;

  PetscFunctionBegin;
  PetscCall(MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz));
  PetscFunctionReturn(0);
}

/*
   Computes Y = a*X + Y.  With identical patterns the local parts are combined
   directly; with a subset pattern the basic kernel is used; otherwise a new
   matrix with the union pattern is preallocated and Y is replaced in place.
*/
PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
{
  Mat_MPIAIJ *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data;

  PetscFunctionBegin;
  if (str == SAME_NONZERO_PATTERN) {
    PetscCall(MatAXPY(yy->A,a,xx->A,str));
    PetscCall(MatAXPY(yy->B,a,xx->B,str));
  } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
    PetscCall(MatAXPY_Basic(Y,a,X,str));
  } else {
    Mat      B;
    PetscInt *nnz_d,*nnz_o;

    PetscCall(PetscMalloc1(yy->A->rmap->N,&nnz_d));
    PetscCall(PetscMalloc1(yy->B->rmap->N,&nnz_o));
    PetscCall(MatCreate(PetscObjectComm((PetscObject)Y),&B));
    PetscCall(PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name));
    PetscCall(MatSetLayouts(B,Y->rmap,Y->cmap));
    PetscCall(MatSetType(B,((PetscObject)Y)->type_name));
    /* union pattern of the diagonal parts (same local column space) ... */
    PetscCall(MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d));
    /* ... and of the off-diagonal parts, which need garray to compare global columns */
    PetscCall(MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o));
    PetscCall(MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o));
    PetscCall(MatAXPY_BasicWithPreallocation(B,Y,a,X,str));
    PetscCall(MatHeaderMerge(Y,&B)); /* B replaces Y's content; Y's header is preserved */
    PetscCall(PetscFree(nnz_d));
    PetscCall(PetscFree(nnz_o));
  }
  PetscFunctionReturn(0);
}

PETSC_INTERN PetscErrorCode MatConjugate_SeqAIJ(Mat);

/* Replaces every entry by its complex conjugate; no-op for real scalars. */
PetscErrorCode MatConjugate_MPIAIJ(Mat mat)
{
  PetscFunctionBegin;
  if (PetscDefined(USE_COMPLEX)) {
    Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;

    PetscCall(MatConjugate_SeqAIJ(aij->A));
    PetscCall(MatConjugate_SeqAIJ(aij->B));
  }
  PetscFunctionReturn(0);
}

/* Keeps only the real part of every entry, forwarding to the two local parts. */
PetscErrorCode MatRealPart_MPIAIJ(Mat A)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  PetscCall(MatRealPart(a->A));
  PetscCall(MatRealPart(a->B));
  PetscFunctionReturn(0);
}

/* Keeps only the imaginary part of every entry, forwarding to the two local parts. */
PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  PetscCall(MatImaginaryPart(a->A));
  PetscCall(MatImaginaryPart(a->B));
  PetscFunctionReturn(0);
}

/*
   Computes per-row maximum absolute values (and optional global column indices)
   by taking the row-wise max over the diagonal part A and the off-diagonal part B.
*/
PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A,Vec v,PetscInt idx[])
{
  Mat_MPIAIJ        *a = (Mat_MPIAIJ*)A->data;
  PetscInt          i,*idxb = NULL,m = A->rmap->n;
  PetscScalar       *va,*vv;
  Vec               vB,vA;
  const PetscScalar *vb;

  PetscFunctionBegin;
  PetscCall(VecCreateSeq(PETSC_COMM_SELF,m,&vA));
  PetscCall(MatGetRowMaxAbs(a->A,vA,idx));

  PetscCall(VecGetArrayWrite(vA,&va));
  if (idx) {
    /* shift A's local column indices to global numbering */
    for (i=0; i<m; i++) {
      if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
    }
2191 } 2192 2193 PetscCall(VecCreateSeq(PETSC_COMM_SELF,m,&vB)); 2194 PetscCall(PetscMalloc1(m,&idxb)); 2195 PetscCall(MatGetRowMaxAbs(a->B,vB,idxb)); 2196 2197 PetscCall(VecGetArrayWrite(v,&vv)); 2198 PetscCall(VecGetArrayRead(vB,&vb)); 2199 for (i=0; i<m; i++) { 2200 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 2201 vv[i] = vb[i]; 2202 if (idx) idx[i] = a->garray[idxb[i]]; 2203 } else { 2204 vv[i] = va[i]; 2205 if (idx && PetscAbsScalar(va[i]) == PetscAbsScalar(vb[i]) && idxb[i] != -1 && idx[i] > a->garray[idxb[i]]) 2206 idx[i] = a->garray[idxb[i]]; 2207 } 2208 } 2209 PetscCall(VecRestoreArrayWrite(vA,&vv)); 2210 PetscCall(VecRestoreArrayWrite(vA,&va)); 2211 PetscCall(VecRestoreArrayRead(vB,&vb)); 2212 PetscCall(PetscFree(idxb)); 2213 PetscCall(VecDestroy(&vA)); 2214 PetscCall(VecDestroy(&vB)); 2215 PetscFunctionReturn(0); 2216 } 2217 2218 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2219 { 2220 Mat_MPIAIJ *mat = (Mat_MPIAIJ*) A->data; 2221 PetscInt m = A->rmap->n,n = A->cmap->n; 2222 PetscInt cstart = A->cmap->rstart,cend = A->cmap->rend; 2223 PetscInt *cmap = mat->garray; 2224 PetscInt *diagIdx, *offdiagIdx; 2225 Vec diagV, offdiagV; 2226 PetscScalar *a, *diagA, *offdiagA; 2227 const PetscScalar *ba,*bav; 2228 PetscInt r,j,col,ncols,*bi,*bj; 2229 Mat B = mat->B; 2230 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 2231 2232 PetscFunctionBegin; 2233 /* When a process holds entire A and other processes have no entry */ 2234 if (A->cmap->N == n) { 2235 PetscCall(VecGetArrayWrite(v,&diagA)); 2236 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF,1,m,diagA,&diagV)); 2237 PetscCall(MatGetRowMinAbs(mat->A,diagV,idx)); 2238 PetscCall(VecDestroy(&diagV)); 2239 PetscCall(VecRestoreArrayWrite(v,&diagA)); 2240 PetscFunctionReturn(0); 2241 } else if (n == 0) { 2242 if (m) { 2243 PetscCall(VecGetArrayWrite(v,&a)); 2244 for (r = 0; r < m; r++) {a[r] = 0.0; if (idx) idx[r] = -1;} 2245 PetscCall(VecRestoreArrayWrite(v,&a)); 2246 } 2247 PetscFunctionReturn(0); 
}

  PetscCall(PetscMalloc2(m,&diagIdx,m,&offdiagIdx));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV));
  PetscCall(MatGetRowMinAbs(mat->A, diagV, diagIdx));

  /* Get offdiagIdx[] for implicit 0.0 */
  PetscCall(MatSeqAIJGetArrayRead(B,&bav));
  ba = bav;
  bi = b->i;
  bj = b->j;
  PetscCall(VecGetArrayWrite(offdiagV, &offdiagA));
  for (r = 0; r < m; r++) {
    ncols = bi[r+1] - bi[r];
    if (ncols == A->cmap->N - n) { /* Brow is dense */
      offdiagA[r] = *ba; offdiagIdx[r] = cmap[0];
    } else { /* Brow is sparse, so there is at least one implicit 0.0 off-diagonal entry */
      offdiagA[r] = 0.0;

      /* Find first hole in the cmap */
      for (j=0; j<ncols; j++) {
        col = cmap[bj[j]]; /* global column number = cmap[B column number] */
        if (col > j && j < cstart) {
          offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
          break;
        } else if (col > j + n && j >= cstart) {
          offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
          break;
        }
      }
      if (j == ncols && ncols < A->cmap->N - n) {
        /* a hole is outside compressed Bcols */
        if (ncols == 0) {
          if (cstart) {
            offdiagIdx[r] = 0;
          } else offdiagIdx[r] = cend;
        } else { /* ncols > 0 */
          offdiagIdx[r] = cmap[ncols-1] + 1;
          if (offdiagIdx[r] == cstart) offdiagIdx[r] += n; /* skip over the diagonal block's column range */
        }
      }
    }

    /* now compare the stored B entries of this row against the implicit 0.0 */
    for (j=0; j<ncols; j++) {
      if (PetscAbsScalar(offdiagA[r]) > PetscAbsScalar(*ba)) {offdiagA[r] = *ba; offdiagIdx[r] = cmap[*bj];}
      ba++; bj++;
    }
  }

  /* Combine diagonal and off-diagonal minima; on ties prefer the smaller global column */
  PetscCall(VecGetArrayWrite(v, &a));
  PetscCall(VecGetArrayRead(diagV, (const PetscScalar**)&diagA));
  for (r = 0; r < m; ++r) {
    if (PetscAbsScalar(diagA[r]) < PetscAbsScalar(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) idx[r] = cstart + diagIdx[r];
    } else if (PetscAbsScalar(diagA[r]) == PetscAbsScalar(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) {
        if (cstart + diagIdx[r] <= offdiagIdx[r]) {
          idx[r] = cstart + diagIdx[r];
        } else idx[r] = offdiagIdx[r];
      }
    } else {
      a[r] = offdiagA[r];
      if (idx) idx[r] = offdiagIdx[r];
    }
  }
  PetscCall(MatSeqAIJRestoreArrayRead(B,&bav));
  PetscCall(VecRestoreArrayWrite(v, &a));
  PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar**)&diagA));
  PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA));
  PetscCall(VecDestroy(&diagV));
  PetscCall(VecDestroy(&offdiagV));
  PetscCall(PetscFree2(diagIdx, offdiagIdx));
  PetscFunctionReturn(0);
}

/*
   Computes per-row minimum (real part) values and optional global column indices,
   following the same structure as MatGetRowMinAbs_MPIAIJ: the off-diagonal part's
   implicit zeros are candidates for the minimum.
*/
PetscErrorCode MatGetRowMin_MPIAIJ(Mat A,Vec v,PetscInt idx[])
{
  Mat_MPIAIJ        *mat = (Mat_MPIAIJ*) A->data;
  PetscInt          m = A->rmap->n,n = A->cmap->n;
  PetscInt          cstart = A->cmap->rstart,cend = A->cmap->rend;
  PetscInt          *cmap = mat->garray;
  PetscInt          *diagIdx, *offdiagIdx;
  Vec               diagV, offdiagV;
  PetscScalar       *a, *diagA, *offdiagA;
  const PetscScalar *ba,*bav;
  PetscInt          r,j,col,ncols,*bi,*bj;
  Mat               B = mat->B;
  Mat_SeqAIJ        *b = (Mat_SeqAIJ*)B->data;

  PetscFunctionBegin;
  /* When a process holds entire A and other processes have no entry */
  if (A->cmap->N == n) {
    PetscCall(VecGetArrayWrite(v,&diagA));
    PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF,1,m,diagA,&diagV));
    PetscCall(MatGetRowMin(mat->A,diagV,idx));
    PetscCall(VecDestroy(&diagV));
    PetscCall(VecRestoreArrayWrite(v,&diagA));
    PetscFunctionReturn(0);
  } else if (n == 0) {
    /* no local columns at all: report identity for min */
    if (m) {
      PetscCall(VecGetArrayWrite(v,&a));
      for (r = 0; r < m; r++) {a[r] = PETSC_MAX_REAL; if (idx) idx[r] = -1;}
      PetscCall(VecRestoreArrayWrite(v,&a));
    }
    PetscFunctionReturn(0);
  }

  PetscCall(PetscCalloc2(m,&diagIdx,m,&offdiagIdx));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV));
PetscCall(MatGetRowMin(mat->A, diagV, diagIdx));

  /* Get offdiagIdx[] for implicit 0.0 */
  PetscCall(MatSeqAIJGetArrayRead(B,&bav));
  ba = bav;
  bi = b->i;
  bj = b->j;
  PetscCall(VecGetArrayWrite(offdiagV, &offdiagA));
  for (r = 0; r < m; r++) {
    ncols = bi[r+1] - bi[r];
    if (ncols == A->cmap->N - n) { /* Brow is dense */
      offdiagA[r] = *ba; offdiagIdx[r] = cmap[0];
    } else { /* Brow is sparse, so an implicit 0.0 exists and bounds the minimum from above */
      offdiagA[r] = 0.0;

      /* Find first hole in the cmap */
      for (j=0; j<ncols; j++) {
        col = cmap[bj[j]]; /* global column number = cmap[B column number] */
        if (col > j && j < cstart) {
          offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
          break;
        } else if (col > j + n && j >= cstart) {
          offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
          break;
        }
      }
      if (j == ncols && ncols < A->cmap->N - n) {
        /* a hole is outside compressed Bcols */
        if (ncols == 0) {
          if (cstart) {
            offdiagIdx[r] = 0;
          } else offdiagIdx[r] = cend;
        } else { /* ncols > 0 */
          offdiagIdx[r] = cmap[ncols-1] + 1;
          if (offdiagIdx[r] == cstart) offdiagIdx[r] += n; /* skip the diagonal block's column range */
        }
      }
    }

    /* compare the stored B entries of this row against the implicit 0.0 */
    for (j=0; j<ncols; j++) {
      if (PetscRealPart(offdiagA[r]) > PetscRealPart(*ba)) {offdiagA[r] = *ba; offdiagIdx[r] = cmap[*bj];}
      ba++; bj++;
    }
  }

  /* Combine diagonal and off-diagonal minima; on ties prefer the smaller global column */
  PetscCall(VecGetArrayWrite(v, &a));
  PetscCall(VecGetArrayRead(diagV, (const PetscScalar**)&diagA));
  for (r = 0; r < m; ++r) {
    if (PetscRealPart(diagA[r]) < PetscRealPart(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) idx[r] = cstart + diagIdx[r];
    } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) {
        if (cstart + diagIdx[r] <= offdiagIdx[r]) {
          idx[r] = cstart + diagIdx[r];
        } else idx[r] = offdiagIdx[r];
      }
    } else {
      a[r] = offdiagA[r];
      if (idx) idx[r] = offdiagIdx[r];
    }
  }
  PetscCall(MatSeqAIJRestoreArrayRead(B,&bav));
  PetscCall(VecRestoreArrayWrite(v, &a));
  PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar**)&diagA));
  PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA));
  PetscCall(VecDestroy(&diagV));
  PetscCall(VecDestroy(&offdiagV));
  PetscCall(PetscFree2(diagIdx, offdiagIdx));
  PetscFunctionReturn(0);
}

/*
   Computes per-row maximum (real part) values and optional global column indices.
   Mirrors MatGetRowMin_MPIAIJ with the comparisons reversed; the off-diagonal
   part's implicit zeros are candidates for the maximum.
*/
PetscErrorCode MatGetRowMax_MPIAIJ(Mat A,Vec v,PetscInt idx[])
{
  Mat_MPIAIJ        *mat = (Mat_MPIAIJ*)A->data;
  PetscInt          m = A->rmap->n,n = A->cmap->n;
  PetscInt          cstart = A->cmap->rstart,cend = A->cmap->rend;
  PetscInt          *cmap = mat->garray;
  PetscInt          *diagIdx, *offdiagIdx;
  Vec               diagV, offdiagV;
  PetscScalar       *a, *diagA, *offdiagA;
  const PetscScalar *ba,*bav;
  PetscInt          r,j,col,ncols,*bi,*bj;
  Mat               B = mat->B;
  Mat_SeqAIJ        *b = (Mat_SeqAIJ*)B->data;

  PetscFunctionBegin;
  /* When a process holds entire A and other processes have no entry */
  if (A->cmap->N == n) {
    PetscCall(VecGetArrayWrite(v,&diagA));
    PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF,1,m,diagA,&diagV));
    PetscCall(MatGetRowMax(mat->A,diagV,idx));
    PetscCall(VecDestroy(&diagV));
    PetscCall(VecRestoreArrayWrite(v,&diagA));
    PetscFunctionReturn(0);
  } else if (n == 0) {
    /* no local columns at all: report identity for max */
    if (m) {
      PetscCall(VecGetArrayWrite(v,&a));
      for (r = 0; r < m; r++) {a[r] = PETSC_MIN_REAL; if (idx) idx[r] = -1;}
      PetscCall(VecRestoreArrayWrite(v,&a));
    }
    PetscFunctionReturn(0);
  }

  PetscCall(PetscMalloc2(m,&diagIdx,m,&offdiagIdx));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV));
  PetscCall(MatGetRowMax(mat->A, diagV, diagIdx));

  /* Get offdiagIdx[] for implicit 0.0 */
  PetscCall(MatSeqAIJGetArrayRead(B,&bav));
  ba = bav;
  bi = b->i;
  bj = b->j;
  PetscCall(VecGetArrayWrite(offdiagV, &offdiagA));
  for (r = 0; r < m; r++) {
    ncols = bi[r+1] - bi[r];
    if (ncols == A->cmap->N - n) { /* Brow is dense */
      offdiagA[r] = *ba; offdiagIdx[r] = cmap[0];
    } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */
      offdiagA[r] = 0.0;

      /* Find first hole in the cmap */
      for (j=0; j<ncols; j++) {
        col = cmap[bj[j]]; /* global column number = cmap[B column number] */
        if (col > j && j < cstart) {
          offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
          break;
        } else if (col > j + n && j >= cstart) {
          offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
          break;
        }
      }
      if (j == ncols && ncols < A->cmap->N - n) {
        /* a hole is outside compressed Bcols */
        if (ncols == 0) {
          if (cstart) {
            offdiagIdx[r] = 0;
          } else offdiagIdx[r] = cend;
        } else { /* ncols > 0 */
          offdiagIdx[r] = cmap[ncols-1] + 1;
          if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
        }
      }
    }

    /* compare the stored B entries of this row against the implicit 0.0 */
    for (j=0; j<ncols; j++) {
      if (PetscRealPart(offdiagA[r]) < PetscRealPart(*ba)) {offdiagA[r] = *ba; offdiagIdx[r] = cmap[*bj];}
      ba++; bj++;
    }
  }

  /* Combine diagonal and off-diagonal maxima; on ties prefer the smaller global column */
  PetscCall(VecGetArrayWrite(v, &a));
  PetscCall(VecGetArrayRead(diagV,(const PetscScalar**)&diagA));
  for (r = 0; r < m; ++r) {
    if (PetscRealPart(diagA[r]) > PetscRealPart(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) idx[r] = cstart + diagIdx[r];
    } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) {
        if (cstart + diagIdx[r] <= offdiagIdx[r]) {
          idx[r] = cstart + diagIdx[r];
        } else idx[r] = offdiagIdx[r];
      }
    } else {
      a[r] = offdiagA[r];
      if (idx) idx[r] = offdiagIdx[r];
    }
  }
  PetscCall(MatSeqAIJRestoreArrayRead(B,&bav));
  PetscCall(VecRestoreArrayWrite(v, &a));
  PetscCall(VecRestoreArrayRead(diagV, (const
PetscScalar**)&diagA)); 2535 PetscCall(VecRestoreArrayWrite(offdiagV,&offdiagA)); 2536 PetscCall(VecDestroy(&diagV)); 2537 PetscCall(VecDestroy(&offdiagV)); 2538 PetscCall(PetscFree2(diagIdx, offdiagIdx)); 2539 PetscFunctionReturn(0); 2540 } 2541 2542 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat) 2543 { 2544 Mat *dummy; 2545 2546 PetscFunctionBegin; 2547 PetscCall(MatCreateSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy)); 2548 *newmat = *dummy; 2549 PetscCall(PetscFree(dummy)); 2550 PetscFunctionReturn(0); 2551 } 2552 2553 PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values) 2554 { 2555 Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 2556 2557 PetscFunctionBegin; 2558 PetscCall(MatInvertBlockDiagonal(a->A,values)); 2559 A->factorerrortype = a->A->factorerrortype; 2560 PetscFunctionReturn(0); 2561 } 2562 2563 static PetscErrorCode MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx) 2564 { 2565 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)x->data; 2566 2567 PetscFunctionBegin; 2568 PetscCheck(x->assembled || x->preallocated,PetscObjectComm((PetscObject)x), PETSC_ERR_ARG_WRONGSTATE, "MatSetRandom on an unassembled and unpreallocated MATMPIAIJ is not allowed"); 2569 PetscCall(MatSetRandom(aij->A,rctx)); 2570 if (x->assembled) { 2571 PetscCall(MatSetRandom(aij->B,rctx)); 2572 } else { 2573 PetscCall(MatSetRandomSkipColumnRange_SeqAIJ_Private(aij->B,x->cmap->rstart,x->cmap->rend,rctx)); 2574 } 2575 PetscCall(MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY)); 2576 PetscCall(MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY)); 2577 PetscFunctionReturn(0); 2578 } 2579 2580 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A,PetscBool sc) 2581 { 2582 PetscFunctionBegin; 2583 if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable; 2584 else A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ; 2585 PetscFunctionReturn(0); 2586 } 2587 2588 /*@ 2589 MatMPIAIJGetNumberNonzeros - gets the number of nonzeros in the matrix on this MPI 
rank

   Not collective

   Input Parameter:
.  A - the matrix

   Output Parameter:
.  nz - the number of nonzeros

   Level: advanced

@*/
PetscErrorCode MatMPIAIJGetNumberNonzeros(Mat A,PetscCount *nz)
{
  Mat_MPIAIJ *maij = (Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ *aaij = (Mat_SeqAIJ*)maij->A->data, *baij = (Mat_SeqAIJ*)maij->B->data;

  PetscFunctionBegin;
  /* i[nrows] of a SeqAIJ row-pointer array is the total stored-nonzero count of that block */
  *nz = aaij->i[A->rmap->n] + baij->i[A->rmap->n];
  PetscFunctionReturn(0);
}

/*@
   MatMPIAIJSetUseScalableIncreaseOverlap - Determine if the matrix uses a scalable algorithm to compute the overlap

   Collective on Mat

   Input Parameters:
+  A - the matrix
-  sc - PETSC_TRUE indicates use the scalable algorithm (default is not to use the scalable algorithm)

   Level: advanced

@*/
PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A,PetscBool sc)
{
  PetscFunctionBegin;
  /* Dispatch through the composed function so non-MPIAIJ types silently ignore the call */
  PetscTryMethod(A,"MatMPIAIJSetUseScalableIncreaseOverlap_C",(Mat,PetscBool),(A,sc));
  PetscFunctionReturn(0);
}

/* Process -mat_increase_overlap_scalable from the options database */
PetscErrorCode MatSetFromOptions_MPIAIJ(PetscOptionItems *PetscOptionsObject,Mat A)
{
  PetscBool sc = PETSC_FALSE,flg;

  PetscFunctionBegin;
  PetscOptionsHeadBegin(PetscOptionsObject,"MPIAIJ options");
  if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE; /* current setting as the default */
  PetscCall(PetscOptionsBool("-mat_increase_overlap_scalable","Use a scalable algorithm to compute the overlap","MatIncreaseOverlap",sc,&sc,&flg));
  if (flg) PetscCall(MatMPIAIJSetUseScalableIncreaseOverlap(A,sc));
  PetscOptionsHeadEnd();
  PetscFunctionReturn(0);
}

/* Y = Y + a*I; ensures a diagonal is preallocated before delegating to MatShift_Basic() */
PetscErrorCode MatShift_MPIAIJ(Mat Y,PetscScalar a)
{
  Mat_MPIAIJ *maij = (Mat_MPIAIJ*)Y->data;
  Mat_SeqAIJ *aij  = (Mat_SeqAIJ*)maij->A->data;

  PetscFunctionBegin;
  if (!Y->preallocated) {
    PetscCall(MatMPIAIJSetPreallocation(Y,1,NULL,0,NULL));
  } else if (!aij->nz) { /* It does not matter if diagonals of Y only partially lie in maij->A. We just need an estimated preallocation. */
    PetscInt nonew = aij->nonew;
    PetscCall(MatSeqAIJSetPreallocation(maij->A,1,NULL));
    aij->nonew = nonew; /* restore the no-new-nonzeros flag clobbered by preallocation */
  }
  PetscCall(MatShift_Basic(Y,a));
  PetscFunctionReturn(0);
}

/* Report whether a diagonal entry is missing; d (if requested) is the first such global row */
PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A,PetscBool *missing,PetscInt *d)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  PetscCheck(A->rmap->n == A->cmap->n,PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices");
  PetscCall(MatMissingDiagonal(a->A,missing,d));
  if (d) {
    PetscInt rstart;
    PetscCall(MatGetOwnershipRange(A,&rstart,NULL));
    *d += rstart; /* convert the local row returned by the diagonal block to a global row */

  }
  PetscFunctionReturn(0);
}

/* Invert variable-sized diagonal blocks; purely local, delegates to the diagonal block a->A */
PetscErrorCode MatInvertVariableBlockDiagonal_MPIAIJ(Mat A,PetscInt nblocks,const PetscInt *bsizes,PetscScalar *diag)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  PetscCall(MatInvertVariableBlockDiagonal(a->A,nblocks,bsizes,diag));
  PetscFunctionReturn(0);
}

/* -------------------------------------------------------------------*/
/* Virtual function table for MATMPIAIJ; the numeric comments give the slot index in _MatOps */
static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
                                       MatGetRow_MPIAIJ,
                                       MatRestoreRow_MPIAIJ,
                                       MatMult_MPIAIJ,
                                /* 4*/ MatMultAdd_MPIAIJ,
                                       MatMultTranspose_MPIAIJ,
                                       MatMultTransposeAdd_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                /*10*/ NULL,
                                       NULL,
                                       NULL,
                                       MatSOR_MPIAIJ,
                                       MatTranspose_MPIAIJ,
                                /*15*/ MatGetInfo_MPIAIJ,
                                       MatEqual_MPIAIJ,
                                       MatGetDiagonal_MPIAIJ,
                                       MatDiagonalScale_MPIAIJ,
                                       MatNorm_MPIAIJ,
                                /*20*/ MatAssemblyBegin_MPIAIJ,
                                       MatAssemblyEnd_MPIAIJ,
                                       MatSetOption_MPIAIJ,
                                       MatZeroEntries_MPIAIJ,
                                /*24*/ MatZeroRows_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                /*29*/ MatSetUp_MPIAIJ,
                                       NULL,
                                       NULL,
                                       MatGetDiagonalBlock_MPIAIJ,
                                       NULL,
                                /*34*/ MatDuplicate_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                /*39*/ MatAXPY_MPIAIJ,
                                       MatCreateSubMatrices_MPIAIJ,
                                       MatIncreaseOverlap_MPIAIJ,
                                       MatGetValues_MPIAIJ,
                                       MatCopy_MPIAIJ,
                                /*44*/ MatGetRowMax_MPIAIJ,
                                       MatScale_MPIAIJ,
                                       MatShift_MPIAIJ,
                                       MatDiagonalSet_MPIAIJ,
                                       MatZeroRowsColumns_MPIAIJ,
                                /*49*/ MatSetRandom_MPIAIJ,
                                       MatGetRowIJ_MPIAIJ,
                                       MatRestoreRowIJ_MPIAIJ,
                                       NULL,
                                       NULL,
                                /*54*/ MatFDColoringCreate_MPIXAIJ,
                                       NULL,
                                       MatSetUnfactored_MPIAIJ,
                                       MatPermute_MPIAIJ,
                                       NULL,
                                /*59*/ MatCreateSubMatrix_MPIAIJ,
                                       MatDestroy_MPIAIJ,
                                       MatView_MPIAIJ,
                                       NULL,
                                       NULL,
                                /*64*/ NULL,
                                       MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                /*69*/ MatGetRowMaxAbs_MPIAIJ,
                                       MatGetRowMinAbs_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                /*75*/ MatFDColoringApply_AIJ,
                                       MatSetFromOptions_MPIAIJ,
                                       NULL,
                                       NULL,
                                       MatFindZeroDiagonals_MPIAIJ,
                                /*80*/ NULL,
                                       NULL,
                                       NULL,
                                /*83*/ MatLoad_MPIAIJ,
                                       MatIsSymmetric_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                /*89*/ NULL,
                                       NULL,
                                       MatMatMultNumeric_MPIAIJ_MPIAIJ,
                                       NULL,
                                       NULL,
                                /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatBindToCPU_MPIAIJ,
                                /*99*/ MatProductSetFromOptions_MPIAIJ,
                                       NULL,
                                       NULL,
                                       MatConjugate_MPIAIJ,
                                       NULL,
                                /*104*/MatSetValuesRow_MPIAIJ,
                                       MatRealPart_MPIAIJ,
                                       MatImaginaryPart_MPIAIJ,
                                       NULL,
                                       NULL,
                                /*109*/NULL,
                                       NULL,
                                       MatGetRowMin_MPIAIJ,
                                       NULL,
                                       MatMissingDiagonal_MPIAIJ,
                                /*114*/MatGetSeqNonzeroStructure_MPIAIJ,
                                       NULL,
                                       MatGetGhosts_MPIAIJ,
                                       NULL,
                                       NULL,
                                /*119*/MatMultDiagonalBlock_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatGetMultiProcBlock_MPIAIJ,
                                /*124*/MatFindNonzeroRows_MPIAIJ,
                                       MatGetColumnReductions_MPIAIJ,
                                       MatInvertBlockDiagonal_MPIAIJ,
                                       MatInvertVariableBlockDiagonal_MPIAIJ,
                                       MatCreateSubMatricesMPI_MPIAIJ,
                                /*129*/NULL,
                                       NULL,
                                       NULL,
                                       MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
                                       NULL,
                                /*134*/NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                /*139*/MatSetBlockSizes_MPIAIJ,
                                       NULL,
                                       NULL,
                                       MatFDColoringSetUp_MPIXAIJ,
                                       MatFindOffBlockDiagonalEntries_MPIAIJ,
                                       MatCreateMPIMatConcatenateSeqMat_MPIAIJ,
                                /*145*/NULL,
                                       NULL,
                                       NULL,
                                       MatCreateGraph_Simple_AIJ,
                                       MatFilter_AIJ
};

/* ----------------------------------------------------------------------------------------*/

/* Stash a copy of the current numerical values of both blocks (see MatStoreValues()) */
PetscErrorCode MatStoreValues_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;

  PetscFunctionBegin;
  PetscCall(MatStoreValues(aij->A));
  PetscCall(MatStoreValues(aij->B));
  PetscFunctionReturn(0);
}

/* Restore the numerical values previously stashed with MatStoreValues_MPIAIJ() */
PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;

  PetscFunctionBegin;
  PetscCall(MatRetrieveValues(aij->A));
  PetscCall(MatRetrieveValues(aij->B));
  PetscFunctionReturn(0);
}

/* Type-specific backend of MatMPIAIJSetPreallocation(): (re)creates the diagonal block b->A
   and off-diagonal block b->B and preallocates them with d_nz/d_nnz and o_nz/o_nnz */
PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
{
  Mat_MPIAIJ *b;
  PetscMPIInt size;

  PetscFunctionBegin;
  PetscCall(PetscLayoutSetUp(B->rmap));
  PetscCall(PetscLayoutSetUp(B->cmap));
  b = (Mat_MPIAIJ*)B->data;

  /* Discard any stale communication data built for a previous nonzero pattern */
#if defined(PETSC_USE_CTABLE)
  PetscCall(PetscTableDestroy(&b->colmap));
#else
  PetscCall(PetscFree(b->colmap));
#endif
  PetscCall(PetscFree(b->garray));
  PetscCall(VecDestroy(&b->lvec));
  PetscCall(VecScatterDestroy(&b->Mvctx));

  /* Because the B will have been resized we simply destroy it and create a new one each time */
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B),&size));
  PetscCall(MatDestroy(&b->B));
  PetscCall(MatCreate(PETSC_COMM_SELF,&b->B));
  /* On a single rank there is no off-process coupling, so B gets zero columns */
  PetscCall(MatSetSizes(b->B,B->rmap->n,size > 1 ? B->cmap->N : 0,B->rmap->n,size > 1 ? B->cmap->N : 0));
  PetscCall(MatSetBlockSizesFromMats(b->B,B,B));
  PetscCall(MatSetType(b->B,MATSEQAIJ));
  PetscCall(PetscLogObjectParent((PetscObject)B,(PetscObject)b->B));

  if (!B->preallocated) {
    /* First preallocation: the diagonal block does not exist yet */
    PetscCall(MatCreate(PETSC_COMM_SELF,&b->A));
    PetscCall(MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n));
    PetscCall(MatSetBlockSizesFromMats(b->A,B,B));
    PetscCall(MatSetType(b->A,MATSEQAIJ));
    PetscCall(PetscLogObjectParent((PetscObject)B,(PetscObject)b->A));
  }

  PetscCall(MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz));
  PetscCall(MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz));
  B->preallocated  = PETSC_TRUE;
  B->was_assembled = PETSC_FALSE;
  B->assembled     = PETSC_FALSE;
  PetscFunctionReturn(0);
}

/* Reset both blocks to their preallocated (unassembled) state, dropping the
   column map and communication structures built during assembly */
PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B)
{
  Mat_MPIAIJ *b;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(B,MAT_CLASSID,1);
  PetscCall(PetscLayoutSetUp(B->rmap));
  PetscCall(PetscLayoutSetUp(B->cmap));
  b = (Mat_MPIAIJ*)B->data;

#if defined(PETSC_USE_CTABLE)
  PetscCall(PetscTableDestroy(&b->colmap));
#else
  PetscCall(PetscFree(b->colmap));
#endif
  PetscCall(PetscFree(b->garray));
  PetscCall(VecDestroy(&b->lvec));
  PetscCall(VecScatterDestroy(&b->Mvctx));

  PetscCall(MatResetPreallocation(b->A));
  PetscCall(MatResetPreallocation(b->B));
  B->preallocated  = PETSC_TRUE;
  B->was_assembled = PETSC_FALSE;
  B->assembled     = PETSC_FALSE;
  PetscFunctionReturn(0);
}

/* Deep-copy an MPIAIJ matrix: layouts are referenced, colmap/garray are copied,
   lvec/Mvctx are duplicated if present, and both blocks are duplicated per cpvalues */
PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat         mat;
  Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data;

  PetscFunctionBegin;
  *newmat = NULL;
  PetscCall(MatCreate(PetscObjectComm((PetscObject)matin),&mat));
  PetscCall(MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N));
  PetscCall(MatSetBlockSizesFromMats(mat,matin,matin));
  PetscCall(MatSetType(mat,((PetscObject)matin)->type_name));
  a = (Mat_MPIAIJ*)mat->data;

  mat->factortype   = matin->factortype;
  mat->assembled    = matin->assembled;
  mat->insertmode   = NOT_SET_VALUES;
  mat->preallocated = matin->preallocated;

  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  a->donotstash   = oldmat->donotstash;
  a->roworiented  = oldmat->roworiented;
  a->rowindices   = NULL;  /* MatGetRow() scratch is not copied */
  a->rowvalues    = NULL;
  a->getrowactive = PETSC_FALSE;

  PetscCall(PetscLayoutReference(matin->rmap,&mat->rmap));
  PetscCall(PetscLayoutReference(matin->cmap,&mat->cmap));

  if (oldmat->colmap) {
#if defined(PETSC_USE_CTABLE)
    PetscCall(PetscTableCreateCopy(oldmat->colmap,&a->colmap));
#else
    PetscCall(PetscMalloc1(mat->cmap->N,&a->colmap));
    PetscCall(PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt)));
    PetscCall(PetscArraycpy(a->colmap,oldmat->colmap,mat->cmap->N));
#endif
  } else a->colmap = NULL;
  if (oldmat->garray) {
    PetscInt len;
    len = oldmat->B->cmap->n;
    PetscCall(PetscMalloc1(len+1,&a->garray));
    PetscCall(PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt)));
    if (len) PetscCall(PetscArraycpy(a->garray,oldmat->garray,len));
  } else a->garray = NULL;

  /* It may happen MatDuplicate is called with a non-assembled matrix
     In fact, MatDuplicate only requires the matrix to be preallocated
     This may happen inside a DMCreateMatrix_Shell */
  if (oldmat->lvec) {
    PetscCall(VecDuplicate(oldmat->lvec,&a->lvec));
    PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec));
  }
  if (oldmat->Mvctx) {
    PetscCall(VecScatterCopy(oldmat->Mvctx,&a->Mvctx));
    PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx));
  }
  PetscCall(MatDuplicate(oldmat->A,cpvalues,&a->A));
  PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A));
  PetscCall(MatDuplicate(oldmat->B,cpvalues,&a->B));
  PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B));
  PetscCall(PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist));
  *newmat = mat;
  PetscFunctionReturn(0);
}

/* Load an MPIAIJ matrix from a viewer; dispatches on viewer type (binary or HDF5) */
PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
{
  PetscBool isbinary, ishdf5;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(newMat,MAT_CLASSID,1);
  PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2);
  /* force binary viewer to load .info file if it has not yet done so */
  PetscCall(PetscViewerSetUp(viewer));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERHDF5, &ishdf5));
  if (isbinary) {
    PetscCall(MatLoad_MPIAIJ_Binary(newMat,viewer));
  } else if (ishdf5) {
#if defined(PETSC_HAVE_HDF5)
    PetscCall(MatLoad_AIJ_HDF5(newMat,viewer));
#else
    SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"HDF5 not supported in this build.\nPlease reconfigure using --download-hdf5");
#endif
  } else {
    SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newMat)->type_name);
  }
  PetscFunctionReturn(0);
}

/* Load an MPIAIJ matrix from a PETSc binary viewer: header, per-row lengths,
   then column indices and values, finally assembled via MatMPIAIJSetPreallocationCSR() */
PetscErrorCode MatLoad_MPIAIJ_Binary(Mat mat, PetscViewer viewer)
{
  PetscInt     header[4],M,N,m,nz,rows,cols,sum,i;
  PetscInt    *rowidxs,*colidxs;
  PetscScalar *matvals;

  PetscFunctionBegin;
  PetscCall(PetscViewerSetUp(viewer));

  /* read in matrix header */
  PetscCall(PetscViewerBinaryRead(viewer,header,4,NULL,PETSC_INT));
  PetscCheck(header[0] == MAT_FILE_CLASSID,PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Not a matrix object in file");
  M = header[1]; N = header[2]; nz = header[3];
  PetscCheck(M >= 0,PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix row size (%" PetscInt_FMT ") in file is negative",M);
  PetscCheck(N >= 0,PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix column size (%" PetscInt_FMT ") in file is negative",N);
  PetscCheck(nz >= 0,PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk, cannot load as MPIAIJ");

  /* set block sizes from the viewer's .info file */
  PetscCall(MatLoad_Binary_BlockSizes(mat,viewer));
  /* set global sizes if not set already */
  if (mat->rmap->N < 0) mat->rmap->N = M;
  if (mat->cmap->N < 0) mat->cmap->N = N;
  PetscCall(PetscLayoutSetUp(mat->rmap));
  PetscCall(PetscLayoutSetUp(mat->cmap));

  /* check if the matrix sizes are correct */
  PetscCall(MatGetSize(mat,&rows,&cols));
  PetscCheck(M == rows && N == cols,PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%" PetscInt_FMT ", %" PetscInt_FMT ") than the input matrix (%" PetscInt_FMT ", %" PetscInt_FMT ")",M,N,rows,cols);

  /* read in row lengths and build row indices */
  PetscCall(MatGetLocalSize(mat,&m,NULL));
  PetscCall(PetscMalloc1(m+1,&rowidxs));
  PetscCall(PetscViewerBinaryReadAll(viewer,rowidxs+1,m,PETSC_DECIDE,M,PETSC_INT));
  /* prefix-sum row lengths in place to obtain local CSR row pointers */
  rowidxs[0] = 0; for (i=0; i<m; i++) rowidxs[i+1] += rowidxs[i];
  PetscCall(MPIU_Allreduce(&rowidxs[m],&sum,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)viewer)));
  PetscCheck(sum == nz,PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Inconsistent matrix data in file: nonzeros = %" PetscInt_FMT ", sum-row-lengths = %" PetscInt_FMT,nz,sum);
  /* read in column indices and matrix values */
  PetscCall(PetscMalloc2(rowidxs[m],&colidxs,rowidxs[m],&matvals));
  PetscCall(PetscViewerBinaryReadAll(viewer,colidxs,rowidxs[m],PETSC_DETERMINE,PETSC_DETERMINE,PETSC_INT));
  PetscCall(PetscViewerBinaryReadAll(viewer,matvals,rowidxs[m],PETSC_DETERMINE,PETSC_DETERMINE,PETSC_SCALAR));
  /* store matrix indices and values */
  PetscCall(MatMPIAIJSetPreallocationCSR(mat,rowidxs,colidxs,matvals));
  PetscCall(PetscFree(rowidxs));
  PetscCall(PetscFree2(colidxs,matvals));
  PetscFunctionReturn(0);
}

/* Not scalable because of ISAllGather() unless getting all columns. */
/* Produce a sequential IS covering iscol on every rank; when iscol is the full
   (stride) column range the gather is skipped and an identity stride IS is returned */
PetscErrorCode ISGetSeqIS_Private(Mat mat,IS iscol,IS *isseq)
{
  IS          iscol_local;
  PetscBool   isstride;
  PetscMPIInt lisstride=0,gisstride;

  PetscFunctionBegin;
  /* check if we are grabbing all columns*/
  PetscCall(PetscObjectTypeCompare((PetscObject)iscol,ISSTRIDE,&isstride));

  if (isstride) {
    PetscInt start,len,mstart,mlen;
    PetscCall(ISStrideGetInfo(iscol,&start,NULL));
    PetscCall(ISGetLocalSize(iscol,&len));
    PetscCall(MatGetOwnershipRangeColumn(mat,&mstart,&mlen));
    if (mstart == start && mlen-mstart == len) lisstride = 1;
  }

  /* all ranks must agree that they each hold their full local column range */
  PetscCall(MPIU_Allreduce(&lisstride,&gisstride,1,MPI_INT,MPI_MIN,PetscObjectComm((PetscObject)mat)));
  if (gisstride) {
    PetscInt N;
    PetscCall(MatGetSize(mat,NULL,&N));
    PetscCall(ISCreateStride(PETSC_COMM_SELF,N,0,1,&iscol_local));
    PetscCall(ISSetIdentity(iscol_local));
    PetscCall(PetscInfo(mat,"Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n"));
  } else {
    PetscInt cbs;
    PetscCall(ISGetBlockSize(iscol,&cbs));
    PetscCall(ISAllGather(iscol,&iscol_local));
    PetscCall(ISSetBlockSize(iscol_local,cbs));
  }

  *isseq = iscol_local;
  PetscFunctionReturn(0);
}

/*
  Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid
ISAllGather() and global size of iscol_local
  (see MatCreateSubMatrix_MPIAIJ_nonscalable)

  Input Parameters:
    mat - matrix
    isrow - parallel row index set; its local indices are a subset of local columns of mat,
            i.e., mat->rstart <= isrow[i] < mat->rend
    iscol - parallel column index set; its local indices are a subset of local columns of mat,
            i.e., mat->cstart <= iscol[i] < mat->cend
  Output Parameter:
    isrow_d,iscol_d - sequential row and column index sets for retrieving mat->A
    iscol_o - sequential column index set for retrieving mat->B
    garray - column map; garray[i] indicates global location of iscol_o[i] in iscol
 */
PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat,IS isrow,IS iscol,IS *isrow_d,IS *iscol_d,IS *iscol_o,const PetscInt *garray[])
{
  Vec             x,cmap;
  const PetscInt *is_idx;
  PetscScalar    *xarray,*cmaparray;
  PetscInt        ncols,isstart,*idx,m,rstart,*cmap1,count;
  Mat_MPIAIJ     *a=(Mat_MPIAIJ*)mat->data;
  Mat             B=a->B;
  Vec             lvec=a->lvec,lcmap;
  PetscInt        i,cstart,cend,Bn=B->cmap->N;
  MPI_Comm        comm;
  VecScatter      Mvctx=a->Mvctx;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)mat,&comm));
  PetscCall(ISGetLocalSize(iscol,&ncols));

  /* (1) iscol is a sub-column vector of mat, pad it with '-1.' to form a full vector x */
  PetscCall(MatCreateVecs(mat,&x,NULL));
  PetscCall(VecSet(x,-1.0));
  PetscCall(VecDuplicate(x,&cmap));
  PetscCall(VecSet(cmap,-1.0));

  /* Get start indices */
  /* exclusive prefix sum of the local selected-column counts */
  PetscCallMPI(MPI_Scan(&ncols,&isstart,1,MPIU_INT,MPI_SUM,comm));
  isstart -= ncols;
  PetscCall(MatGetOwnershipRangeColumn(mat,&cstart,&cend));

  PetscCall(ISGetIndices(iscol,&is_idx));
  PetscCall(VecGetArray(x,&xarray));
  PetscCall(VecGetArray(cmap,&cmaparray));
  PetscCall(PetscMalloc1(ncols,&idx));
  for (i=0; i<ncols; i++) {
    /* mark selected columns in x, record their submatrix column number in cmap */
    xarray[is_idx[i]-cstart] = (PetscScalar)is_idx[i];
    cmaparray[is_idx[i]-cstart] = i + isstart;      /* global index of iscol[i] */
    idx[i] = is_idx[i]-cstart; /* local index of iscol[i]  */
  }
  PetscCall(VecRestoreArray(x,&xarray));
  PetscCall(VecRestoreArray(cmap,&cmaparray));
  PetscCall(ISRestoreIndices(iscol,&is_idx));

  /* Get iscol_d */
  PetscCall(ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,iscol_d));
  PetscCall(ISGetBlockSize(iscol,&i));
  PetscCall(ISSetBlockSize(*iscol_d,i));

  /* Get isrow_d */
  PetscCall(ISGetLocalSize(isrow,&m));
  rstart = mat->rmap->rstart;
  PetscCall(PetscMalloc1(m,&idx));
  PetscCall(ISGetIndices(isrow,&is_idx));
  for (i=0; i<m; i++) idx[i] = is_idx[i]-rstart;
  PetscCall(ISRestoreIndices(isrow,&is_idx));

  PetscCall(ISCreateGeneral(PETSC_COMM_SELF,m,idx,PETSC_OWN_POINTER,isrow_d));
  PetscCall(ISGetBlockSize(isrow,&i));
  PetscCall(ISSetBlockSize(*isrow_d,i));

  /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */
  PetscCall(VecScatterBegin(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD));
  PetscCall(VecScatterEnd(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD));

  PetscCall(VecDuplicate(lvec,&lcmap));

  PetscCall(VecScatterBegin(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD));
  PetscCall(VecScatterEnd(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD));

  /* (3) create sequential iscol_o (a subset of iscol) and isgarray */
  /* off-process column indices */
  count = 0;
  PetscCall(PetscMalloc1(Bn,&idx));
  PetscCall(PetscMalloc1(Bn,&cmap1));

  PetscCall(VecGetArray(lvec,&xarray));
  PetscCall(VecGetArray(lcmap,&cmaparray));
  for (i=0; i<Bn; i++) {
    if (PetscRealPart(xarray[i]) > -1.0) { /* > -1.0 means this ghost column was selected */
      idx[count] = i; /* local column index in off-diagonal part B */
      cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]); /* column index in submat */
      count++;
    }
  }
  PetscCall(VecRestoreArray(lvec,&xarray));
  PetscCall(VecRestoreArray(lcmap,&cmaparray));

  PetscCall(ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_COPY_VALUES,iscol_o));
  /* cannot ensure iscol_o has same blocksize as iscol! */

  PetscCall(PetscFree(idx));
  *garray = cmap1; /* ownership of cmap1 passes to the caller */

  PetscCall(VecDestroy(&x));
  PetscCall(VecDestroy(&cmap));
  PetscCall(VecDestroy(&lcmap));
  PetscFunctionReturn(0);
}

/* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */
PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *submat)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)mat->data,*asub;
  Mat         M = NULL;
  MPI_Comm    comm;
  IS          iscol_d,isrow_d,iscol_o;
  Mat         Asub = NULL,Bsub = NULL;
  PetscInt    n;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)mat,&comm));

  if (call == MAT_REUSE_MATRIX) {
    /* Retrieve isrow_d, iscol_d and iscol_o from submat */
    PetscCall(PetscObjectQuery((PetscObject)*submat,"isrow_d",(PetscObject*)&isrow_d));
    PetscCheck(isrow_d,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"isrow_d passed in was not used before, cannot reuse");
    PetscCall(PetscObjectQuery((PetscObject)*submat,"iscol_d",(PetscObject*)&iscol_d));
    PetscCheck(iscol_d,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_d passed in was not used before, cannot reuse");

    PetscCall(PetscObjectQuery((PetscObject)*submat,"iscol_o",(PetscObject*)&iscol_o));
    PetscCheck(iscol_o,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_o passed in was not used before, cannot reuse");

    /* Update diagonal and off-diagonal portions of submat */
    asub = (Mat_MPIAIJ*)(*submat)->data;
    PetscCall(MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->A));
    PetscCall(ISGetLocalSize(iscol_o,&n));
    if (n) {
      PetscCall(MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->B));
    }
    PetscCall(MatAssemblyBegin(*submat,MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(*submat,MAT_FINAL_ASSEMBLY));

  } else { /* call == MAT_INITIAL_MATRIX) */
    const PetscInt *garray;
    PetscInt        BsubN;

    /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) */
    PetscCall(ISGetSeqIS_SameColDist_Private(mat,isrow,iscol,&isrow_d,&iscol_d,&iscol_o,&garray));

    /* Create local submatrices Asub and Bsub */
    PetscCall(MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Asub));
    PetscCall(MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Bsub));

    /* Create submatrix M */
    PetscCall(MatCreateMPIAIJWithSeqAIJ(comm,Asub,Bsub,garray,&M));

    /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */
    asub = (Mat_MPIAIJ*)M->data;

    PetscCall(ISGetLocalSize(iscol_o,&BsubN));
    n = asub->B->cmap->N;
    if (BsubN > n) {
      /* This case can be tested using ~petsc/src/tao/bound/tutorials/runplate2_3 */
      const PetscInt *idx;
      PetscInt        i,j,*idx_new,*subgarray = asub->garray;
      PetscCall(PetscInfo(M,"submatrix Bn %" PetscInt_FMT " != BsubN %" PetscInt_FMT ", update iscol_o\n",n,BsubN));

      PetscCall(PetscMalloc1(n,&idx_new));
      j = 0;
      PetscCall(ISGetIndices(iscol_o,&idx));
      /* merge-walk garray (old, possibly with empty columns) against subgarray (condensed) */
      for (i=0; i<n; i++) {
        if (j >= BsubN) break;
        while (subgarray[i] > garray[j]) j++;

        if (subgarray[i] == garray[j]) {
          idx_new[i] = idx[j++];
        } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"subgarray[%" PetscInt_FMT "]=%" PetscInt_FMT " cannot < garray[%" PetscInt_FMT "]=%" PetscInt_FMT,i,subgarray[i],j,garray[j]);
      }
      PetscCall(ISRestoreIndices(iscol_o,&idx));

      PetscCall(ISDestroy(&iscol_o));
      PetscCall(ISCreateGeneral(PETSC_COMM_SELF,n,idx_new,PETSC_OWN_POINTER,&iscol_o));

    } else if (BsubN < n) {
      SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Columns of Bsub (%" PetscInt_FMT ") cannot be smaller than B's (%" PetscInt_FMT ")",BsubN,asub->B->cmap->N);
    }

    PetscCall(PetscFree(garray));
    *submat = M;

    /* Save isrow_d, iscol_d and iscol_o used in processor for next request */
    PetscCall(PetscObjectCompose((PetscObject)M,"isrow_d",(PetscObject)isrow_d));
    PetscCall(ISDestroy(&isrow_d));

    PetscCall(PetscObjectCompose((PetscObject)M,"iscol_d",(PetscObject)iscol_d));
    PetscCall(ISDestroy(&iscol_d));

    PetscCall(PetscObjectCompose((PetscObject)M,"iscol_o",(PetscObject)iscol_o));
    PetscCall(ISDestroy(&iscol_o));
  }
  PetscFunctionReturn(0);
}

/* Extract a parallel submatrix; chooses among three code paths depending on whether
   isrow/iscol match the matrix's row/column ownership distribution */
PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
{
  IS        iscol_local=NULL,isrow_d;
  PetscInt  csize;
  PetscInt  n,i,j,start,end;
  PetscBool sameRowDist=PETSC_FALSE,sameDist[2],tsameDist[2];
  MPI_Comm  comm;

  PetscFunctionBegin;
  /* If isrow has same processor distribution as mat,
     call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */
  if (call == MAT_REUSE_MATRIX) {
    /* the path taken on the initial call is recorded via composed objects on *newmat */
    PetscCall(PetscObjectQuery((PetscObject)*newmat,"isrow_d",(PetscObject*)&isrow_d));
    if (isrow_d) {
      sameRowDist  = PETSC_TRUE;
      tsameDist[1] = PETSC_TRUE; /* sameColDist */
    } else {
      PetscCall(PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_local));
      if (iscol_local) {
        sameRowDist  = PETSC_TRUE;
        tsameDist[1] = PETSC_FALSE; /* !sameColDist */
      }
    }
  } else {
    /* Check if isrow has same processor distribution as mat */
    sameDist[0] = PETSC_FALSE;
    PetscCall(ISGetLocalSize(isrow,&n));
    if (!n) {
      sameDist[0] = PETSC_TRUE;
    } else {
      PetscCall(ISGetMinMax(isrow,&i,&j));
      PetscCall(MatGetOwnershipRange(mat,&start,&end));
      if (i >= start && j < end) {
        sameDist[0] = PETSC_TRUE;
      }
    }

    /* Check if iscol has same processor distribution as mat */
    sameDist[1] = PETSC_FALSE;
    PetscCall(ISGetLocalSize(iscol,&n));
    if (!n) {
      sameDist[1] = PETSC_TRUE;
    } else {
      PetscCall(ISGetMinMax(iscol,&i,&j));
      PetscCall(MatGetOwnershipRangeColumn(mat,&start,&end));
      if (i >= start && j < end) sameDist[1] = PETSC_TRUE;
    }

    /* the distribution properties must hold on every rank */
    PetscCall(PetscObjectGetComm((PetscObject)mat,&comm));
    PetscCall(MPIU_Allreduce(&sameDist,&tsameDist,2,MPIU_BOOL,MPI_LAND,comm));
    sameRowDist = tsameDist[0];
  }

  if (sameRowDist) {
    if (tsameDist[1]) { /* sameRowDist & sameColDist */
      /* isrow and iscol have same processor distribution as mat */
      PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat,isrow,iscol,call,newmat));
      PetscFunctionReturn(0);
    } else { /* sameRowDist */
      /* isrow has same processor distribution as mat */
      if (call == MAT_INITIAL_MATRIX) {
        PetscBool sorted;
        PetscCall(ISGetSeqIS_Private(mat,iscol,&iscol_local));
        PetscCall(ISGetLocalSize(iscol_local,&n)); /* local size of iscol_local = global columns of newmat */
        PetscCall(ISGetSize(iscol,&i));
        PetscCheck(n == i,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"n %" PetscInt_FMT " != size of iscol %" PetscInt_FMT,n,i);

        PetscCall(ISSorted(iscol_local,&sorted));
        if (sorted) {
          /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local be sorted; it can have duplicate indices */
          PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,iscol_local,MAT_INITIAL_MATRIX,newmat));
          PetscFunctionReturn(0);
        }
      } else { /* call == MAT_REUSE_MATRIX */
        IS iscol_sub;
        PetscCall(PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub));
        if (iscol_sub) {
          PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,NULL,call,newmat));
          PetscFunctionReturn(0);
        }
      }
    }
  }

  /* General case: iscol -> iscol_local which has global size of iscol */
  if (call == MAT_REUSE_MATRIX) {
    PetscCall(PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local));
    PetscCheck(iscol_local,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
  } else {
    if (!iscol_local) {
      PetscCall(ISGetSeqIS_Private(mat,iscol,&iscol_local));
    }
  }

  PetscCall(ISGetLocalSize(iscol,&csize));
  PetscCall(MatCreateSubMatrix_MPIAIJ_nonscalable(mat,isrow,iscol_local,csize,call,newmat));

  if (call == MAT_INITIAL_MATRIX) {
    /* stash the gathered column IS on the submatrix so MAT_REUSE_MATRIX can find it */
    PetscCall(PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local));
    PetscCall(ISDestroy(&iscol_local));
  }
  PetscFunctionReturn(0);
}

/*@C
     MatCreateMPIAIJWithSeqAIJ - creates a MPIAIJ matrix using SeqAIJ matrices that contain the "diagonal"
     and "off-diagonal" part of the matrix in CSR format.

   Collective

   Input Parameters:
+  comm - MPI communicator
.  A - "diagonal" portion of matrix
.  B - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine
-  garray - global index of B columns

   Output Parameter:
.   mat - the matrix, with input A as its local diagonal matrix
   Level: advanced

   Notes:
       See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix.
       A becomes part of output mat, B is destroyed by this routine. The user cannot use A and B anymore.
.seealso: `MatCreateMPIAIJWithSplitArrays()`
@*/
PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm,Mat A,Mat B,const PetscInt garray[],Mat *mat)
{
  Mat_MPIAIJ        *maij;
  Mat_SeqAIJ        *b=(Mat_SeqAIJ*)B->data,*bnew;
  PetscInt          *oi=b->i,*oj=b->j,i,nz,col;
  const PetscScalar *oa;
  Mat               Bnew;
  PetscInt          m,n,N;
  MatType           mpi_mat_type;

  PetscFunctionBegin;
  PetscCall(MatCreate(comm,mat));
  PetscCall(MatGetSize(A,&m,&n));
  PetscCheck(m == B->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Am %" PetscInt_FMT " != Bm %" PetscInt_FMT,m,B->rmap->N);
  PetscCheck(A->rmap->bs == B->rmap->bs,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A row bs %" PetscInt_FMT " != B row bs %" PetscInt_FMT,A->rmap->bs,B->rmap->bs);
  /* remove check below; When B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be same as A */
  /* PetscCheck(A->cmap->bs == B->cmap->bs,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %" PetscInt_FMT " != B column bs %" PetscInt_FMT,A->cmap->bs,B->cmap->bs); */

  /* Get global columns of mat: sum of local "diagonal" column counts across ranks */
  PetscCall(MPIU_Allreduce(&n,&N,1,MPIU_INT,MPI_SUM,comm));

  PetscCall(MatSetSizes(*mat,m,n,PETSC_DECIDE,N));
  /* Determine the type of MPI matrix that should be created from the type of matrix A, which holds the "diagonal" portion. */
  PetscCall(MatGetMPIMatType_Private(A,&mpi_mat_type));
  PetscCall(MatSetType(*mat,mpi_mat_type));

  PetscCall(MatSetBlockSizes(*mat,A->rmap->bs,A->cmap->bs));
  maij = (Mat_MPIAIJ*)(*mat)->data;

  (*mat)->preallocated = PETSC_TRUE;

  PetscCall(PetscLayoutSetUp((*mat)->rmap));
  PetscCall(PetscLayoutSetUp((*mat)->cmap));

  /* Set A as diagonal portion of *mat; ownership transfers, caller must not use A afterwards */
  maij->A = A;

  /* Map B's compact local column indices back to global column indices via garray,
     in place in B's j array (which Bnew will share) */
  nz = oi[m];
  for (i=0; i<nz; i++) {
    col   = oj[i];
    oj[i] = garray[col];
  }

  /* Set Bnew as off-diagonal portion of *mat; Bnew shares B's i/j/a arrays */
  PetscCall(MatSeqAIJGetArrayRead(B,&oa));
  PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,N,oi,oj,(PetscScalar*)oa,&Bnew));
  PetscCall(MatSeqAIJRestoreArrayRead(B,&oa));
  bnew        = (Mat_SeqAIJ*)Bnew->data;
  bnew->maxnz = b->maxnz; /* allocated nonzeros of B */
  maij->B     = Bnew;

  PetscCheck(B->rmap->N == Bnew->rmap->N,PETSC_COMM_SELF,PETSC_ERR_PLIB,"BN %" PetscInt_FMT " != BnewN %" PetscInt_FMT,B->rmap->N,Bnew->rmap->N);

  /* Transfer array ownership from B to Bnew: B must not free the shared arrays on destroy */
  b->singlemalloc = PETSC_FALSE; /* B arrays are shared by Bnew */
  b->free_a       = PETSC_FALSE;
  b->free_ij      = PETSC_FALSE;
  PetscCall(MatDestroy(&B));

  bnew->singlemalloc = PETSC_TRUE; /* arrays will be freed by MatDestroy(&Bnew) */
  bnew->free_a       = PETSC_TRUE;
  bnew->free_ij      = PETSC_TRUE;

  /* condense columns of maij->B */
  PetscCall(MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE));
  PetscCall(MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY));
  PetscCall(MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE));
  PetscCall(MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE));
  PetscFunctionReturn(0);
}

extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool,Mat*);

/* Extracts a parallel submatrix when isrow matches mat's row distribution.
   Requires the gathered iscol_local (sorted) on MAT_INITIAL_MATRIX; on reuse the
   cached objects composed onto *newmat are used instead. */
PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat,IS
isrow,IS iscol,IS iscol_local,MatReuse call,Mat *newmat)
{
  PetscInt       i,m,n,rstart,row,rend,nz,j,bs,cbs;
  PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
  Mat_MPIAIJ     *a=(Mat_MPIAIJ*)mat->data;
  Mat            M,Msub,B=a->B;
  MatScalar      *aa;
  Mat_SeqAIJ     *aij;
  PetscInt       *garray = a->garray,*colsub,Ncols;
  PetscInt       count,Bn=B->cmap->N,cstart=mat->cmap->rstart,cend=mat->cmap->rend;
  IS             iscol_sub,iscmap;
  const PetscInt *is_idx,*cmap;
  PetscBool      allcolumns=PETSC_FALSE;
  MPI_Comm       comm;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)mat,&comm));
  if (call == MAT_REUSE_MATRIX) {
    /* Recover the cached sub-column IS, column map and sequential submatrix
       composed onto *newmat by the MAT_INITIAL_MATRIX pass */
    PetscCall(PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub));
    PetscCheck(iscol_sub,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"SubIScol passed in was not used before, cannot reuse");
    PetscCall(ISGetLocalSize(iscol_sub,&count));

    PetscCall(PetscObjectQuery((PetscObject)*newmat,"Subcmap",(PetscObject*)&iscmap));
    PetscCheck(iscmap,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Subcmap passed in was not used before, cannot reuse");

    PetscCall(PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Msub));
    PetscCheck(Msub,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");

    PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_REUSE_MATRIX,PETSC_FALSE,&Msub));

  } else { /* call == MAT_INITIAL_MATRIX) */
    PetscBool flg;

    PetscCall(ISGetLocalSize(iscol,&n));
    PetscCall(ISGetSize(iscol,&Ncols));

    /* (1) iscol -> nonscalable iscol_local */
    /* Check for special case: each processor gets entire matrix columns */
    PetscCall(ISIdentity(iscol_local,&flg));
    if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE;
    PetscCall(MPIU_Allreduce(MPI_IN_PLACE,&allcolumns,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)mat)));
    if (allcolumns) {
      iscol_sub = iscol_local;
      PetscCall(PetscObjectReference((PetscObject)iscol_local));
      PetscCall(ISCreateStride(PETSC_COMM_SELF,n,0,1,&iscmap));

    } else {
      /* (2) iscol_local -> iscol_sub and iscmap. Implementation below requires iscol_local be sorted, it can have duplicate indices */
      PetscInt *idx,*cmap1,k;
      PetscCall(PetscMalloc1(Ncols,&idx));
      PetscCall(PetscMalloc1(Ncols,&cmap1));
      PetscCall(ISGetIndices(iscol_local,&is_idx));
      count = 0;
      k     = 0;
      /* Keep only the requested columns this rank actually stores (diagonal range
         or present in garray); garray is sorted, so k advances monotonically */
      for (i=0; i<Ncols; i++) {
        j = is_idx[i];
        if (j >= cstart && j < cend) {
          /* diagonal part of mat */
          idx[count]     = j;
          cmap1[count++] = i; /* column index in submat */
        } else if (Bn) {
          /* off-diagonal part of mat */
          if (j == garray[k]) {
            idx[count]     = j;
            cmap1[count++] = i; /* column index in submat */
          } else if (j > garray[k]) {
            while (j > garray[k] && k < Bn-1) k++;
            if (j == garray[k]) {
              idx[count]     = j;
              cmap1[count++] = i; /* column index in submat */
            }
          }
        }
      }
      PetscCall(ISRestoreIndices(iscol_local,&is_idx));

      PetscCall(ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_OWN_POINTER,&iscol_sub));
      PetscCall(ISGetBlockSize(iscol,&cbs));
      PetscCall(ISSetBlockSize(iscol_sub,cbs));

      PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local),count,cmap1,PETSC_OWN_POINTER,&iscmap));
    }

    /* (3) Create sequential Msub */
    PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_INITIAL_MATRIX,allcolumns,&Msub));
  }

  PetscCall(ISGetLocalSize(iscol_sub,&count));
  aij = (Mat_SeqAIJ*)(Msub)->data;
  ii  = aij->i;
  PetscCall(ISGetIndices(iscmap,&cmap));

  /*
      m - number of local rows
      Ncols - number of columns (same on all processors)
      rstart - first row in new global matrix generated
  */
  PetscCall(MatGetSize(Msub,&m,NULL));

  if (call == MAT_INITIAL_MATRIX) {
    /* (4) Create parallel newmat */
    PetscMPIInt rank,size;
    PetscInt    csize;

    PetscCallMPI(MPI_Comm_size(comm,&size));
    PetscCallMPI(MPI_Comm_rank(comm,&rank));

    /*
        Determine the number of non-zeros in the diagonal and off-diagonal
        portions of the matrix in order to do correct preallocation
    */

    /* first get start and end of "diagonal" columns */
    PetscCall(ISGetLocalSize(iscol,&csize));
    if (csize == PETSC_DECIDE) {
      PetscCall(ISGetSize(isrow,&mglobal));
      if (mglobal == Ncols) { /* square matrix */
        nlocal = m;
      } else {
        nlocal = Ncols/size + ((Ncols % size) > rank);
      }
    } else {
      nlocal = csize;
    }
    /* prefix sum gives this rank's column ownership range [rstart,rend) in newmat */
    PetscCallMPI(MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm));
    rstart = rend - nlocal;
    PetscCheck(rank != size - 1 || rend == Ncols,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT,rend,Ncols);

    /* next, compute all the lengths */
    jj = aij->j;
    PetscCall(PetscMalloc1(2*m+1,&dlens));
    olens = dlens + m; /* olens shares the single allocation with dlens */
    for (i=0; i<m; i++) {
      jend = ii[i+1] - ii[i];
      olen = 0;
      dlen = 0;
      for (j=0; j<jend; j++) {
        if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++;
        else dlen++;
        jj++;
      }
      olens[i] = olen;
      dlens[i] = dlen;
    }

    PetscCall(ISGetBlockSize(isrow,&bs));
    PetscCall(ISGetBlockSize(iscol,&cbs));

    PetscCall(MatCreate(comm,&M));
    PetscCall(MatSetSizes(M,m,nlocal,PETSC_DECIDE,Ncols));
    PetscCall(MatSetBlockSizes(M,bs,cbs));
    PetscCall(MatSetType(M,((PetscObject)mat)->type_name));
    PetscCall(MatMPIAIJSetPreallocation(M,0,dlens,0,olens));
    PetscCall(PetscFree(dlens));

  } else { /* call == MAT_REUSE_MATRIX */
    M = *newmat;
    PetscCall(MatGetLocalSize(M,&i,NULL));
    PetscCheck(i == m,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
    PetscCall(MatZeroEntries(M));
    /*
         The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
       rather than the slower MatSetValues().
    */
    M->was_assembled = PETSC_TRUE;
    M->assembled     = PETSC_FALSE;
  }

  /* (5) Set values of Msub to *newmat */
  PetscCall(PetscMalloc1(count,&colsub));
  PetscCall(MatGetOwnershipRange(M,&rstart,NULL));

  jj = aij->j;
  PetscCall(MatSeqAIJGetArrayRead(Msub,(const PetscScalar**)&aa));
  for (i=0; i<m; i++) {
    row = rstart + i;
    nz  = ii[i+1] - ii[i];
    for (j=0; j<nz; j++) colsub[j] = cmap[jj[j]]; /* translate sub column -> newmat column */
    PetscCall(MatSetValues_MPIAIJ(M,1,&row,nz,colsub,aa,INSERT_VALUES));
    jj += nz; aa += nz;
  }
  PetscCall(MatSeqAIJRestoreArrayRead(Msub,(const PetscScalar**)&aa));
  PetscCall(ISRestoreIndices(iscmap,&cmap));

  PetscCall(MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY));

  PetscCall(PetscFree(colsub));

  /* save Msub, iscol_sub and iscmap used in processor for next request */
  if (call == MAT_INITIAL_MATRIX) {
    *newmat = M;
    PetscCall(PetscObjectCompose((PetscObject)(*newmat),"SubMatrix",(PetscObject)Msub));
    PetscCall(MatDestroy(&Msub));

    PetscCall(PetscObjectCompose((PetscObject)(*newmat),"SubIScol",(PetscObject)iscol_sub));
    PetscCall(ISDestroy(&iscol_sub));

    PetscCall(PetscObjectCompose((PetscObject)(*newmat),"Subcmap",(PetscObject)iscmap));
    PetscCall(ISDestroy(&iscmap));

    if (iscol_local) {
      PetscCall(PetscObjectCompose((PetscObject)(*newmat),"ISAllGather",(PetscObject)iscol_local));
      PetscCall(ISDestroy(&iscol_local));
    }
  }
  PetscFunctionReturn(0);
}

/*
  Not great since it makes two copies of the submatrix, first an SeqAIJ
  in local and then by concatenating the local matrices the end result.
  Writing it directly would be much like MatCreateSubMatrices_MPIAIJ()

  Note: This requires a sequential iscol with all indices.
*/
PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
{
  PetscMPIInt    rank,size;
  PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs;
  PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
  Mat            M,Mreuse;
  MatScalar      *aa,*vwork;
  MPI_Comm       comm;
  Mat_SeqAIJ     *aij;
  PetscBool      colflag,allcolumns=PETSC_FALSE;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)mat,&comm));
  PetscCallMPI(MPI_Comm_rank(comm,&rank));
  PetscCallMPI(MPI_Comm_size(comm,&size));

  /* Check for special case: each processor gets entire matrix columns */
  PetscCall(ISIdentity(iscol,&colflag));
  PetscCall(ISGetLocalSize(iscol,&n));
  if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE;
  PetscCall(MPIU_Allreduce(MPI_IN_PLACE,&allcolumns,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)mat)));

  if (call ==  MAT_REUSE_MATRIX) {
    /* The sequential submatrix cached on *newmat by the initial call is refilled in place */
    PetscCall(PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse));
    PetscCheck(Mreuse,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
    PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,allcolumns,&Mreuse));
  } else {
    PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,allcolumns,&Mreuse));
  }

  /*
      m - number of local rows
      n - number of columns (same on all processors)
      rstart - first row in new global matrix generated
  */
  PetscCall(MatGetSize(Mreuse,&m,&n));
  PetscCall(MatGetBlockSizes(Mreuse,&bs,&cbs));
  if (call == MAT_INITIAL_MATRIX) {
    aij = (Mat_SeqAIJ*)(Mreuse)->data;
    ii  = aij->i;
    jj  = aij->j;

    /*
        Determine the number of non-zeros in the diagonal and off-diagonal
        portions of the matrix in order to do correct preallocation
    */

    /* first get start and end of "diagonal" columns */
    if (csize == PETSC_DECIDE) {
      PetscCall(ISGetSize(isrow,&mglobal));
      if (mglobal == n) { /* square matrix */
        nlocal = m;
      } else {
        nlocal = n/size + ((n % size) > rank);
      }
    } else {
      nlocal = csize;
    }
    /* prefix sum gives this rank's column ownership range [rstart,rend) */
    PetscCallMPI(MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm));
    rstart = rend - nlocal;
    PetscCheck(rank != size - 1 || rend == n,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT,rend,n);

    /* next, compute all the lengths */
    PetscCall(PetscMalloc1(2*m+1,&dlens));
    olens = dlens + m; /* olens shares the single allocation with dlens */
    for (i=0; i<m; i++) {
      jend = ii[i+1] - ii[i];
      olen = 0;
      dlen = 0;
      for (j=0; j<jend; j++) {
        if (*jj < rstart || *jj >= rend) olen++;
        else dlen++;
        jj++;
      }
      olens[i] = olen;
      dlens[i] = dlen;
    }
    PetscCall(MatCreate(comm,&M));
    PetscCall(MatSetSizes(M,m,nlocal,PETSC_DECIDE,n));
    PetscCall(MatSetBlockSizes(M,bs,cbs));
    PetscCall(MatSetType(M,((PetscObject)mat)->type_name));
    PetscCall(MatMPIAIJSetPreallocation(M,0,dlens,0,olens));
    PetscCall(PetscFree(dlens));
  } else {
    PetscInt ml,nl;

    M = *newmat;
    PetscCall(MatGetLocalSize(M,&ml,&nl));
    PetscCheck(ml == m,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
    PetscCall(MatZeroEntries(M));
    /*
         The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
       rather than the slower MatSetValues().
    */
    M->was_assembled = PETSC_TRUE;
    M->assembled     = PETSC_FALSE;
  }
  PetscCall(MatGetOwnershipRange(M,&rstart,&rend));
  aij = (Mat_SeqAIJ*)(Mreuse)->data;
  ii  = aij->i;
  jj  = aij->j;

  /* trigger copy to CPU if needed */
  PetscCall(MatSeqAIJGetArrayRead(Mreuse,(const PetscScalar**)&aa));
  for (i=0; i<m; i++) {
    row   = rstart + i;
    nz    = ii[i+1] - ii[i];
    cwork = jj; jj += nz;
    vwork = aa; aa += nz;
    PetscCall(MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES));
  }
  PetscCall(MatSeqAIJRestoreArrayRead(Mreuse,(const PetscScalar**)&aa));

  PetscCall(MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY));
  *newmat = M;

  /* save submatrix used in processor for next request */
  if (call == MAT_INITIAL_MATRIX) {
    PetscCall(PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse));
    PetscCall(MatDestroy(&Mreuse));
  }
  PetscFunctionReturn(0);
}

/* MatMPIAIJSetPreallocationCSR() implementation: preallocates B from local CSR
   arrays (Ii,J,v), inserts the values, assembles, and records per-row counts of
   entries left of the diagonal block in Aij->ld for later array-based updates. */
PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
{
  PetscInt       m,cstart, cend,j,nnz,i,d,*ld;
  PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
  const PetscInt *JJ;
  PetscBool      nooffprocentries;
  Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*)B->data;

  PetscFunctionBegin;
  PetscCheck(Ii[0] == 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %" PetscInt_FMT,Ii[0]);

  PetscCall(PetscLayoutSetUp(B->rmap));
  PetscCall(PetscLayoutSetUp(B->cmap));
  m      = B->rmap->n;
  cstart = B->cmap->rstart;
  cend   = B->cmap->rend;
  rstart = B->rmap->rstart;

  PetscCall(PetscCalloc2(m,&d_nnz,m,&o_nnz));

  if (PetscDefined(USE_DEBUG)) {
    /* NOTE(review): the first/last-entry checks below assume each row of J is sorted — confirm */
    for (i=0; i<m; i++) {
      nnz = Ii[i+1]- Ii[i];
      JJ  = J + Ii[i];
      PetscCheck(nnz >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %" PetscInt_FMT " has a negative %"
 PetscInt_FMT " number of columns",i,nnz);
      PetscCheck(!nnz || !(JJ[0] < 0),PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %" PetscInt_FMT " starts with negative column index %" PetscInt_FMT,i,JJ[0]);
      PetscCheck(!nnz || !(JJ[nnz-1] >= B->cmap->N),PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %" PetscInt_FMT " ends with too large a column index %" PetscInt_FMT " (max allowed %" PetscInt_FMT ")",i,JJ[nnz-1],B->cmap->N);
    }
  }

  /* Count diagonal-block vs off-diagonal-block nonzeros per row for preallocation */
  for (i=0; i<m; i++) {
    nnz     = Ii[i+1]- Ii[i];
    JJ      = J + Ii[i];
    nnz_max = PetscMax(nnz_max,nnz);
    d       = 0;
    for (j=0; j<nnz; j++) {
      if (cstart <= JJ[j] && JJ[j] < cend) d++;
    }
    d_nnz[i] = d;
    o_nnz[i] = nnz - d;
  }
  PetscCall(MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz));
  PetscCall(PetscFree2(d_nnz,o_nnz));

  for (i=0; i<m; i++) {
    ii = i + rstart;
    PetscCall(MatSetValues_MPIAIJ(B,1,&ii,Ii[i+1] - Ii[i],J+Ii[i], v ? v + Ii[i] : NULL,INSERT_VALUES));
  }
  /* all inserted values are local, so skip the off-process communication during assembly */
  nooffprocentries    = B->nooffprocentries;
  B->nooffprocentries = PETSC_TRUE;
  PetscCall(MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY));
  B->nooffprocentries = nooffprocentries;

  /* count number of entries below block diagonal */
  PetscCall(PetscFree(Aij->ld));
  PetscCall(PetscCalloc1(m,&ld));
  Aij->ld = ld;
  for (i=0; i<m; i++) {
    nnz = Ii[i+1] - Ii[i];
    j   = 0;
    while (j < nnz && J[j] < cstart) {j++;}
    ld[i] = j;
    J    += nnz;
  }

  PetscCall(MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE));
  PetscFunctionReturn(0);
}

/*@
   MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
   (the default parallel PETSc format).

   Collective

   Input Parameters:
+  B - the matrix
.  i - the indices into j for the start of each local row (starts with zero)
.  j - the column indices for each local row (starts with zero)
-  v - optional values in the matrix

   Level: developer

   Notes:
       The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
     thus you CANNOT change the matrix entries by changing the values of v[] after you have
     called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.

       The i and j indices are 0 based, and i indices are indices corresponding to the local j array.

       The format which is used for the sparse matrix input, is equivalent to a
    row-major ordering.. i.e for the following matrix, the input data expected is
    as shown

$        1 0 0
$        2 0 3     P0
$       -------
$        4 5 6     P1
$
$     Process0 [P0]: rows_owned=[0,1]
$        i =  {0,1,3}  [size = nrow+1  = 2+1]
$        j =  {0,0,2}  [size = 3]
$        v =  {1,2,3}  [size = 3]
$
$     Process1 [P1]: rows_owned=[2]
$        i =  {0,3}    [size = nrow+1  = 1+1]
$        j =  {0,1,2}  [size = 3]
$        v =  {4,5,6}  [size = 3]

.seealso: `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatCreateAIJ()`, `MATMPIAIJ`,
          `MatCreateSeqAIJWithArrays()`, `MatCreateMPIAIJWithSplitArrays()`
@*/
PetscErrorCode  MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
{
  PetscFunctionBegin;
  /* dispatch to the type-specific implementation (no-op if the type does not provide one) */
  PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));
  PetscFunctionReturn(0);
}

/*@C
   MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
   (the default parallel PETSc format).  For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz).
By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective

   Input Parameters:
+  B - the matrix
.  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
           (same value is used for all local rows)
.  d_nnz - array containing the number of nonzeros in the various rows of the
           DIAGONAL portion of the local submatrix (possibly different for each row)
           or NULL (PETSC_NULL_INTEGER in Fortran), if d_nz is used to specify the nonzero structure.
           The size of this array is equal to the number of local rows, i.e. 'm'.
           For matrices that will be factored, you must leave room for (and set)
           the diagonal entry even if it is zero.
.  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
           submatrix (same value is used for all local rows).
-  o_nnz - array containing the number of nonzeros in the various rows of the
           OFF-DIAGONAL portion of the local submatrix (possibly different for
           each row) or NULL (PETSC_NULL_INTEGER in Fortran), if o_nz is used to specify the nonzero
           structure. The size of this array is equal to the number
           of local rows, i.e. 'm'.

   If the *_nnz parameter is given then the *_nz parameter is ignored.

   The AIJ format (also called the Yale sparse matrix format or
   compressed row storage (CSR)), is fully compatible with standard Fortran 77
   storage.  The stored row and column indices begin with zero.
   See Users-Manual: ch_mat for details.

   The parallel matrix is partitioned such that the first m0 rows belong to
   process 0, the next m1 rows belong to process 1, the next m2 rows belong
   to process 2, etc., where m0, m1, m2... are the input parameter 'm'.
   The DIAGONAL portion of the local submatrix of a processor can be defined
   as the submatrix which is obtained by extracting the part corresponding to
   the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
   first row that belongs to the processor, r2 is the last row belonging to
   this processor, and c1-c2 is the range of indices of the local part of a
   vector suitable for applying the matrix to. This is an mxn matrix. In the
   common case of a square matrix, the row and column ranges are the same and
   the DIAGONAL part is also square. The remaining portion of the local
   submatrix (mxN) constitutes the OFF-DIAGONAL portion.

   If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.

   You can call MatGetInfo() to get information on how effective the preallocation was;
   for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
   You can also run with the option -info and look for messages with the string
   malloc in them to see if additional memory allocation was needed.

   Example usage:

   Consider the following 8x8 matrix with 34 non-zero values, that is
   assembled across 3 processors. Let's assume that proc0 owns 3 rows,
   proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
   as follows:

.vb
            1  2  0  |  0  3  0  |  0  4
    Proc0   0  5  6  |  7  0  0  |  8  0
            9  0 10  | 11  0  0  | 12  0
    -------------------------------------
           13  0 14  | 15 16 17  |  0  0
    Proc1   0 18  0  | 19 20 21  |  0  0
            0  0  0  | 22 23  0  | 24  0
    -------------------------------------
    Proc2  25 26 27  |  0  0 28  | 29  0
           30  0  0  | 31 32 33  |  0 34
.ve

   This can be represented as a collection of submatrices as:

.vb
      A B C
      D E F
      G H I
.ve

   Where the submatrices A,B,C are owned by proc0, D,E,F are
   owned by proc1, G,H,I are owned by proc2.
   The 'm' parameters for proc0, proc1, proc2 are 3, 3, 2 respectively.
   The 'n' parameters for proc0, proc1, proc2 are 3, 3, 2 respectively.
   The 'M','N' parameters are 8, 8, and have the same values on all procs.

   The DIAGONAL submatrices corresponding to proc0, proc1, proc2 are
   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
   corresponding to proc0, proc1, proc2 are [BC], [DF], [GH] respectively.
   Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
   part as SeqAIJ matrices. E.g., proc1 will store [E] as a SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.

   When d_nz, o_nz parameters are specified, d_nz storage elements are
   allocated for every row of the local diagonal submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submat.
   One way to choose d_nz and o_nz is to use the max nonzeros per local
   row for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
   In this case, the values of d_nz, o_nz are:
.vb
     proc0 : d_nz = 2, o_nz = 2
     proc1 : d_nz = 3, o_nz = 2
     proc2 : d_nz = 1, o_nz = 4
.ve
   We are allocating m*(d_nz+o_nz) storage locations for every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc2, i.e. we are using 12+15+10=37 storage locations to store
   34 values.

   When d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
   In the above case the values for d_nnz, o_nnz are:
.vb
     proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2: d_nnz = [1,1]   and o_nnz = [4,4]
.ve
   Here the space allocated is the sum of all the above values, i.e. 34, and
   hence pre-allocation is perfect.
   Level: intermediate

.seealso: `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatCreateAIJ()`, `MatMPIAIJSetPreallocationCSR()`,
          `MATMPIAIJ`, `MatGetInfo()`, `PetscSplitOwnership()`
@*/
PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(B,MAT_CLASSID,1);
  PetscValidType(B,1);
  /* dispatch to the type-specific implementation (no-op if the type does not provide one) */
  PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));
  PetscFunctionReturn(0);
}

/*@
   MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard
         CSR format for the local rows.

   Collective

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (Cannot be PETSC_DECIDE)
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
       calculated if N is given) For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.  i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
.  j - column indices
-  a - optional matrix values

   Output Parameter:
.  mat - the matrix

   Level: intermediate

   Notes:
       The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
     thus you CANNOT change the matrix entries by changing the values of a[] after you have
     called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.

       The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
4141 4142 The format which is used for the sparse matrix input, is equivalent to a 4143 row-major ordering.. i.e for the following matrix, the input data expected is 4144 as shown 4145 4146 Once you have created the matrix you can update it with new numerical values using MatUpdateMPIAIJWithArrays 4147 4148 $ 1 0 0 4149 $ 2 0 3 P0 4150 $ ------- 4151 $ 4 5 6 P1 4152 $ 4153 $ Process0 [P0]: rows_owned=[0,1] 4154 $ i = {0,1,3} [size = nrow+1 = 2+1] 4155 $ j = {0,0,2} [size = 3] 4156 $ v = {1,2,3} [size = 3] 4157 $ 4158 $ Process1 [P1]: rows_owned=[2] 4159 $ i = {0,3} [size = nrow+1 = 1+1] 4160 $ j = {0,1,2} [size = 3] 4161 $ v = {4,5,6} [size = 3] 4162 4163 .seealso: `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 4164 `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArrays()` 4165 @*/ 4166 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 4167 { 4168 PetscFunctionBegin; 4169 PetscCheck(!i || !i[0],PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 4170 PetscCheck(m >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4171 PetscCall(MatCreate(comm,mat)); 4172 PetscCall(MatSetSizes(*mat,m,n,M,N)); 4173 /* PetscCall(MatSetBlockSizes(M,bs,cbs)); */ 4174 PetscCall(MatSetType(*mat,MATMPIAIJ)); 4175 PetscCall(MatMPIAIJSetPreallocationCSR(*mat,i,j,a)); 4176 PetscFunctionReturn(0); 4177 } 4178 4179 /*@ 4180 MatUpdateMPIAIJWithArrays - updates a MPI AIJ matrix using arrays that contain in standard 4181 CSR format for the local rows. 
   Only the numerical values are updated the other arrays must be identical to what was passed from MatCreateMPIAIJWithArrays()

   Deprecated: Use `MatUpdateMPIAIJWithArray()`

   Collective

   Input Parameters:
+  mat - the matrix
.  m - number of local rows (Cannot be PETSC_DECIDE)
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
       calculated if N is given) For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.  Ii - row indices; that is Ii[0] = 0, Ii[row] = Ii[row-1] + number of elements in that row of the matrix
.  J - column indices
-  v - matrix values

   Level: intermediate

.seealso: `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
          `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArrays()`, `MatUpdateMPIAIJWithArray()`
@*/
PetscErrorCode MatUpdateMPIAIJWithArrays(Mat mat,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
{
  PetscInt       nnz,i;
  PetscBool      nooffprocentries;
  Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *Ad  = (Mat_SeqAIJ*)Aij->A->data;
  PetscScalar    *ad,*ao;
  PetscInt       ldi,Iii,md;
  const PetscInt *Adi = Ad->i;    /* row pointers of the diagonal block A */
  PetscInt       *ld  = Aij->ld;  /* per row: # of off-diagonal entries left of the diagonal block */

  PetscFunctionBegin;
  PetscCheck(Ii[0] == 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
  PetscCheck(m >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
  PetscCheck(m == mat->rmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Local number of rows cannot change from call to MatUpdateMPIAIJWithArrays()");
  PetscCheck(n == mat->cmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Local number of columns cannot change from call to MatUpdateMPIAIJWithArrays()");

  PetscCall(MatSeqAIJGetArrayWrite(Aij->A,&ad));
  PetscCall(MatSeqAIJGetArrayWrite(Aij->B,&ao));

  /* each CSR row of v is laid out as
       [off-diag entries left of the diag block | diag-block entries | off-diag entries right of it];
     split it into the value arrays of A (diag block) and B (off-diag) accordingly */
  for (i=0; i<m; i++) {
    nnz = Ii[i+1]- Ii[i];    /* total entries of local row i */
    Iii = Ii[i];
    ldi = ld[i];             /* leading off-diagonal segment length */
    md  = Adi[i+1]-Adi[i];   /* diagonal-block segment length */
    PetscCall(PetscArraycpy(ao,v + Iii,ldi));
    PetscCall(PetscArraycpy(ad,v + Iii + ldi,md));
    PetscCall(PetscArraycpy(ao + ldi,v + Iii + ldi + md,nnz - ldi - md));
    ad += md; ao += nnz - md;
  }
  /* only local values changed: suppress the off-process reduction in the assembly below */
  nooffprocentries      = mat->nooffprocentries;
  mat->nooffprocentries = PETSC_TRUE;
  /* NOTE(review): ad/ao have been advanced past the start of their arrays by the loop above;
     this assumes MatSeqAIJRestoreArrayWrite() does not use the pointer value -- confirm */
  PetscCall(MatSeqAIJRestoreArrayWrite(Aij->A,&ad));
  PetscCall(MatSeqAIJRestoreArrayWrite(Aij->B,&ao));
  PetscCall(PetscObjectStateIncrease((PetscObject)Aij->A));
  PetscCall(PetscObjectStateIncrease((PetscObject)Aij->B));
  PetscCall(PetscObjectStateIncrease((PetscObject)mat));
  PetscCall(MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY));
  mat->nooffprocentries = nooffprocentries;
  PetscFunctionReturn(0);
}

/*@
   MatUpdateMPIAIJWithArray - updates an MPI AIJ matrix using an array that contains the nonzero values

   Collective

   Input Parameters:
+  mat - the matrix
-  v - matrix values, stored by row

   Level: intermediate

   Notes:
   The matrix must have been obtained with `MatCreateMPIAIJWithArrays()` or `MatMPIAIJSetPreallocationCSR()`

.seealso: `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
          `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArrays()`, `MatUpdateMPIAIJWithArray()`
@*/
PetscErrorCode MatUpdateMPIAIJWithArray(Mat mat,const PetscScalar v[])
{
  PetscInt       nnz,i,m;
  PetscBool      nooffprocentries;
  Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *Ad  = (Mat_SeqAIJ*)Aij->A->data;
  Mat_SeqAIJ     *Ao  = (Mat_SeqAIJ*)Aij->B->data;
  PetscScalar    *ad,*ao;
  const PetscInt *Adi = Ad->i,*Adj = Ao->i; /* row pointers of the diag (A) and off-diag (B) blocks */
  PetscInt       ldi,Iii,md;
  PetscInt       *ld  = Aij->ld;

  PetscFunctionBegin;
  m = mat->rmap->n;

  PetscCall(MatSeqAIJGetArrayWrite(Aij->A,&ad));
  PetscCall(MatSeqAIJGetArrayWrite(Aij->B,&ao));
  Iii = 0;
  /* same copy pattern as MatUpdateMPIAIJWithArrays(), but the row lengths are taken
     from the stored A/B row pointers instead of a user-provided Ii[] array */
  for (i=0; i<m; i++) {
    nnz = Adi[i+1]-Adi[i] + Adj[i+1]-Adj[i];
    ldi = ld[i];
    md  = Adi[i+1]-Adi[i];
    PetscCall(PetscArraycpy(ao,v + Iii,ldi));
    PetscCall(PetscArraycpy(ad,v + Iii + ldi,md));
    PetscCall(PetscArraycpy(ao + ldi,v + Iii + ldi + md,nnz - ldi - md));
    ad += md; ao += nnz - md;
    Iii += nnz;
  }
  /* only local values changed: suppress the off-process reduction in the assembly below */
  nooffprocentries      = mat->nooffprocentries;
  mat->nooffprocentries = PETSC_TRUE;
  PetscCall(MatSeqAIJRestoreArrayWrite(Aij->A,&ad));
  PetscCall(MatSeqAIJRestoreArrayWrite(Aij->B,&ao));
  PetscCall(PetscObjectStateIncrease((PetscObject)Aij->A));
  PetscCall(PetscObjectStateIncrease((PetscObject)Aij->B));
  PetscCall(PetscObjectStateIncrease((PetscObject)mat));
  PetscCall(MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY));
  mat->nooffprocentries = nooffprocentries;
  PetscFunctionReturn(0);
}

/*@C
   MatCreateAIJ - Creates a sparse parallel matrix in AIJ format
   (the default parallel PETSc format).  For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective

   Input Parameters:
+  comm - MPI communicator
.
m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 4319 This value should be the same as the local size used in creating the 4320 y vector for the matrix-vector product y = Ax. 4321 . n - This value should be the same as the local size used in creating the 4322 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4323 calculated if N is given) For square matrices n is almost always m. 4324 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4325 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4326 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 4327 (same value is used for all local rows) 4328 . d_nnz - array containing the number of nonzeros in the various rows of the 4329 DIAGONAL portion of the local submatrix (possibly different for each row) 4330 or NULL, if d_nz is used to specify the nonzero structure. 4331 The size of this array is equal to the number of local rows, i.e 'm'. 4332 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 4333 submatrix (same value is used for all local rows). 4334 - o_nnz - array containing the number of nonzeros in the various rows of the 4335 OFF-DIAGONAL portion of the local submatrix (possibly different for 4336 each row) or NULL, if o_nz is used to specify the nonzero 4337 structure. The size of this array is equal to the number 4338 of local rows, i.e 'm'. 4339 4340 Output Parameter: 4341 . A - the matrix 4342 4343 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 4344 MatXXXXSetPreallocation() paradigm instead of this routine directly. 
4345 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 4346 4347 Notes: 4348 If the *_nnz parameter is given then the *_nz parameter is ignored 4349 4350 m,n,M,N parameters specify the size of the matrix, and its partitioning across 4351 processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 4352 storage requirements for this matrix. 4353 4354 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 4355 processor than it must be used on all processors that share the object for 4356 that argument. 4357 4358 The user MUST specify either the local or global matrix dimensions 4359 (possibly both). 4360 4361 The parallel matrix is partitioned across processors such that the 4362 first m0 rows belong to process 0, the next m1 rows belong to 4363 process 1, the next m2 rows belong to process 2 etc.. where 4364 m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores 4365 values corresponding to [m x N] submatrix. 4366 4367 The columns are logically partitioned with the n0 columns belonging 4368 to 0th partition, the next n1 columns belonging to the next 4369 partition etc.. where n0,n1,n2... are the input parameter 'n'. 4370 4371 The DIAGONAL portion of the local submatrix on any given processor 4372 is the submatrix corresponding to the rows and columns m,n 4373 corresponding to the given processor. i.e diagonal matrix on 4374 process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1] 4375 etc. The remaining portion of the local submatrix [m x (N-n)] 4376 constitute the OFF-DIAGONAL portion. The example below better 4377 illustrates this concept. 4378 4379 For a square global matrix we define each processor's diagonal portion 4380 to be its local rows and the corresponding columns (a square submatrix); 4381 each processor's off-diagonal portion encompasses the remainder of the 4382 local matrix (a rectangular submatrix). 4383 4384 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 
4385 4386 When calling this routine with a single process communicator, a matrix of 4387 type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this 4388 type of communicator, use the construction mechanism 4389 .vb 4390 MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...); 4391 .ve 4392 4393 $ MatCreate(...,&A); 4394 $ MatSetType(A,MATMPIAIJ); 4395 $ MatSetSizes(A, m,n,M,N); 4396 $ MatMPIAIJSetPreallocation(A,...); 4397 4398 By default, this format uses inodes (identical nodes) when possible. 4399 We search for consecutive rows with the same nonzero structure, thereby 4400 reusing matrix information to achieve increased efficiency. 4401 4402 Options Database Keys: 4403 + -mat_no_inode - Do not use inodes 4404 . -mat_inode_limit <limit> - Sets inode limit (max limit=5) 4405 - -matmult_vecscatter_view <viewer> - View the vecscatter (i.e., communication pattern) used in MatMult() of sparse parallel matrices. 4406 See viewer types in manual of MatView(). Of them, ascii_matlab, draw or binary cause the vecscatter be viewed as a matrix. 4407 Entry (i,j) is the size of message (in bytes) rank i sends to rank j in one MatMult() call. 4408 4409 Example usage: 4410 4411 Consider the following 8x8 matrix with 34 non-zero values, that is 4412 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 4413 proc1 owns 3 rows, proc2 owns 2 rows. 
   This division can be shown
   as follows

.vb
            1  2  0  |  0  3  0  |  0  4
    Proc0   0  5  6  |  7  0  0  |  8  0
            9  0 10  | 11  0  0  | 12  0
    -------------------------------------
           13  0 14  | 15 16 17  |  0  0
    Proc1   0 18  0  | 19 20 21  |  0  0
            0  0  0  | 22 23  0  | 24  0
    -------------------------------------
    Proc2  25 26 27  |  0  0 28  | 29  0
           30  0  0  | 31 32 33  |  0 34
.ve

   This can be represented as a collection of submatrices as

.vb
      A B C
      D E F
      G H I
.ve

   Where the submatrices A,B,C are owned by proc0, D,E,F are
   owned by proc1, G,H,I are owned by proc2.

   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'M','N' parameters are 8,8, and have the same values on all procs.

   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
   Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
   part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.

   When d_nz, o_nz parameters are specified, d_nz storage elements are
   allocated for every row of the local diagonal submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submat.
   One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
   local row for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
   In this case, the values of d_nz,o_nz are
.vb
     proc0 : dnz = 2, o_nz = 2
     proc1 : dnz = 3, o_nz = 2
     proc2 : dnz = 1, o_nz = 4
.ve
   We are allocating m*(d_nz+o_nz) storage locations for every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc2.
i.e we are using 12+15+10=37 storage locations to store 4465 34 values. 4466 4467 When d_nnz, o_nnz parameters are specified, the storage is specified 4468 for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 4469 In the above case the values for d_nnz,o_nnz are 4470 .vb 4471 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 4472 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 4473 proc2: d_nnz = [1,1] and o_nnz = [4,4] 4474 .ve 4475 Here the space allocated is sum of all the above values i.e 34, and 4476 hence pre-allocation is perfect. 4477 4478 Level: intermediate 4479 4480 .seealso: `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 4481 `MATMPIAIJ`, `MatCreateMPIAIJWithArrays()` 4482 @*/ 4483 PetscErrorCode MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 4484 { 4485 PetscMPIInt size; 4486 4487 PetscFunctionBegin; 4488 PetscCall(MatCreate(comm,A)); 4489 PetscCall(MatSetSizes(*A,m,n,M,N)); 4490 PetscCallMPI(MPI_Comm_size(comm,&size)); 4491 if (size > 1) { 4492 PetscCall(MatSetType(*A,MATMPIAIJ)); 4493 PetscCall(MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz)); 4494 } else { 4495 PetscCall(MatSetType(*A,MATSEQAIJ)); 4496 PetscCall(MatSeqAIJSetPreallocation(*A,d_nz,d_nnz)); 4497 } 4498 PetscFunctionReturn(0); 4499 } 4500 4501 /*@C 4502 MatMPIAIJGetSeqAIJ - Returns the local piece of this distributed matrix 4503 4504 Not collective 4505 4506 Input Parameter: 4507 . A - The MPIAIJ matrix 4508 4509 Output Parameters: 4510 + Ad - The local diagonal block as a SeqAIJ matrix 4511 . Ao - The local off-diagonal block as a SeqAIJ matrix 4512 - colmap - An array mapping local column numbers of Ao to global column numbers of the parallel matrix 4513 4514 Note: The rows in Ad and Ao are in [0, Nr), where Nr is the number of local rows on this process. 
   The columns
   in Ad are in [0, Nc) where Nc is the number of local columns. The columns in Ao are in [0, Nco), where Nco is
   the number of nonzero columns in the local off-diagonal piece of the matrix A. The array colmap maps these
   local column numbers to global column numbers in the original matrix.

   Level: intermediate

.seealso: `MatMPIAIJGetLocalMat()`, `MatMPIAIJGetLocalMatCondensed()`, `MatCreateAIJ()`, `MATMPIAIJ`, `MATSEQAIJ`
@*/
PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
  PetscBool  flg;

  PetscFunctionBegin;
  /* prefix match so that derived types whose names begin with "mpiaij" are accepted too */
  PetscCall(PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&flg));
  PetscCheck(flg,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MATMPIAIJ matrix as input");
  /* borrowed references to the internal blocks and column map; no copies are made */
  if (Ad) *Ad = a->A;
  if (Ao) *Ao = a->B;
  if (colmap) *colmap = a->garray;
  PetscFunctionReturn(0);
}

/* Stacks the rows of the per-process sequential matrices inmat into one parallel matrix *outmat;
   MAT_INITIAL_MATRIX performs the symbolic (preallocation) phase, both cases fill the values */
PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
{
  PetscInt    m,N,i,rstart,nnz,Ii;
  PetscInt    *indx;
  PetscScalar *values;
  MatType     rootType;

  PetscFunctionBegin;
  PetscCall(MatGetSize(inmat,&m,&N));
  if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
    PetscInt *dnz,*onz,sum,bs,cbs;

    if (n == PETSC_DECIDE) {
      PetscCall(PetscSplitOwnership(comm,&n,&N));
    }
    /* Check sum(n) = N */
    PetscCall(MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm));
    PetscCheck(sum == N,PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %" PetscInt_FMT " != global columns %" PetscInt_FMT,sum,N);

    /* prefix-sum of the local row counts gives this process's first global row */
    PetscCallMPI(MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm));
    rstart -= m;

    MatPreallocateBegin(comm,m,n,dnz,onz);
    for (i=0; i<m; i++) {
      PetscCall(MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL));
      PetscCall(MatPreallocateSet(i+rstart,nnz,indx,dnz,onz));
      PetscCall(MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL));
    }

    PetscCall(MatCreate(comm,outmat));
    PetscCall(MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE));
    PetscCall(MatGetBlockSizes(inmat,&bs,&cbs));
    PetscCall(MatSetBlockSizes(*outmat,bs,cbs));
    PetscCall(MatGetRootType_Private(inmat,&rootType));
    PetscCall(MatSetType(*outmat,rootType));
    /* both preallocations are set because the root type may resolve to either Seq or MPI */
    PetscCall(MatSeqAIJSetPreallocation(*outmat,0,dnz));
    PetscCall(MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz));
    MatPreallocateEnd(dnz,onz);
    PetscCall(MatSetOption(*outmat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE));
  }

  /* numeric phase */
  PetscCall(MatGetOwnershipRange(*outmat,&rstart,NULL));
  for (i=0; i<m; i++) {
    PetscCall(MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values));
    Ii = i + rstart;
    PetscCall(MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES));
    PetscCall(MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values));
  }
  PetscCall(MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY));
  PetscFunctionReturn(0);
}

/* Writes each process's local rows of A, as a sequential matrix, to the binary file outfile.<rank> */
PetscErrorCode MatFileSplit(Mat A,char *outfile)
{
  PetscMPIInt       rank;
  PetscInt          m,N,i,rstart,nnz;
  size_t            len;
  const PetscInt    *indx;
  PetscViewer       out;
  char              *name;
  Mat               B;
  const PetscScalar *values;

  PetscFunctionBegin;
  PetscCall(MatGetLocalSize(A,&m,NULL));
  PetscCall(MatGetSize(A,NULL,&N));
  /* Should this be the type of the diagonal block of A?
 */
  PetscCall(MatCreate(PETSC_COMM_SELF,&B));
  PetscCall(MatSetSizes(B,m,N,m,N));
  PetscCall(MatSetBlockSizesFromMats(B,A,A));
  PetscCall(MatSetType(B,MATSEQAIJ));
  PetscCall(MatSeqAIJSetPreallocation(B,0,NULL));
  PetscCall(MatGetOwnershipRange(A,&rstart,NULL));
  /* copy this process's rows of A into the local sequential matrix B */
  for (i=0; i<m; i++) {
    PetscCall(MatGetRow(A,i+rstart,&nnz,&indx,&values));
    PetscCall(MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES));
    PetscCall(MatRestoreRow(A,i+rstart,&nnz,&indx,&values));
  }
  PetscCall(MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY));

  PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank));
  PetscCall(PetscStrlen(outfile,&len));
  /* file name is "<outfile>.<rank>"; len+6 leaves room for '.', up to 4 rank digits and the NUL.
     NOTE(review): ranks >= 10000 would be truncated by PetscSNPrintf() -- confirm this is acceptable */
  PetscCall(PetscMalloc1(len+6,&name));
  PetscCall(PetscSNPrintf(name,len+6,"%s.%d",outfile,rank));
  PetscCall(PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out));
  PetscCall(PetscFree(name));
  PetscCall(MatView(B,out));
  PetscCall(PetscViewerDestroy(&out));
  PetscCall(MatDestroy(&B));
  PetscFunctionReturn(0);
}

/* Container destructor for the Mat_Merge_SeqsToMPI support structure attached by
   MatCreateMPIAIJSumSeqAIJSymbolic(); frees all merge buffers and the row layout */
static PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(void *data)
{
  Mat_Merge_SeqsToMPI *merge = (Mat_Merge_SeqsToMPI *)data;

  PetscFunctionBegin;
  if (!merge) PetscFunctionReturn(0);
  PetscCall(PetscFree(merge->id_r));
  PetscCall(PetscFree(merge->len_s));
  PetscCall(PetscFree(merge->len_r));
  PetscCall(PetscFree(merge->bi));
  PetscCall(PetscFree(merge->bj));
  /* buf_ri/buf_rj are arrays of pointers whose payload is one contiguous allocation at index 0 */
  PetscCall(PetscFree(merge->buf_ri[0]));
  PetscCall(PetscFree(merge->buf_ri));
  PetscCall(PetscFree(merge->buf_rj[0]));
  PetscCall(PetscFree(merge->buf_rj));
  PetscCall(PetscFree(merge->coi));
  PetscCall(PetscFree(merge->coj));
  PetscCall(PetscFree(merge->owners_co));
  PetscCall(PetscLayoutDestroy(&merge->rowmap));
  PetscCall(PetscFree(merge));
  PetscFunctionReturn(0);
}

#include <../src/mat/utils/freespace.h>
#include
<petscbt.h>

/* Numeric phase of summing per-process sequential AIJ matrices into the parallel matrix
   mpimat previously built by MatCreateMPIAIJSumSeqAIJSymbolic(): ships the off-process
   value segments with MPI and accumulates local plus received values row by row */
PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat)
{
  MPI_Comm            comm;
  Mat_SeqAIJ          *a =(Mat_SeqAIJ*)seqmat->data;
  PetscMPIInt         size,rank,taga,*len_s;
  PetscInt            N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj;
  PetscInt            proc,m;
  PetscInt            **buf_ri,**buf_rj;
  PetscInt            k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
  PetscInt            nrows,**buf_ri_k,**nextrow,**nextai;
  MPI_Request         *s_waits,*r_waits;
  MPI_Status          *status;
  const MatScalar     *aa,*a_a;
  MatScalar           **abuf_r,*ba_i;
  Mat_Merge_SeqsToMPI *merge;
  PetscContainer      container;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)mpimat,&comm));
  PetscCall(PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0));

  PetscCallMPI(MPI_Comm_size(comm,&size));
  PetscCallMPI(MPI_Comm_rank(comm,&rank));

  /* retrieve the merge structure stored on mpimat by the symbolic phase */
  PetscCall(PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container));
  PetscCheck(container,PetscObjectComm((PetscObject)mpimat),PETSC_ERR_PLIB,"Mat not created from MatCreateMPIAIJSumSeqAIJSymbolic");
  PetscCall(PetscContainerGetPointer(container,(void**)&merge));
  PetscCall(MatSeqAIJGetArrayRead(seqmat,&a_a));
  aa = a_a;

  bi     = merge->bi;
  bj     = merge->bj;
  buf_ri = merge->buf_ri;
  buf_rj = merge->buf_rj;

  PetscCall(PetscMalloc1(size,&status));
  owners = merge->rowmap->range;
  len_s  = merge->len_s;

  /* send and recv matrix values */
  /*-----------------------------*/
  PetscCall(PetscObjectGetNewTag((PetscObject)mpimat,&taga));
  PetscCall(PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits));

  PetscCall(PetscMalloc1(merge->nsend+1,&s_waits));
  for (proc=0,k=0; proc<size; proc++) {
    if (!len_s[proc]) continue;
    /* values of the rows owned by proc are contiguous in aa starting at ai[owners[proc]] */
    i = owners[proc];
    PetscCallMPI(MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k));
    k++;
  }

  if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv,r_waits,status));
  if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend,s_waits,status));
  PetscCall(PetscFree(status));

  PetscCall(PetscFree(s_waits));
  PetscCall(PetscFree(r_waits));

  /* insert mat values of mpimat */
  /*----------------------------*/
  PetscCall(PetscMalloc1(N,&ba_i));
  PetscCall(PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai));

  for (k=0; k<merge->nrecv; k++) {
    buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
    nrows       = *(buf_ri_k[k]);
    nextrow[k]  = buf_ri_k[k]+1;           /* next row number of k-th recved i-structure */
    nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure */
  }

  /* set values of ba */
  m = merge->rowmap->n;
  for (i=0; i<m; i++) {
    arow = owners[rank] + i;   /* global row index */
    bj_i = bj+bi[i];           /* col indices of the i-th row of mpimat */
    bnzi = bi[i+1] - bi[i];
    PetscCall(PetscArrayzero(ba_i,bnzi));

    /* add local non-zero vals of this proc's seqmat into ba;
       bj_i is a superset of aj, so matching entries are merged by scanning both sorted lists */
    anzi   = ai[arow+1] - ai[arow];
    aj     = a->j + ai[arow];
    aa     = a_a + ai[arow];
    nextaj = 0;
    for (j=0; nextaj<anzi; j++) {
      if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
        ba_i[j] += aa[nextaj++];
      }
    }

    /* add received vals into ba */
    for (k=0; k<merge->nrecv; k++) { /* k-th received message */
      /* i-th row */
      if (i == *nextrow[k]) {
        anzi   = *(nextai[k]+1) - *nextai[k];
        aj     = buf_rj[k] + *(nextai[k]);
        aa     = abuf_r[k] + *(nextai[k]);
        nextaj = 0;
        for (j=0; nextaj<anzi; j++) {
          if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
            ba_i[j] += aa[nextaj++];
          }
        }
        nextrow[k]++; nextai[k]++;
      }
    }
    PetscCall(MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES));
  }
  PetscCall(MatSeqAIJRestoreArrayRead(seqmat,&a_a));
  PetscCall(MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY));

  PetscCall(PetscFree(abuf_r[0]));
  PetscCall(PetscFree(abuf_r));
  PetscCall(PetscFree(ba_i));
  PetscCall(PetscFree3(buf_ri_k,nextrow,nextai));
  PetscCall(PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0));
  PetscFunctionReturn(0);
}

/* Symbolic phase of the merge: determines the nonzero structure of the parallel sum of the
   per-process sequential matrices and attaches the merge data to *mpimat for the numeric phase */
PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
{
  Mat                 B_mpi;
  Mat_SeqAIJ          *a=(Mat_SeqAIJ*)seqmat->data;
  PetscMPIInt         size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
  PetscInt            **buf_rj,**buf_ri,**buf_ri_k;
  PetscInt            M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
  PetscInt            len,proc,*dnz,*onz,bs,cbs;
  PetscInt            k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi;
  PetscInt            nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
  MPI_Request         *si_waits,*sj_waits,*ri_waits,*rj_waits;
  MPI_Status          *status;
  PetscFreeSpaceList  free_space=NULL,current_space=NULL;
  PetscBT             lnkbt;
  Mat_Merge_SeqsToMPI *merge;
  PetscContainer      container;

  PetscFunctionBegin;
  PetscCall(PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0));

  /* make sure it is a PETSc comm */
  PetscCall(PetscCommDuplicate(comm,&comm,NULL));
  PetscCallMPI(MPI_Comm_size(comm,&size));
  PetscCallMPI(MPI_Comm_rank(comm,&rank));

  PetscCall(PetscNew(&merge));
  PetscCall(PetscMalloc1(size,&status));

  /* determine row ownership */
  /*---------------------------------------------------------*/
  PetscCall(PetscLayoutCreate(comm,&merge->rowmap));
  PetscCall(PetscLayoutSetLocalSize(merge->rowmap,m));
  PetscCall(PetscLayoutSetSize(merge->rowmap,M));
  PetscCall(PetscLayoutSetBlockSize(merge->rowmap,1));
  PetscCall(PetscLayoutSetUp(merge->rowmap));
  PetscCall(PetscMalloc1(size,&len_si));
  PetscCall(PetscMalloc1(size,&merge->len_s));

  m =
merge->rowmap->n;
  owners = merge->rowmap->range;

  /* determine the number of messages to send, their lengths */
  /*---------------------------------------------------------*/
  len_s = merge->len_s;

  len          = 0; /* length of buf_si[] */
  merge->nsend = 0;
  for (proc=0; proc<size; proc++) {
    len_si[proc] = 0;
    if (proc == rank) {
      len_s[proc] = 0;
    } else {
      len_si[proc] = owners[proc+1] - owners[proc] + 1;
      len_s[proc]  = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
    }
    if (len_s[proc]) {
      merge->nsend++;
      /* only rows with at least one nonzero are represented in the i-structure message */
      nrows = 0;
      for (i=owners[proc]; i<owners[proc+1]; i++) {
        if (ai[i+1] > ai[i]) nrows++;
      }
      len_si[proc] = 2*(nrows+1);
      len         += len_si[proc];
    }
  }

  /* determine the number and length of messages to receive for ij-structure */
  /*-------------------------------------------------------------------------*/
  PetscCall(PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv));
  PetscCall(PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri));

  /* post the Irecv of j-structure */
  /*-------------------------------*/
  PetscCall(PetscCommGetNewTag(comm,&tagj));
  PetscCall(PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits));

  /* post the Isend of j-structure */
  /*--------------------------------*/
  PetscCall(PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits));

  for (proc=0, k=0; proc<size; proc++) {
    if (!len_s[proc]) continue;
    i = owners[proc];
    PetscCallMPI(MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k));
    k++;
  }

  /* receives and sends of j-structure are complete */
  /*------------------------------------------------*/
  if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv,rj_waits,status));
  if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend,sj_waits,status));

  /* send and recv i-structure */
  /*---------------------------*/
  PetscCall(PetscCommGetNewTag(comm,&tagi));
  PetscCall(PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits));

  PetscCall(PetscMalloc1(len+1,&buf_s));
  buf_si = buf_s; /* points to the beginning of k-th msg to be sent */
  for (proc=0,k=0; proc<size; proc++) {
    if (!len_s[proc]) continue;
    /* form outgoing message for i-structure:
         buf_si[0]:                 nrows to be sent
               [1:nrows]:           row index (global)
               [nrows+1:2*nrows+1]: i-structure index
    */
    /*-------------------------------------------*/
    nrows       = len_si[proc]/2 - 1;
    buf_si_i    = buf_si + nrows+1;
    buf_si[0]   = nrows;
    buf_si_i[0] = 0;
    nrows       = 0;
    for (i=owners[proc]; i<owners[proc+1]; i++) {
      anzi = ai[i+1] - ai[i];
      if (anzi) {
        buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
        buf_si[nrows+1]   = i-owners[proc];         /* local row index */
        nrows++;
      }
    }
    PetscCallMPI(MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k));
    k++;
    buf_si += len_si[proc];
  }

  if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv,ri_waits,status));
  if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend,si_waits,status));

  PetscCall(PetscInfo(seqmat,"nsend: %d, nrecv: %d\n",merge->nsend,merge->nrecv));
  for (i=0; i<merge->nrecv; i++) {
    PetscCall(PetscInfo(seqmat,"recv len_ri=%d, len_rj=%d from [%d]\n",len_ri[i],merge->len_r[i],merge->id_r[i]));
  }

  PetscCall(PetscFree(len_si));
  PetscCall(PetscFree(len_ri));
  PetscCall(PetscFree(rj_waits));
  PetscCall(PetscFree2(si_waits,sj_waits));
  PetscCall(PetscFree(ri_waits));
  PetscCall(PetscFree(buf_s));
  PetscCall(PetscFree(status));

  /* compute a local seq matrix in each processor */
  /*----------------------------------------------*/
  /* allocate bi array and free space for accumulating nonzero column info */
  PetscCall(PetscMalloc1(m+1,&bi));
  bi[0] = 0;

  /* create and initialize a linked list used to merge sorted column-index sets */
  nlnk = N+1;
  PetscCall(PetscLLCreate(N,N,nlnk,lnk,lnkbt));

  /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
  len = ai[owners[rank+1]] - ai[owners[rank]];
  PetscCall(PetscFreeSpaceGet(PetscIntMultTruncate(2,len)+1,&free_space));

  current_space = free_space;

  /* determine symbolic info for each local row */
  PetscCall(PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai));

  for (k=0; k<merge->nrecv; k++) {
    buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
    nrows       = *buf_ri_k[k];
    nextrow[k]  = buf_ri_k[k] + 1;           /* next row number of k-th recved i-structure */
    nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure */
  }

  MatPreallocateBegin(comm,m,n,dnz,onz);
  len = 0;
  for (i=0; i<m; i++) {
    bnzi = 0;
    /* add local non-zero cols of this proc's seqmat into lnk */
    arow = owners[rank] + i;
    anzi = ai[arow+1] - ai[arow];
    aj   = a->j + ai[arow];
    PetscCall(PetscLLAddSorted(anzi,aj,N,&nlnk,lnk,lnkbt));
    bnzi += nlnk;
    /* add received col data into lnk */
    for (k=0; k<merge->nrecv; k++) { /* k-th received message */
      if (i == *nextrow[k]) { /* i-th row */
        anzi = *(nextai[k]+1) - *nextai[k];
        aj   = buf_rj[k] + *nextai[k];
        PetscCall(PetscLLAddSorted(anzi,aj,N,&nlnk,lnk,lnkbt));
        bnzi += nlnk;
        nextrow[k]++; nextai[k]++;
      }
    }
    if (len < bnzi) len = bnzi; /* =max(bnzi) */

    /* if free space is not available, make more free space */
    if (current_space->local_remaining<bnzi) {
      /* NOTE(review): "&current_space" reconstructed here -- the extracted source was
         character-garbled at this spot; matches the call a few lines above */
      PetscCall(PetscFreeSpaceGet(PetscIntSumTruncate(bnzi,current_space->total_array_size),&current_space));
    }
    /* copy data into free space, then initialize lnk */
    PetscCall(PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt));
    PetscCall(MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz));

    current_space->array           += bnzi;
    current_space->local_used      += bnzi;
    current_space->local_remaining -= bnzi;

    bi[i+1] = bi[i] + bnzi;
  }

  PetscCall(PetscFree3(buf_ri_k,nextrow,nextai));

  PetscCall(PetscMalloc1(bi[m]+1,&bj));
  PetscCall(PetscFreeSpaceContiguous(&free_space,bj));
  PetscCall(PetscLLDestroy(lnk,lnkbt));

  /* create symbolic parallel matrix B_mpi */
  /*---------------------------------------*/
  PetscCall(MatGetBlockSizes(seqmat,&bs,&cbs));
  PetscCall(MatCreate(comm,&B_mpi));
  if (n==PETSC_DECIDE) {
    PetscCall(MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N));
  } else {
    PetscCall(MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE));
  }
  PetscCall(MatSetBlockSizes(B_mpi,bs,cbs));
  PetscCall(MatSetType(B_mpi,MATMPIAIJ));
  PetscCall(MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz));
  MatPreallocateEnd(dnz,onz);
  PetscCall(MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE));

  /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
  B_mpi->assembled = PETSC_FALSE;
  merge->bi        = bi;
  merge->bj        = bj;
  merge->buf_ri    = buf_ri;
  merge->buf_rj    = buf_rj;
  merge->coi       = NULL;
  merge->coj       = NULL;
  merge->owners_co = NULL;

  PetscCall(PetscCommDestroy(&comm));

  /* attach the supporting struct to B_mpi for reuse */
  PetscCall(PetscContainerCreate(PETSC_COMM_SELF,&container));
  PetscCall(PetscContainerSetPointer(container,merge));
  PetscCall(PetscContainerSetUserDestroy(container,MatDestroy_MPIAIJ_SeqsToMPI));
PetscCall(PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container)); 5020 PetscCall(PetscContainerDestroy(&container)); 5021 *mpimat = B_mpi; 5022 5023 PetscCall(PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0)); 5024 PetscFunctionReturn(0); 5025 } 5026 5027 /*@C 5028 MatCreateMPIAIJSumSeqAIJ - Creates a MATMPIAIJ matrix by adding sequential 5029 matrices from each processor 5030 5031 Collective 5032 5033 Input Parameters: 5034 + comm - the communicators the parallel matrix will live on 5035 . seqmat - the input sequential matrices 5036 . m - number of local rows (or PETSC_DECIDE) 5037 . n - number of local columns (or PETSC_DECIDE) 5038 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5039 5040 Output Parameter: 5041 . mpimat - the parallel matrix generated 5042 5043 Level: advanced 5044 5045 Notes: 5046 The dimensions of the sequential matrix in each processor MUST be the same. 5047 The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be 5048 destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat. 
5049 @*/ 5050 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat) 5051 { 5052 PetscMPIInt size; 5053 5054 PetscFunctionBegin; 5055 PetscCallMPI(MPI_Comm_size(comm,&size)); 5056 if (size == 1) { 5057 PetscCall(PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0)); 5058 if (scall == MAT_INITIAL_MATRIX) { 5059 PetscCall(MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat)); 5060 } else { 5061 PetscCall(MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN)); 5062 } 5063 PetscCall(PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0)); 5064 PetscFunctionReturn(0); 5065 } 5066 PetscCall(PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0)); 5067 if (scall == MAT_INITIAL_MATRIX) { 5068 PetscCall(MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat)); 5069 } 5070 PetscCall(MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat)); 5071 PetscCall(PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0)); 5072 PetscFunctionReturn(0); 5073 } 5074 5075 /*@ 5076 MatAIJGetLocalMat - Creates a SeqAIJ from a MATAIJ matrix by taking all its local rows and putting them into a sequential matrix with 5077 mlocal rows and n columns. Where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained 5078 with MatGetSize() 5079 5080 Not Collective 5081 5082 Input Parameters: 5083 + A - the matrix 5084 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5085 5086 Output Parameter: 5087 . A_loc - the local sequential matrix generated 5088 5089 Level: developer 5090 5091 Notes: 5092 In other words combines the two parts of a parallel MPIAIJ matrix on each process to a single matrix. 
5093 5094 Destroy the matrix with MatDestroy() 5095 5096 .seealso: MatMPIAIJGetLocalMat() 5097 5098 @*/ 5099 PetscErrorCode MatAIJGetLocalMat(Mat A,Mat *A_loc) 5100 { 5101 PetscBool mpi; 5102 5103 PetscFunctionBegin; 5104 PetscCall(PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&mpi)); 5105 if (mpi) { 5106 PetscCall(MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,A_loc)); 5107 } else { 5108 *A_loc = A; 5109 PetscCall(PetscObjectReference((PetscObject)*A_loc)); 5110 } 5111 PetscFunctionReturn(0); 5112 } 5113 5114 /*@ 5115 MatMPIAIJGetLocalMat - Creates a SeqAIJ from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with 5116 mlocal rows and n columns. Where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained 5117 with MatGetSize() 5118 5119 Not Collective 5120 5121 Input Parameters: 5122 + A - the matrix 5123 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5124 5125 Output Parameter: 5126 . A_loc - the local sequential matrix generated 5127 5128 Level: developer 5129 5130 Notes: 5131 In other words combines the two parts of a parallel MPIAIJ matrix on each process to a single matrix. 5132 5133 When the communicator associated with A has size 1 and MAT_INITIAL_MATRIX is requested, the matrix returned is the diagonal part of A. 5134 If MAT_REUSE_MATRIX is requested with comm size 1, MatCopy(Adiag,*A_loc,SAME_NONZERO_PATTERN) is called. 5135 This means that one can preallocate the proper sequential matrix first and then call this routine with MAT_REUSE_MATRIX to safely 5136 modify the values of the returned A_loc. 
5137 5138 .seealso: `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMatCondensed()`, `MatMPIAIJGetLocalMatMerge()` 5139 @*/ 5140 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc) 5141 { 5142 Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data; 5143 Mat_SeqAIJ *mat,*a,*b; 5144 PetscInt *ai,*aj,*bi,*bj,*cmap=mpimat->garray; 5145 const PetscScalar *aa,*ba,*aav,*bav; 5146 PetscScalar *ca,*cam; 5147 PetscMPIInt size; 5148 PetscInt am=A->rmap->n,i,j,k,cstart=A->cmap->rstart; 5149 PetscInt *ci,*cj,col,ncols_d,ncols_o,jo; 5150 PetscBool match; 5151 5152 PetscFunctionBegin; 5153 PetscCall(PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&match)); 5154 PetscCheck(match,PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input"); 5155 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A),&size)); 5156 if (size == 1) { 5157 if (scall == MAT_INITIAL_MATRIX) { 5158 PetscCall(PetscObjectReference((PetscObject)mpimat->A)); 5159 *A_loc = mpimat->A; 5160 } else if (scall == MAT_REUSE_MATRIX) { 5161 PetscCall(MatCopy(mpimat->A,*A_loc,SAME_NONZERO_PATTERN)); 5162 } 5163 PetscFunctionReturn(0); 5164 } 5165 5166 PetscCall(PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0)); 5167 a = (Mat_SeqAIJ*)(mpimat->A)->data; 5168 b = (Mat_SeqAIJ*)(mpimat->B)->data; 5169 ai = a->i; aj = a->j; bi = b->i; bj = b->j; 5170 PetscCall(MatSeqAIJGetArrayRead(mpimat->A,&aav)); 5171 PetscCall(MatSeqAIJGetArrayRead(mpimat->B,&bav)); 5172 aa = aav; 5173 ba = bav; 5174 if (scall == MAT_INITIAL_MATRIX) { 5175 PetscCall(PetscMalloc1(1+am,&ci)); 5176 ci[0] = 0; 5177 for (i=0; i<am; i++) { 5178 ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]); 5179 } 5180 PetscCall(PetscMalloc1(1+ci[am],&cj)); 5181 PetscCall(PetscMalloc1(1+ci[am],&ca)); 5182 k = 0; 5183 for (i=0; i<am; i++) { 5184 ncols_o = bi[i+1] - bi[i]; 5185 ncols_d = ai[i+1] - ai[i]; 5186 /* off-diagonal portion of A */ 5187 for (jo=0; jo<ncols_o; jo++) { 5188 col = cmap[*bj]; 5189 if (col >= cstart) break; 5190 cj[k] = 
col; bj++; 5191 ca[k++] = *ba++; 5192 } 5193 /* diagonal portion of A */ 5194 for (j=0; j<ncols_d; j++) { 5195 cj[k] = cstart + *aj++; 5196 ca[k++] = *aa++; 5197 } 5198 /* off-diagonal portion of A */ 5199 for (j=jo; j<ncols_o; j++) { 5200 cj[k] = cmap[*bj++]; 5201 ca[k++] = *ba++; 5202 } 5203 } 5204 /* put together the new matrix */ 5205 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc)); 5206 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5207 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5208 mat = (Mat_SeqAIJ*)(*A_loc)->data; 5209 mat->free_a = PETSC_TRUE; 5210 mat->free_ij = PETSC_TRUE; 5211 mat->nonew = 0; 5212 } else if (scall == MAT_REUSE_MATRIX) { 5213 mat =(Mat_SeqAIJ*)(*A_loc)->data; 5214 ci = mat->i; 5215 cj = mat->j; 5216 PetscCall(MatSeqAIJGetArrayWrite(*A_loc,&cam)); 5217 for (i=0; i<am; i++) { 5218 /* off-diagonal portion of A */ 5219 ncols_o = bi[i+1] - bi[i]; 5220 for (jo=0; jo<ncols_o; jo++) { 5221 col = cmap[*bj]; 5222 if (col >= cstart) break; 5223 *cam++ = *ba++; bj++; 5224 } 5225 /* diagonal portion of A */ 5226 ncols_d = ai[i+1] - ai[i]; 5227 for (j=0; j<ncols_d; j++) *cam++ = *aa++; 5228 /* off-diagonal portion of A */ 5229 for (j=jo; j<ncols_o; j++) { 5230 *cam++ = *ba++; bj++; 5231 } 5232 } 5233 PetscCall(MatSeqAIJRestoreArrayWrite(*A_loc,&cam)); 5234 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 5235 PetscCall(MatSeqAIJRestoreArrayRead(mpimat->A,&aav)); 5236 PetscCall(MatSeqAIJRestoreArrayRead(mpimat->B,&bav)); 5237 PetscCall(PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0)); 5238 PetscFunctionReturn(0); 5239 } 5240 5241 /*@ 5242 MatMPIAIJGetLocalMatMerge - Creates a SeqAIJ from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with 5243 mlocal rows and n columns. 
Where n is the sum of the number of columns of the diagonal and offdiagonal part 5244 5245 Not Collective 5246 5247 Input Parameters: 5248 + A - the matrix 5249 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5250 5251 Output Parameters: 5252 + glob - sequential IS with global indices associated with the columns of the local sequential matrix generated (can be NULL) 5253 - A_loc - the local sequential matrix generated 5254 5255 Level: developer 5256 5257 Notes: 5258 This is different from MatMPIAIJGetLocalMat() since the first columns in the returning matrix are those associated with the diagonal part, then those associated with the offdiagonal part (in its local ordering) 5259 5260 .seealso: `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMat()`, `MatMPIAIJGetLocalMatCondensed()` 5261 5262 @*/ 5263 PetscErrorCode MatMPIAIJGetLocalMatMerge(Mat A,MatReuse scall,IS *glob,Mat *A_loc) 5264 { 5265 Mat Ao,Ad; 5266 const PetscInt *cmap; 5267 PetscMPIInt size; 5268 PetscErrorCode (*f)(Mat,MatReuse,IS*,Mat*); 5269 5270 PetscFunctionBegin; 5271 PetscCall(MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&cmap)); 5272 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A),&size)); 5273 if (size == 1) { 5274 if (scall == MAT_INITIAL_MATRIX) { 5275 PetscCall(PetscObjectReference((PetscObject)Ad)); 5276 *A_loc = Ad; 5277 } else if (scall == MAT_REUSE_MATRIX) { 5278 PetscCall(MatCopy(Ad,*A_loc,SAME_NONZERO_PATTERN)); 5279 } 5280 if (glob) PetscCall(ISCreateStride(PetscObjectComm((PetscObject)Ad),Ad->cmap->n,Ad->cmap->rstart,1,glob)); 5281 PetscFunctionReturn(0); 5282 } 5283 PetscCall(PetscObjectQueryFunction((PetscObject)A,"MatMPIAIJGetLocalMatMerge_C",&f)); 5284 PetscCall(PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0)); 5285 if (f) { 5286 PetscCall((*f)(A,scall,glob,A_loc)); 5287 } else { 5288 Mat_SeqAIJ *a = (Mat_SeqAIJ*)Ad->data; 5289 Mat_SeqAIJ *b = (Mat_SeqAIJ*)Ao->data; 5290 Mat_SeqAIJ *c; 5291 PetscInt *ai = a->i, *aj = a->j; 5292 PetscInt *bi = b->i, *bj = b->j; 5293 PetscInt *ci,*cj; 
5294 const PetscScalar *aa,*ba; 5295 PetscScalar *ca; 5296 PetscInt i,j,am,dn,on; 5297 5298 PetscCall(MatGetLocalSize(Ad,&am,&dn)); 5299 PetscCall(MatGetLocalSize(Ao,NULL,&on)); 5300 PetscCall(MatSeqAIJGetArrayRead(Ad,&aa)); 5301 PetscCall(MatSeqAIJGetArrayRead(Ao,&ba)); 5302 if (scall == MAT_INITIAL_MATRIX) { 5303 PetscInt k; 5304 PetscCall(PetscMalloc1(1+am,&ci)); 5305 PetscCall(PetscMalloc1(ai[am]+bi[am],&cj)); 5306 PetscCall(PetscMalloc1(ai[am]+bi[am],&ca)); 5307 ci[0] = 0; 5308 for (i=0,k=0; i<am; i++) { 5309 const PetscInt ncols_o = bi[i+1] - bi[i]; 5310 const PetscInt ncols_d = ai[i+1] - ai[i]; 5311 ci[i+1] = ci[i] + ncols_o + ncols_d; 5312 /* diagonal portion of A */ 5313 for (j=0; j<ncols_d; j++,k++) { 5314 cj[k] = *aj++; 5315 ca[k] = *aa++; 5316 } 5317 /* off-diagonal portion of A */ 5318 for (j=0; j<ncols_o; j++,k++) { 5319 cj[k] = dn + *bj++; 5320 ca[k] = *ba++; 5321 } 5322 } 5323 /* put together the new matrix */ 5324 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,dn+on,ci,cj,ca,A_loc)); 5325 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5326 /* Since these are PETSc arrays, change flags to free them as necessary. 
*/ 5327 c = (Mat_SeqAIJ*)(*A_loc)->data; 5328 c->free_a = PETSC_TRUE; 5329 c->free_ij = PETSC_TRUE; 5330 c->nonew = 0; 5331 PetscCall(MatSetType(*A_loc,((PetscObject)Ad)->type_name)); 5332 } else if (scall == MAT_REUSE_MATRIX) { 5333 PetscCall(MatSeqAIJGetArrayWrite(*A_loc,&ca)); 5334 for (i=0; i<am; i++) { 5335 const PetscInt ncols_d = ai[i+1] - ai[i]; 5336 const PetscInt ncols_o = bi[i+1] - bi[i]; 5337 /* diagonal portion of A */ 5338 for (j=0; j<ncols_d; j++) *ca++ = *aa++; 5339 /* off-diagonal portion of A */ 5340 for (j=0; j<ncols_o; j++) *ca++ = *ba++; 5341 } 5342 PetscCall(MatSeqAIJRestoreArrayWrite(*A_loc,&ca)); 5343 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 5344 PetscCall(MatSeqAIJRestoreArrayRead(Ad,&aa)); 5345 PetscCall(MatSeqAIJRestoreArrayRead(Ao,&aa)); 5346 if (glob) { 5347 PetscInt cst, *gidx; 5348 5349 PetscCall(MatGetOwnershipRangeColumn(A,&cst,NULL)); 5350 PetscCall(PetscMalloc1(dn+on,&gidx)); 5351 for (i=0; i<dn; i++) gidx[i] = cst + i; 5352 for (i=0; i<on; i++) gidx[i+dn] = cmap[i]; 5353 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)Ad),dn+on,gidx,PETSC_OWN_POINTER,glob)); 5354 } 5355 } 5356 PetscCall(PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0)); 5357 PetscFunctionReturn(0); 5358 } 5359 5360 /*@C 5361 MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MATMPIAIJ matrix by taking all its local rows and NON-ZERO columns 5362 5363 Not Collective 5364 5365 Input Parameters: 5366 + A - the matrix 5367 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5368 - row, col - index sets of rows and columns to extract (or NULL) 5369 5370 Output Parameter: 5371 . 
A_loc - the local sequential matrix generated 5372 5373 Level: developer 5374 5375 .seealso: `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMat()` 5376 5377 @*/ 5378 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc) 5379 { 5380 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5381 PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx; 5382 IS isrowa,iscola; 5383 Mat *aloc; 5384 PetscBool match; 5385 5386 PetscFunctionBegin; 5387 PetscCall(PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match)); 5388 PetscCheck(match,PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input"); 5389 PetscCall(PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0)); 5390 if (!row) { 5391 start = A->rmap->rstart; end = A->rmap->rend; 5392 PetscCall(ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa)); 5393 } else { 5394 isrowa = *row; 5395 } 5396 if (!col) { 5397 start = A->cmap->rstart; 5398 cmap = a->garray; 5399 nzA = a->A->cmap->n; 5400 nzB = a->B->cmap->n; 5401 PetscCall(PetscMalloc1(nzA+nzB, &idx)); 5402 ncols = 0; 5403 for (i=0; i<nzB; i++) { 5404 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5405 else break; 5406 } 5407 imark = i; 5408 for (i=0; i<nzA; i++) idx[ncols++] = start + i; 5409 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; 5410 PetscCall(ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola)); 5411 } else { 5412 iscola = *col; 5413 } 5414 if (scall != MAT_INITIAL_MATRIX) { 5415 PetscCall(PetscMalloc1(1,&aloc)); 5416 aloc[0] = *A_loc; 5417 } 5418 PetscCall(MatCreateSubMatrices(A,1,&isrowa,&iscola,scall,&aloc)); 5419 if (!col) { /* attach global id of condensed columns */ 5420 PetscCall(PetscObjectCompose((PetscObject)aloc[0],"_petsc_GetLocalMatCondensed_iscol",(PetscObject)iscola)); 5421 } 5422 *A_loc = aloc[0]; 5423 PetscCall(PetscFree(aloc)); 5424 if (!row) { 5425 PetscCall(ISDestroy(&isrowa)); 5426 } 5427 if (!col) { 5428 PetscCall(ISDestroy(&iscola)); 5429 } 5430 
PetscCall(PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0)); 5431 PetscFunctionReturn(0); 5432 } 5433 5434 /* 5435 * Create a sequential AIJ matrix based on row indices. a whole column is extracted once a row is matched. 5436 * Row could be local or remote.The routine is designed to be scalable in memory so that nothing is based 5437 * on a global size. 5438 * */ 5439 PetscErrorCode MatCreateSeqSubMatrixWithRows_Private(Mat P,IS rows,Mat *P_oth) 5440 { 5441 Mat_MPIAIJ *p=(Mat_MPIAIJ*)P->data; 5442 Mat_SeqAIJ *pd=(Mat_SeqAIJ*)(p->A)->data,*po=(Mat_SeqAIJ*)(p->B)->data,*p_oth; 5443 PetscInt plocalsize,nrows,*ilocal,*oilocal,i,lidx,*nrcols,*nlcols,ncol; 5444 PetscMPIInt owner; 5445 PetscSFNode *iremote,*oiremote; 5446 const PetscInt *lrowindices; 5447 PetscSF sf,osf; 5448 PetscInt pcstart,*roffsets,*loffsets,*pnnz,j; 5449 PetscInt ontotalcols,dntotalcols,ntotalcols,nout; 5450 MPI_Comm comm; 5451 ISLocalToGlobalMapping mapping; 5452 const PetscScalar *pd_a,*po_a; 5453 5454 PetscFunctionBegin; 5455 PetscCall(PetscObjectGetComm((PetscObject)P,&comm)); 5456 /* plocalsize is the number of roots 5457 * nrows is the number of leaves 5458 * */ 5459 PetscCall(MatGetLocalSize(P,&plocalsize,NULL)); 5460 PetscCall(ISGetLocalSize(rows,&nrows)); 5461 PetscCall(PetscCalloc1(nrows,&iremote)); 5462 PetscCall(ISGetIndices(rows,&lrowindices)); 5463 for (i=0;i<nrows;i++) { 5464 /* Find a remote index and an owner for a row 5465 * The row could be local or remote 5466 * */ 5467 owner = 0; 5468 lidx = 0; 5469 PetscCall(PetscLayoutFindOwnerIndex(P->rmap,lrowindices[i],&owner,&lidx)); 5470 iremote[i].index = lidx; 5471 iremote[i].rank = owner; 5472 } 5473 /* Create SF to communicate how many nonzero columns for each row */ 5474 PetscCall(PetscSFCreate(comm,&sf)); 5475 /* SF will figure out the number of nonzero colunms for each row, and their 5476 * offsets 5477 * */ 5478 PetscCall(PetscSFSetGraph(sf,plocalsize,nrows,NULL,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER)); 5479 
PetscCall(PetscSFSetFromOptions(sf)); 5480 PetscCall(PetscSFSetUp(sf)); 5481 5482 PetscCall(PetscCalloc1(2*(plocalsize+1),&roffsets)); 5483 PetscCall(PetscCalloc1(2*plocalsize,&nrcols)); 5484 PetscCall(PetscCalloc1(nrows,&pnnz)); 5485 roffsets[0] = 0; 5486 roffsets[1] = 0; 5487 for (i=0;i<plocalsize;i++) { 5488 /* diag */ 5489 nrcols[i*2+0] = pd->i[i+1] - pd->i[i]; 5490 /* off diag */ 5491 nrcols[i*2+1] = po->i[i+1] - po->i[i]; 5492 /* compute offsets so that we relative location for each row */ 5493 roffsets[(i+1)*2+0] = roffsets[i*2+0] + nrcols[i*2+0]; 5494 roffsets[(i+1)*2+1] = roffsets[i*2+1] + nrcols[i*2+1]; 5495 } 5496 PetscCall(PetscCalloc1(2*nrows,&nlcols)); 5497 PetscCall(PetscCalloc1(2*nrows,&loffsets)); 5498 /* 'r' means root, and 'l' means leaf */ 5499 PetscCall(PetscSFBcastBegin(sf,MPIU_2INT,nrcols,nlcols,MPI_REPLACE)); 5500 PetscCall(PetscSFBcastBegin(sf,MPIU_2INT,roffsets,loffsets,MPI_REPLACE)); 5501 PetscCall(PetscSFBcastEnd(sf,MPIU_2INT,nrcols,nlcols,MPI_REPLACE)); 5502 PetscCall(PetscSFBcastEnd(sf,MPIU_2INT,roffsets,loffsets,MPI_REPLACE)); 5503 PetscCall(PetscSFDestroy(&sf)); 5504 PetscCall(PetscFree(roffsets)); 5505 PetscCall(PetscFree(nrcols)); 5506 dntotalcols = 0; 5507 ontotalcols = 0; 5508 ncol = 0; 5509 for (i=0;i<nrows;i++) { 5510 pnnz[i] = nlcols[i*2+0] + nlcols[i*2+1]; 5511 ncol = PetscMax(pnnz[i],ncol); 5512 /* diag */ 5513 dntotalcols += nlcols[i*2+0]; 5514 /* off diag */ 5515 ontotalcols += nlcols[i*2+1]; 5516 } 5517 /* We do not need to figure the right number of columns 5518 * since all the calculations will be done by going through the raw data 5519 * */ 5520 PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF,nrows,ncol,0,pnnz,P_oth)); 5521 PetscCall(MatSetUp(*P_oth)); 5522 PetscCall(PetscFree(pnnz)); 5523 p_oth = (Mat_SeqAIJ*) (*P_oth)->data; 5524 /* diag */ 5525 PetscCall(PetscCalloc1(dntotalcols,&iremote)); 5526 /* off diag */ 5527 PetscCall(PetscCalloc1(ontotalcols,&oiremote)); 5528 /* diag */ 5529 
PetscCall(PetscCalloc1(dntotalcols,&ilocal)); 5530 /* off diag */ 5531 PetscCall(PetscCalloc1(ontotalcols,&oilocal)); 5532 dntotalcols = 0; 5533 ontotalcols = 0; 5534 ntotalcols = 0; 5535 for (i=0;i<nrows;i++) { 5536 owner = 0; 5537 PetscCall(PetscLayoutFindOwnerIndex(P->rmap,lrowindices[i],&owner,NULL)); 5538 /* Set iremote for diag matrix */ 5539 for (j=0;j<nlcols[i*2+0];j++) { 5540 iremote[dntotalcols].index = loffsets[i*2+0] + j; 5541 iremote[dntotalcols].rank = owner; 5542 /* P_oth is seqAIJ so that ilocal need to point to the first part of memory */ 5543 ilocal[dntotalcols++] = ntotalcols++; 5544 } 5545 /* off diag */ 5546 for (j=0;j<nlcols[i*2+1];j++) { 5547 oiremote[ontotalcols].index = loffsets[i*2+1] + j; 5548 oiremote[ontotalcols].rank = owner; 5549 oilocal[ontotalcols++] = ntotalcols++; 5550 } 5551 } 5552 PetscCall(ISRestoreIndices(rows,&lrowindices)); 5553 PetscCall(PetscFree(loffsets)); 5554 PetscCall(PetscFree(nlcols)); 5555 PetscCall(PetscSFCreate(comm,&sf)); 5556 /* P serves as roots and P_oth is leaves 5557 * Diag matrix 5558 * */ 5559 PetscCall(PetscSFSetGraph(sf,pd->i[plocalsize],dntotalcols,ilocal,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER)); 5560 PetscCall(PetscSFSetFromOptions(sf)); 5561 PetscCall(PetscSFSetUp(sf)); 5562 5563 PetscCall(PetscSFCreate(comm,&osf)); 5564 /* Off diag */ 5565 PetscCall(PetscSFSetGraph(osf,po->i[plocalsize],ontotalcols,oilocal,PETSC_OWN_POINTER,oiremote,PETSC_OWN_POINTER)); 5566 PetscCall(PetscSFSetFromOptions(osf)); 5567 PetscCall(PetscSFSetUp(osf)); 5568 PetscCall(MatSeqAIJGetArrayRead(p->A,&pd_a)); 5569 PetscCall(MatSeqAIJGetArrayRead(p->B,&po_a)); 5570 /* We operate on the matrix internal data for saving memory */ 5571 PetscCall(PetscSFBcastBegin(sf,MPIU_SCALAR,pd_a,p_oth->a,MPI_REPLACE)); 5572 PetscCall(PetscSFBcastBegin(osf,MPIU_SCALAR,po_a,p_oth->a,MPI_REPLACE)); 5573 PetscCall(MatGetOwnershipRangeColumn(P,&pcstart,NULL)); 5574 /* Convert to global indices for diag matrix */ 5575 for 
(i=0;i<pd->i[plocalsize];i++) pd->j[i] += pcstart; 5576 PetscCall(PetscSFBcastBegin(sf,MPIU_INT,pd->j,p_oth->j,MPI_REPLACE)); 5577 /* We want P_oth store global indices */ 5578 PetscCall(ISLocalToGlobalMappingCreate(comm,1,p->B->cmap->n,p->garray,PETSC_COPY_VALUES,&mapping)); 5579 /* Use memory scalable approach */ 5580 PetscCall(ISLocalToGlobalMappingSetType(mapping,ISLOCALTOGLOBALMAPPINGHASH)); 5581 PetscCall(ISLocalToGlobalMappingApply(mapping,po->i[plocalsize],po->j,po->j)); 5582 PetscCall(PetscSFBcastBegin(osf,MPIU_INT,po->j,p_oth->j,MPI_REPLACE)); 5583 PetscCall(PetscSFBcastEnd(sf,MPIU_INT,pd->j,p_oth->j,MPI_REPLACE)); 5584 /* Convert back to local indices */ 5585 for (i=0;i<pd->i[plocalsize];i++) pd->j[i] -= pcstart; 5586 PetscCall(PetscSFBcastEnd(osf,MPIU_INT,po->j,p_oth->j,MPI_REPLACE)); 5587 nout = 0; 5588 PetscCall(ISGlobalToLocalMappingApply(mapping,IS_GTOLM_DROP,po->i[plocalsize],po->j,&nout,po->j)); 5589 PetscCheck(nout == po->i[plocalsize],comm,PETSC_ERR_ARG_INCOMP,"n %" PetscInt_FMT " does not equal to nout %" PetscInt_FMT " ",po->i[plocalsize],nout); 5590 PetscCall(ISLocalToGlobalMappingDestroy(&mapping)); 5591 /* Exchange values */ 5592 PetscCall(PetscSFBcastEnd(sf,MPIU_SCALAR,pd_a,p_oth->a,MPI_REPLACE)); 5593 PetscCall(PetscSFBcastEnd(osf,MPIU_SCALAR,po_a,p_oth->a,MPI_REPLACE)); 5594 PetscCall(MatSeqAIJRestoreArrayRead(p->A,&pd_a)); 5595 PetscCall(MatSeqAIJRestoreArrayRead(p->B,&po_a)); 5596 /* Stop PETSc from shrinking memory */ 5597 for (i=0;i<nrows;i++) p_oth->ilen[i] = p_oth->imax[i]; 5598 PetscCall(MatAssemblyBegin(*P_oth,MAT_FINAL_ASSEMBLY)); 5599 PetscCall(MatAssemblyEnd(*P_oth,MAT_FINAL_ASSEMBLY)); 5600 /* Attach PetscSF objects to P_oth so that we can reuse it later */ 5601 PetscCall(PetscObjectCompose((PetscObject)*P_oth,"diagsf",(PetscObject)sf)); 5602 PetscCall(PetscObjectCompose((PetscObject)*P_oth,"offdiagsf",(PetscObject)osf)); 5603 PetscCall(PetscSFDestroy(&sf)); 5604 PetscCall(PetscSFDestroy(&osf)); 5605 PetscFunctionReturn(0); 
5606 } 5607 5608 /* 5609 * Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 5610 * This supports MPIAIJ and MAIJ 5611 * */ 5612 PetscErrorCode MatGetBrowsOfAcols_MPIXAIJ(Mat A,Mat P,PetscInt dof,MatReuse reuse,Mat *P_oth) 5613 { 5614 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data,*p=(Mat_MPIAIJ*)P->data; 5615 Mat_SeqAIJ *p_oth; 5616 IS rows,map; 5617 PetscHMapI hamp; 5618 PetscInt i,htsize,*rowindices,off,*mapping,key,count; 5619 MPI_Comm comm; 5620 PetscSF sf,osf; 5621 PetscBool has; 5622 5623 PetscFunctionBegin; 5624 PetscCall(PetscObjectGetComm((PetscObject)A,&comm)); 5625 PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAocols,A,P,0,0)); 5626 /* If it is the first time, create an index set of off-diag nonzero columns of A, 5627 * and then create a submatrix (that often is an overlapping matrix) 5628 * */ 5629 if (reuse == MAT_INITIAL_MATRIX) { 5630 /* Use a hash table to figure out unique keys */ 5631 PetscCall(PetscHMapICreate(&hamp)); 5632 PetscCall(PetscHMapIResize(hamp,a->B->cmap->n)); 5633 PetscCall(PetscCalloc1(a->B->cmap->n,&mapping)); 5634 count = 0; 5635 /* Assume that a->g is sorted, otherwise the following does not make sense */ 5636 for (i=0;i<a->B->cmap->n;i++) { 5637 key = a->garray[i]/dof; 5638 PetscCall(PetscHMapIHas(hamp,key,&has)); 5639 if (!has) { 5640 mapping[i] = count; 5641 PetscCall(PetscHMapISet(hamp,key,count++)); 5642 } else { 5643 /* Current 'i' has the same value the previous step */ 5644 mapping[i] = count-1; 5645 } 5646 } 5647 PetscCall(ISCreateGeneral(comm,a->B->cmap->n,mapping,PETSC_OWN_POINTER,&map)); 5648 PetscCall(PetscHMapIGetSize(hamp,&htsize)); 5649 PetscCheck(htsize==count,comm,PETSC_ERR_ARG_INCOMP," Size of hash map %" PetscInt_FMT " is inconsistent with count %" PetscInt_FMT " ",htsize,count); 5650 PetscCall(PetscCalloc1(htsize,&rowindices)); 5651 off = 0; 5652 PetscCall(PetscHMapIGetKeys(hamp,&off,rowindices)); 5653 PetscCall(PetscHMapIDestroy(&hamp)); 5654 PetscCall(PetscSortInt(htsize,rowindices)); 
5655 PetscCall(ISCreateGeneral(comm,htsize,rowindices,PETSC_OWN_POINTER,&rows)); 5656 /* In case, the matrix was already created but users want to recreate the matrix */ 5657 PetscCall(MatDestroy(P_oth)); 5658 PetscCall(MatCreateSeqSubMatrixWithRows_Private(P,rows,P_oth)); 5659 PetscCall(PetscObjectCompose((PetscObject)*P_oth,"aoffdiagtopothmapping",(PetscObject)map)); 5660 PetscCall(ISDestroy(&map)); 5661 PetscCall(ISDestroy(&rows)); 5662 } else if (reuse == MAT_REUSE_MATRIX) { 5663 /* If matrix was already created, we simply update values using SF objects 5664 * that as attached to the matrix ealier. 5665 */ 5666 const PetscScalar *pd_a,*po_a; 5667 5668 PetscCall(PetscObjectQuery((PetscObject)*P_oth,"diagsf",(PetscObject*)&sf)); 5669 PetscCall(PetscObjectQuery((PetscObject)*P_oth,"offdiagsf",(PetscObject*)&osf)); 5670 PetscCheck(sf && osf,comm,PETSC_ERR_ARG_NULL,"Matrix is not initialized yet"); 5671 p_oth = (Mat_SeqAIJ*) (*P_oth)->data; 5672 /* Update values in place */ 5673 PetscCall(MatSeqAIJGetArrayRead(p->A,&pd_a)); 5674 PetscCall(MatSeqAIJGetArrayRead(p->B,&po_a)); 5675 PetscCall(PetscSFBcastBegin(sf,MPIU_SCALAR,pd_a,p_oth->a,MPI_REPLACE)); 5676 PetscCall(PetscSFBcastBegin(osf,MPIU_SCALAR,po_a,p_oth->a,MPI_REPLACE)); 5677 PetscCall(PetscSFBcastEnd(sf,MPIU_SCALAR,pd_a,p_oth->a,MPI_REPLACE)); 5678 PetscCall(PetscSFBcastEnd(osf,MPIU_SCALAR,po_a,p_oth->a,MPI_REPLACE)); 5679 PetscCall(MatSeqAIJRestoreArrayRead(p->A,&pd_a)); 5680 PetscCall(MatSeqAIJRestoreArrayRead(p->B,&po_a)); 5681 } else SETERRQ(comm,PETSC_ERR_ARG_UNKNOWN_TYPE,"Unknown reuse type"); 5682 PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAocols,A,P,0,0)); 5683 PetscFunctionReturn(0); 5684 } 5685 5686 /*@C 5687 MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 5688 5689 Collective on Mat 5690 5691 Input Parameters: 5692 + A - the first matrix in mpiaij format 5693 . 
B - the second matrix in mpiaij format 5694 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5695 5696 Output Parameters: 5697 + rowb - On input index sets of rows of B to extract (or NULL), modified on output 5698 . colb - On input index sets of columns of B to extract (or NULL), modified on output 5699 - B_seq - the sequential matrix generated 5700 5701 Level: developer 5702 5703 @*/ 5704 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq) 5705 { 5706 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5707 PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark; 5708 IS isrowb,iscolb; 5709 Mat *bseq=NULL; 5710 5711 PetscFunctionBegin; 5712 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 5713 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5714 } 5715 PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0)); 5716 5717 if (scall == MAT_INITIAL_MATRIX) { 5718 start = A->cmap->rstart; 5719 cmap = a->garray; 5720 nzA = a->A->cmap->n; 5721 nzB = a->B->cmap->n; 5722 PetscCall(PetscMalloc1(nzA+nzB, &idx)); 5723 ncols = 0; 5724 for (i=0; i<nzB; i++) { /* row < local row index */ 5725 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5726 else break; 5727 } 5728 imark = i; 5729 for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */ 5730 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 5731 PetscCall(ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb)); 5732 PetscCall(ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb)); 5733 } else { 5734 PetscCheck(rowb && colb,PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 5735 isrowb = *rowb; iscolb = *colb; 5736 PetscCall(PetscMalloc1(1,&bseq)); 5737 bseq[0] = *B_seq; 5738 } 5739 
PetscCall(MatCreateSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq)); 5740 *B_seq = bseq[0]; 5741 PetscCall(PetscFree(bseq)); 5742 if (!rowb) { 5743 PetscCall(ISDestroy(&isrowb)); 5744 } else { 5745 *rowb = isrowb; 5746 } 5747 if (!colb) { 5748 PetscCall(ISDestroy(&iscolb)); 5749 } else { 5750 *colb = iscolb; 5751 } 5752 PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0)); 5753 PetscFunctionReturn(0); 5754 } 5755 5756 /* 5757 MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns 5758 of the OFF-DIAGONAL portion of local A 5759 5760 Collective on Mat 5761 5762 Input Parameters: 5763 + A,B - the matrices in mpiaij format 5764 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5765 5766 Output Parameter: 5767 + startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL) 5768 . startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL) 5769 . bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL) 5770 - B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N 5771 5772 Developer Notes: This directly accesses information inside the VecScatter associated with the matrix-vector product 5773 for this matrix. This is not desirable.. 
5774 5775 Level: developer 5776 5777 */ 5778 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth) 5779 { 5780 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5781 Mat_SeqAIJ *b_oth; 5782 VecScatter ctx; 5783 MPI_Comm comm; 5784 const PetscMPIInt *rprocs,*sprocs; 5785 const PetscInt *srow,*rstarts,*sstarts; 5786 PetscInt *rowlen,*bufj,*bufJ,ncols = 0,aBn=a->B->cmap->n,row,*b_othi,*b_othj,*rvalues=NULL,*svalues=NULL,*cols,sbs,rbs; 5787 PetscInt i,j,k=0,l,ll,nrecvs,nsends,nrows,*rstartsj = NULL,*sstartsj,len; 5788 PetscScalar *b_otha,*bufa,*bufA,*vals = NULL; 5789 MPI_Request *reqs = NULL,*rwaits = NULL,*swaits = NULL; 5790 PetscMPIInt size,tag,rank,nreqs; 5791 5792 PetscFunctionBegin; 5793 PetscCall(PetscObjectGetComm((PetscObject)A,&comm)); 5794 PetscCallMPI(MPI_Comm_size(comm,&size)); 5795 5796 if (PetscUnlikely(A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend)) { 5797 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5798 } 5799 PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0)); 5800 PetscCallMPI(MPI_Comm_rank(comm,&rank)); 5801 5802 if (size == 1) { 5803 startsj_s = NULL; 5804 bufa_ptr = NULL; 5805 *B_oth = NULL; 5806 PetscFunctionReturn(0); 5807 } 5808 5809 ctx = a->Mvctx; 5810 tag = ((PetscObject)ctx)->tag; 5811 5812 PetscCall(VecScatterGetRemote_Private(ctx,PETSC_TRUE/*send*/,&nsends,&sstarts,&srow,&sprocs,&sbs)); 5813 /* rprocs[] must be ordered so that indices received from them are ordered in rvalues[], which is key to algorithms used in this subroutine */ 5814 PetscCall(VecScatterGetRemoteOrdered_Private(ctx,PETSC_FALSE/*recv*/,&nrecvs,&rstarts,NULL/*indices not needed*/,&rprocs,&rbs)); 5815 PetscCall(PetscMPIIntCast(nsends+nrecvs,&nreqs)); 5816 
PetscCall(PetscMalloc1(nreqs,&reqs)); 5817 rwaits = reqs; 5818 swaits = reqs + nrecvs; 5819 5820 if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 5821 if (scall == MAT_INITIAL_MATRIX) { 5822 /* i-array */ 5823 /*---------*/ 5824 /* post receives */ 5825 if (nrecvs) PetscCall(PetscMalloc1(rbs*(rstarts[nrecvs] - rstarts[0]),&rvalues)); /* rstarts can be NULL when nrecvs=0 */ 5826 for (i=0; i<nrecvs; i++) { 5827 rowlen = rvalues + rstarts[i]*rbs; 5828 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */ 5829 PetscCallMPI(MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i)); 5830 } 5831 5832 /* pack the outgoing message */ 5833 PetscCall(PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj)); 5834 5835 sstartsj[0] = 0; 5836 rstartsj[0] = 0; 5837 len = 0; /* total length of j or a array to be sent */ 5838 if (nsends) { 5839 k = sstarts[0]; /* ATTENTION: sstarts[0] and rstarts[0] are not necessarily zero */ 5840 PetscCall(PetscMalloc1(sbs*(sstarts[nsends]-sstarts[0]),&svalues)); 5841 } 5842 for (i=0; i<nsends; i++) { 5843 rowlen = svalues + (sstarts[i]-sstarts[0])*sbs; 5844 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5845 for (j=0; j<nrows; j++) { 5846 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 5847 for (l=0; l<sbs; l++) { 5848 PetscCall(MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL)); /* rowlength */ 5849 5850 rowlen[j*sbs+l] = ncols; 5851 5852 len += ncols; 5853 PetscCall(MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL)); 5854 } 5855 k++; 5856 } 5857 PetscCallMPI(MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i)); 5858 5859 sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */ 5860 } 5861 /* recvs and sends of i-array are completed */ 5862 if (nreqs) PetscCallMPI(MPI_Waitall(nreqs,reqs,MPI_STATUSES_IGNORE)); 5863 PetscCall(PetscFree(svalues)); 5864 5865 /* allocate buffers for sending j and a arrays */ 5866 PetscCall(PetscMalloc1(len+1,&bufj)); 5867 
PetscCall(PetscMalloc1(len+1,&bufa)); 5868 5869 /* create i-array of B_oth */ 5870 PetscCall(PetscMalloc1(aBn+2,&b_othi)); 5871 5872 b_othi[0] = 0; 5873 len = 0; /* total length of j or a array to be received */ 5874 k = 0; 5875 for (i=0; i<nrecvs; i++) { 5876 rowlen = rvalues + (rstarts[i]-rstarts[0])*rbs; 5877 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of rows to be received */ 5878 for (j=0; j<nrows; j++) { 5879 b_othi[k+1] = b_othi[k] + rowlen[j]; 5880 PetscCall(PetscIntSumError(rowlen[j],len,&len)); 5881 k++; 5882 } 5883 rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 5884 } 5885 PetscCall(PetscFree(rvalues)); 5886 5887 /* allocate space for j and a arrays of B_oth */ 5888 PetscCall(PetscMalloc1(b_othi[aBn]+1,&b_othj)); 5889 PetscCall(PetscMalloc1(b_othi[aBn]+1,&b_otha)); 5890 5891 /* j-array */ 5892 /*---------*/ 5893 /* post receives of j-array */ 5894 for (i=0; i<nrecvs; i++) { 5895 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5896 PetscCallMPI(MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i)); 5897 } 5898 5899 /* pack the outgoing message j-array */ 5900 if (nsends) k = sstarts[0]; 5901 for (i=0; i<nsends; i++) { 5902 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5903 bufJ = bufj+sstartsj[i]; 5904 for (j=0; j<nrows; j++) { 5905 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5906 for (ll=0; ll<sbs; ll++) { 5907 PetscCall(MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL)); 5908 for (l=0; l<ncols; l++) { 5909 *bufJ++ = cols[l]; 5910 } 5911 PetscCall(MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL)); 5912 } 5913 } 5914 PetscCallMPI(MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i)); 5915 } 5916 5917 /* recvs and sends of j-array are completed */ 5918 if (nreqs) PetscCallMPI(MPI_Waitall(nreqs,reqs,MPI_STATUSES_IGNORE)); 5919 } else if (scall == MAT_REUSE_MATRIX) { 5920 sstartsj = *startsj_s; 5921 rstartsj = 
*startsj_r; 5922 bufa = *bufa_ptr; 5923 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5924 PetscCall(MatSeqAIJGetArrayWrite(*B_oth,&b_otha)); 5925 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not possess an object container"); 5926 5927 /* a-array */ 5928 /*---------*/ 5929 /* post receives of a-array */ 5930 for (i=0; i<nrecvs; i++) { 5931 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5932 PetscCallMPI(MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i)); 5933 } 5934 5935 /* pack the outgoing message a-array */ 5936 if (nsends) k = sstarts[0]; 5937 for (i=0; i<nsends; i++) { 5938 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5939 bufA = bufa+sstartsj[i]; 5940 for (j=0; j<nrows; j++) { 5941 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5942 for (ll=0; ll<sbs; ll++) { 5943 PetscCall(MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals)); 5944 for (l=0; l<ncols; l++) { 5945 *bufA++ = vals[l]; 5946 } 5947 PetscCall(MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals)); 5948 } 5949 } 5950 PetscCallMPI(MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i)); 5951 } 5952 /* recvs and sends of a-array are completed */ 5953 if (nreqs) PetscCallMPI(MPI_Waitall(nreqs,reqs,MPI_STATUSES_IGNORE)); 5954 PetscCall(PetscFree(reqs)); 5955 5956 if (scall == MAT_INITIAL_MATRIX) { 5957 /* put together the new matrix */ 5958 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth)); 5959 5960 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5961 /* Since these are PETSc arrays, change flags to free them as necessary. 
*/ 5962 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5963 b_oth->free_a = PETSC_TRUE; 5964 b_oth->free_ij = PETSC_TRUE; 5965 b_oth->nonew = 0; 5966 5967 PetscCall(PetscFree(bufj)); 5968 if (!startsj_s || !bufa_ptr) { 5969 PetscCall(PetscFree2(sstartsj,rstartsj)); 5970 PetscCall(PetscFree(bufa_ptr)); 5971 } else { 5972 *startsj_s = sstartsj; 5973 *startsj_r = rstartsj; 5974 *bufa_ptr = bufa; 5975 } 5976 } else if (scall == MAT_REUSE_MATRIX) { 5977 PetscCall(MatSeqAIJRestoreArrayWrite(*B_oth,&b_otha)); 5978 } 5979 5980 PetscCall(VecScatterRestoreRemote_Private(ctx,PETSC_TRUE,&nsends,&sstarts,&srow,&sprocs,&sbs)); 5981 PetscCall(VecScatterRestoreRemoteOrdered_Private(ctx,PETSC_FALSE,&nrecvs,&rstarts,NULL,&rprocs,&rbs)); 5982 PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0)); 5983 PetscFunctionReturn(0); 5984 } 5985 5986 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*); 5987 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*); 5988 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJSELL(Mat,MatType,MatReuse,Mat*); 5989 #if defined(PETSC_HAVE_MKL_SPARSE) 5990 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat,MatType,MatReuse,Mat*); 5991 #endif 5992 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIBAIJ(Mat,MatType,MatReuse,Mat*); 5993 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*); 5994 #if defined(PETSC_HAVE_ELEMENTAL) 5995 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat,MatType,MatReuse,Mat*); 5996 #endif 5997 #if defined(PETSC_HAVE_SCALAPACK) 5998 PETSC_INTERN PetscErrorCode MatConvert_AIJ_ScaLAPACK(Mat,MatType,MatReuse,Mat*); 5999 #endif 6000 #if defined(PETSC_HAVE_HYPRE) 6001 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*); 6002 #endif 6003 #if defined(PETSC_HAVE_CUDA) 6004 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCUSPARSE(Mat,MatType,MatReuse,Mat*); 6005 #endif 6006 #if defined(PETSC_HAVE_KOKKOS_KERNELS) 
PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJKokkos(Mat,MatType,MatReuse,Mat*);
#endif
PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat,MatType,MatReuse,Mat*);
PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat,MatType,MatReuse,Mat*);
PETSC_INTERN PetscErrorCode MatProductSetFromOptions_IS_XAIJ(Mat);

/*
    Computes C = A*B (A dense, B AIJ) as (B'*A')' since computing B*A directly is untenable

           n           p            p
         [   ]       [   ]        [   ]
      m  [ A ]  *  n [ B ]  =  m  [ C ]
         [   ]       [   ]        [   ]

*/
/* Numeric phase: forms both transposes, multiplies them, then transposes the product back into C. */
static PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
{
  Mat At,Bt,Ct;

  PetscFunctionBegin;
  PetscCall(MatTranspose(A,MAT_INITIAL_MATRIX,&At));
  PetscCall(MatTranspose(B,MAT_INITIAL_MATRIX,&Bt));
  PetscCall(MatMatMult(Bt,At,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&Ct));
  PetscCall(MatDestroy(&At));
  PetscCall(MatDestroy(&Bt));
  /* MAT_REUSE_MATRIX: write the transpose of Ct into the preallocated C */
  PetscCall(MatTranspose(Ct,MAT_REUSE_MATRIX,&C));
  PetscCall(MatDestroy(&Ct));
  PetscFunctionReturn(0);
}

/* Symbolic phase: sizes C, makes it dense (unless already a dense subtype), and installs the numeric routine. */
static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat C)
{
  PetscBool cisdense;

  PetscFunctionBegin;
  PetscCheck(A->cmap->n == B->rmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %" PetscInt_FMT " != B->rmap->n %" PetscInt_FMT,A->cmap->n,B->rmap->n);
  PetscCall(MatSetSizes(C,A->rmap->n,B->cmap->n,A->rmap->N,B->cmap->N));
  PetscCall(MatSetBlockSizesFromMats(C,A,B));
  PetscCall(PetscObjectTypeCompareAny((PetscObject)C,&cisdense,MATMPIDENSE,MATMPIDENSECUDA,""));
  if (!cisdense) {
    /* inherit the dense type of A so that e.g. CUDA dense stays CUDA dense */
    PetscCall(MatSetType(C,((PetscObject)A)->type_name));
  }
  PetscCall(MatSetUp(C));

  C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
  PetscFunctionReturn(0);
}

/* ----------------------------------------------------------------*/
/* Registers the AB product path after checking A's column layout matches B's row layout. */
static PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ_AB(Mat C)
{
  Mat_Product *product = C->product;
  Mat A = product->A,B=product->B;

  PetscFunctionBegin;
  if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend)
    SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);

  C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIAIJ;
  C->ops->productsymbolic = MatProductSymbolic_AB;
  PetscFunctionReturn(0);
}

/* Dispatch: only MATPRODUCT_AB is supported for MPIDense*MPIAIJ; other product types are left untouched. */
PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ(Mat C)
{
  Mat_Product *product = C->product;

  PetscFunctionBegin;
  if (product->type == MATPRODUCT_AB) {
    PetscCall(MatProductSetFromOptions_MPIDense_MPIAIJ_AB(C));
  }
  PetscFunctionReturn(0);
}

/* Merge two sets of sorted nonzeros and return a CSR for the merged (sequential) matrix

  Input Parameters:

    j1,rowBegin1,rowEnd1,perm1,jmap1: describe the first set of nonzeros (Set1)
    j2,rowBegin2,rowEnd2,perm2,jmap2: describe the second set of nonzeros (Set2)

    mat: both sets' nonzeros are on m rows, where m is the number of local rows of the matrix mat

    For Set1, j1[] contains column indices of the nonzeros.
    For the k-th row (0<=k<m), [rowBegin1[k],rowEnd1[k]) index into j1[] and point to the begin/end nonzero in row k
    respectively (note rowEnd1[k] is not necessarily equal to rowBegin1[k+1]). Indices in this range of j1[] are sorted,
    but might have repeats. jmap1[t+1] - jmap1[t] is the number of repeats for the t-th unique nonzero in Set1.

    Similar for Set2.

    This routine merges the two sets of nonzeros row by row and removes repeats.

  Output Parameters: (memory is allocated by the caller)

    i[],j[]: the CSR of the merged matrix, which has m rows.
    imap1[]: the k-th unique nonzero in Set1 (k=0,1,...)
     corresponds to imap1[k]-th unique nonzero in the merged matrix.
    imap2[]: similar to imap1[], but for Set2.
    Note we order nonzeros row-by-row and from left to right.
*/
static PetscErrorCode MatMergeEntries_Internal(Mat mat,const PetscInt j1[],const PetscInt j2[],const PetscCount rowBegin1[],const PetscCount rowEnd1[],
                                               const PetscCount rowBegin2[],const PetscCount rowEnd2[],const PetscCount jmap1[],const PetscCount jmap2[],
                                               PetscCount imap1[],PetscCount imap2[],PetscInt i[],PetscInt j[])
{
  PetscInt r,m; /* Row index of mat */
  PetscCount t,t1,t2,b1,e1,b2,e2;

  PetscFunctionBegin;
  PetscCall(MatGetLocalSize(mat,&m,NULL));
  t1 = t2 = t = 0; /* Count unique nonzeros in Set1, Set2 and the merged set, respectively */
  i[0] = 0;
  for (r=0; r<m; r++) { /* Do row by row merging (classic sorted two-way merge over unique columns) */
    b1 = rowBegin1[r];
    e1 = rowEnd1[r];
    b2 = rowBegin2[r];
    e2 = rowEnd2[r];
    while (b1 < e1 && b2 < e2) {
      if (j1[b1] == j2[b2]) { /* Same column index and hence same nonzero */
        j[t] = j1[b1];
        imap1[t1] = t;
        imap2[t2] = t;
        b1 += jmap1[t1+1] - jmap1[t1]; /* Jump to next unique local nonzero (skipping its repeats) */
        b2 += jmap2[t2+1] - jmap2[t2]; /* Jump to next unique remote nonzero (skipping its repeats) */
        t1++; t2++; t++;
      } else if (j1[b1] < j2[b2]) { /* Set1's column comes first */
        j[t] = j1[b1];
        imap1[t1] = t;
        b1 += jmap1[t1+1] - jmap1[t1];
        t1++; t++;
      } else { /* Set2's column comes first */
        j[t] = j2[b2];
        imap2[t2] = t;
        b2 += jmap2[t2+1] - jmap2[t2];
        t2++; t++;
      }
    }
    /* Merge the remaining in either j1[] or j2[] */
    while (b1 < e1) {
      j[t] = j1[b1];
      imap1[t1] = t;
      b1 += jmap1[t1+1] - jmap1[t1];
      t1++; t++;
    }
    while (b2 < e2) {
      j[t] = j2[b2];
      imap2[t2] = t;
      b2 += jmap2[t2+1] - jmap2[t2];
      t2++; t++;
    }
    i[r+1] = t; /* CSR row pointer: t is now the count of merged unique nonzeros through row r */
  }
  PetscFunctionReturn(0);
}

/* Split nonzeros in a block of local rows into two subsets: those in the diagonal block and those
   in the off-diagonal block

  Input Parameters:
    mat: an MPI matrix that provides row and column layout information for splitting. Let's say its number of local rows is m.
    n,i[],j[],perm[]: there are n input entries, belonging to m rows. Row/col indices of the entries are stored in i[] and j[]
      respectively, along with a permutation array perm[]. Length of the i[],j[],perm[] arrays is n.

    i[] is already sorted, but within a row, j[] is not sorted and might have repeats.
    i[] might contain negative indices at the beginning, which means the corresponding entries should be ignored in the splitting.

  Output Parameters:
    j[],perm[]: the routine needs to sort j[] within each row along with perm[].
    rowBegin[],rowMid[],rowEnd[]: of length m, and the memory is preallocated and zeroed by the caller.
      They contain indices pointing to j[]. For 0<=r<m, [rowBegin[r],rowMid[r]) point to begin/end entries of row r of the diagonal block,
      and [rowMid[r],rowEnd[r]) point to begin/end entries of row r of the off-diagonal block.

    Aperm[],Ajmap[],Atot,Annz: arrays are allocated by this routine.
      Atot: number of entries belonging to the diagonal block, counting repeats (i.e., entries with the same 'i,j' pair).
      Annz: number of unique nonzeros belonging to the diagonal block.
      Aperm[Atot] stores values from perm[] for entries belonging to the diagonal block. Length of Aperm[] is Atot.
      Ajmap[Annz+1] stores the number of repeats of each unique entry belonging to the diagonal block. More precisely, Ajmap[t+1] - Ajmap[t]
        is the number of repeats for the t-th unique entry in the diagonal block. Ajmap[0] is always 0.

    Bperm[], Bjmap[], Btot, Bnnz are similar but for the off-diagonal block.
   Aperm[],Bperm[],Ajmap[] and Bjmap[] are allocated separately by this routine with PetscMalloc1().
*/
static PetscErrorCode MatSplitEntries_Internal(Mat mat,PetscCount n,const PetscInt i[],PetscInt j[],
                                               PetscCount perm[],PetscCount rowBegin[],PetscCount rowMid[],PetscCount rowEnd[],
                                               PetscCount *Atot_,PetscCount **Aperm_,PetscCount *Annz_,PetscCount **Ajmap_,
                                               PetscCount *Btot_,PetscCount **Bperm_,PetscCount *Bnnz_,PetscCount **Bjmap_)
{
  PetscInt cstart,cend,rstart,rend,row,col;
  PetscCount Atot=0,Btot=0; /* Total number of nonzeros in the diagonal and off-diagonal blocks */
  PetscCount Annz=0,Bnnz=0; /* Number of unique nonzeros in the diagonal and off-diagonal blocks */
  PetscCount k,m,p,q,r,s,mid;
  PetscCount *Aperm,*Bperm,*Ajmap,*Bjmap;

  PetscFunctionBegin;
  PetscCall(PetscLayoutGetRange(mat->rmap,&rstart,&rend));
  PetscCall(PetscLayoutGetRange(mat->cmap,&cstart,&cend));
  m = rend - rstart;

  for (k=0; k<n; k++) {if (i[k]>=0) break;} /* Skip negative rows (entries flagged to be ignored) */

  /* Process [k,n): sort and partition each local row into diag and offdiag portions,
     fill rowBegin[], rowMid[], rowEnd[], and count Atot, Btot, Annz, Bnnz.
  */
  while (k<n) {
    row = i[k];
    /* Entries in [k,s) are in one row. Shift diagonal block col indices so that diag is ahead of offdiag after sorting the row */
    for (s=k; s<n; s++) if (i[s] != row) break;
    for (p=k; p<s; p++) {
      if (j[p] >= cstart && j[p] < cend) j[p] -= PETSC_MAX_INT; /* Shift diag columns to range of [-PETSC_MAX_INT, -1] */
      /* NOTE(review): the bound below accepts j[p] == mat->cmap->N; valid columns are [0,N) -- confirm whether '<' was intended */
      else PetscAssert((j[p] >= 0) && (j[p] <= mat->cmap->N),PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column index %" PetscInt_FMT " is out of range",j[p]);
    }
    PetscCall(PetscSortIntWithCountArray(s-k,j+k,perm+k));
    PetscCall(PetscSortedIntUpperBound(j,k,s,-1,&mid)); /* Separate [k,s) into [k,mid) for diag and [mid,s) for offdiag */
    rowBegin[row-rstart] = k;
    rowMid[row-rstart] = mid;
    rowEnd[row-rstart] = s;

    /* Count nonzeros of this diag/offdiag row, which might have repeats */
    Atot += mid - k;
    Btot += s - mid;

    /* Count unique nonzeros of this diag/offdiag row */
    for (p=k; p<mid;) {
      col = j[p]; /* col holds the still-shifted value; subsequent j[p] are also shifted, so the comparison is consistent */
      do {j[p] += PETSC_MAX_INT; p++;} while (p<mid && j[p] == col); /* Revert the modified diagonal indices */
      Annz++;
    }

    for (p=mid; p<s;) {
      col = j[p];
      do {p++;} while (p<s && j[p] == col);
      Bnnz++;
    }
    k = s;
  }

  /* Allocation according to Atot, Btot, Annz, Bnnz */
  PetscCall(PetscMalloc1(Atot,&Aperm));
  PetscCall(PetscMalloc1(Btot,&Bperm));
  PetscCall(PetscMalloc1(Annz+1,&Ajmap));
  PetscCall(PetscMalloc1(Bnnz+1,&Bjmap));

  /* Re-scan indices and copy diag/offdiag permutation indices to Aperm, Bperm and also fill Ajmap and Bjmap */
  Ajmap[0] = Bjmap[0] = Atot = Btot = Annz = Bnnz = 0; /* counters reused as running offsets in the second pass */
  for (r=0; r<m; r++) {
    k = rowBegin[r];
    mid = rowMid[r];
    s = rowEnd[r];
    PetscCall(PetscArraycpy(Aperm+Atot,perm+k, mid-k));
    PetscCall(PetscArraycpy(Bperm+Btot,perm+mid,s-mid));
    Atot += mid - k;
    Btot += s - mid;

    /* Scan column indices in this row and find out how many repeats each unique nonzero has */
    for (p=k; p<mid;) {
      col = j[p];
      q = p;
      do {p++;} while (p<mid && j[p] == col);
      Ajmap[Annz+1] = Ajmap[Annz] + (p - q);
      Annz++;
    }

    for (p=mid; p<s;) {
      col = j[p];
      q = p;
      do {p++;} while (p<s && j[p] == col);
      Bjmap[Bnnz+1] = Bjmap[Bnnz] + (p - q);
      Bnnz++;
    }
  }
  /* Output */
  *Aperm_ = Aperm;
  *Annz_ = Annz;
  *Atot_ = Atot;
  *Ajmap_ = Ajmap;
  *Bperm_ = Bperm;
  *Bnnz_ = Bnnz;
  *Btot_ = Btot;
  *Bjmap_ = Bjmap;
  PetscFunctionReturn(0);
}

/* Expand the jmap[] array to make a new one in view of nonzeros in the merged matrix

  Input Parameters:
    nnz1: number of unique nonzeros in a set that was used to produce imap[], jmap[]
    nnz: number of unique nonzeros in the merged matrix
    imap[nnz1]: i-th nonzero in the set is the imap[i]-th nonzero in the merged matrix
    jmap[nnz1+1]: i-th nonzero in the set has jmap[i+1] - jmap[i] repeats in the set

  Output Parameter: (memory is allocated by the caller)
    jmap_new[nnz+1]: i-th nonzero in the merged matrix has jmap_new[i+1] - jmap_new[i] repeats in the set

  Example:
    nnz1 = 4
    nnz  = 6
    imap = [1,3,4,5]
    jmap = [0,3,5,6,7]
  then,
    jmap_new = [0,0,3,3,5,6,7]
*/
static PetscErrorCode ExpandJmap_Internal(PetscCount nnz1,PetscCount nnz,const PetscCount imap[],const PetscCount jmap[],PetscCount jmap_new[])
{
  PetscCount k,p;

  PetscFunctionBegin;
  jmap_new[0] = 0;
  p = nnz; /* p loops over jmap_new[] backwards */
  for (k=nnz1-1; k>=0; k--) { /* k loops over imap[] */
    for (; p > imap[k]; p--) jmap_new[p] = jmap[k+1];
  }
  for (; p >= 0; p--) jmap_new[p] = jmap[0]; /* entries before the first mapped nonzero have zero repeats */
  PetscFunctionReturn(0);
}

/* Preallocate an MPIAIJ matrix from COO (i,j) input: sorts/splits/merges local and
   remote entries, builds the diag (A) and offdiag (B) CSR structures, and stashes the
   mappings needed by MatSetValuesCOO_MPIAIJ. coo_i[]/coo_j[] are modified in place. */
PetscErrorCode MatSetPreallocationCOO_MPIAIJ(Mat mat, PetscCount coo_n, PetscInt coo_i[], PetscInt coo_j[])
{
  MPI_Comm comm;
  PetscMPIInt rank,size;
  PetscInt
m,n,M,N,rstart,rend,cstart,cend; /* Sizes, indices of row/col, therefore with type PetscInt */ 6329 PetscCount k,p,q,rem; /* Loop variables over coo arrays */ 6330 Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ*)mat->data; 6331 6332 PetscFunctionBegin; 6333 PetscCall(PetscFree(mpiaij->garray)); 6334 PetscCall(VecDestroy(&mpiaij->lvec)); 6335 #if defined(PETSC_USE_CTABLE) 6336 PetscCall(PetscTableDestroy(&mpiaij->colmap)); 6337 #else 6338 PetscCall(PetscFree(mpiaij->colmap)); 6339 #endif 6340 PetscCall(VecScatterDestroy(&mpiaij->Mvctx)); 6341 mat->assembled = PETSC_FALSE; 6342 mat->was_assembled = PETSC_FALSE; 6343 PetscCall(MatResetPreallocationCOO_MPIAIJ(mat)); 6344 6345 PetscCall(PetscObjectGetComm((PetscObject)mat,&comm)); 6346 PetscCallMPI(MPI_Comm_size(comm,&size)); 6347 PetscCallMPI(MPI_Comm_rank(comm,&rank)); 6348 PetscCall(PetscLayoutSetUp(mat->rmap)); 6349 PetscCall(PetscLayoutSetUp(mat->cmap)); 6350 PetscCall(PetscLayoutGetRange(mat->rmap,&rstart,&rend)); 6351 PetscCall(PetscLayoutGetRange(mat->cmap,&cstart,&cend)); 6352 PetscCall(MatGetLocalSize(mat,&m,&n)); 6353 PetscCall(MatGetSize(mat,&M,&N)); 6354 6355 /* ---------------------------------------------------------------------------*/ 6356 /* Sort (i,j) by row along with a permutation array, so that the to-be-ignored */ 6357 /* entries come first, then local rows, then remote rows. */ 6358 /* ---------------------------------------------------------------------------*/ 6359 PetscCount n1 = coo_n,*perm1; 6360 PetscInt *i1 = coo_i,*j1 = coo_j; 6361 6362 PetscCall(PetscMalloc1(n1,&perm1)); 6363 for (k=0; k<n1; k++) perm1[k] = k; 6364 6365 /* Manipulate indices so that entries with negative row or col indices will have smallest 6366 row indices, local entries will have greater but negative row indices, and remote entries 6367 will have positive row indices. 
6368 */ 6369 for (k=0; k<n1; k++) { 6370 if (i1[k] < 0 || j1[k] < 0) i1[k] = PETSC_MIN_INT; /* e.g., -2^31, minimal to move them ahead */ 6371 else if (i1[k] >= rstart && i1[k] < rend) i1[k] -= PETSC_MAX_INT; /* e.g., minus 2^31-1 to shift local rows to range of [-PETSC_MAX_INT, -1] */ 6372 else { 6373 PetscCheck(!mat->nooffprocentries,PETSC_COMM_SELF,PETSC_ERR_USER_INPUT,"MAT_NO_OFF_PROC_ENTRIES is set but insert to remote rows"); 6374 if (mpiaij->donotstash) i1[k] = PETSC_MIN_INT; /* Ignore offproc entries as if they had negative indices */ 6375 } 6376 } 6377 6378 /* Sort by row; after that, [0,k) have ignored entires, [k,rem) have local rows and [rem,n1) have remote rows */ 6379 PetscCall(PetscSortIntWithIntCountArrayPair(n1,i1,j1,perm1)); 6380 for (k=0; k<n1; k++) {if (i1[k] > PETSC_MIN_INT) break;} /* Advance k to the first entry we need to take care of */ 6381 PetscCall(PetscSortedIntUpperBound(i1,k,n1,rend-1-PETSC_MAX_INT,&rem)); /* rem is upper bound of the last local row */ 6382 for (; k<rem; k++) i1[k] += PETSC_MAX_INT; /* Revert row indices of local rows*/ 6383 6384 /* ---------------------------------------------------------------------------*/ 6385 /* Split local rows into diag/offdiag portions */ 6386 /* ---------------------------------------------------------------------------*/ 6387 PetscCount *rowBegin1,*rowMid1,*rowEnd1; 6388 PetscCount *Ajmap1,*Aperm1,*Bjmap1,*Bperm1,*Cperm1; 6389 PetscCount Annz1,Bnnz1,Atot1,Btot1; 6390 6391 PetscCall(PetscCalloc3(m,&rowBegin1,m,&rowMid1,m,&rowEnd1)); 6392 PetscCall(PetscMalloc1(n1-rem,&Cperm1)); 6393 PetscCall(MatSplitEntries_Internal(mat,rem,i1,j1,perm1,rowBegin1,rowMid1,rowEnd1,&Atot1,&Aperm1,&Annz1,&Ajmap1,&Btot1,&Bperm1,&Bnnz1,&Bjmap1)); 6394 6395 /* ---------------------------------------------------------------------------*/ 6396 /* Send remote rows to their owner */ 6397 /* ---------------------------------------------------------------------------*/ 6398 /* Find which rows should be sent to which 
remote ranks*/ 6399 PetscInt nsend = 0; /* Number of MPI ranks to send data to */ 6400 PetscMPIInt *sendto; /* [nsend], storing remote ranks */ 6401 PetscInt *nentries; /* [nsend], storing number of entries sent to remote ranks; Assume PetscInt is big enough for this count, and error if not */ 6402 const PetscInt *ranges; 6403 PetscInt maxNsend = size >= 128? 128 : size; /* Assume max 128 neighbors; realloc when needed */ 6404 6405 PetscCall(PetscLayoutGetRanges(mat->rmap,&ranges)); 6406 PetscCall(PetscMalloc2(maxNsend,&sendto,maxNsend,&nentries)); 6407 for (k=rem; k<n1;) { 6408 PetscMPIInt owner; 6409 PetscInt firstRow,lastRow; 6410 6411 /* Locate a row range */ 6412 firstRow = i1[k]; /* first row of this owner */ 6413 PetscCall(PetscLayoutFindOwner(mat->rmap,firstRow,&owner)); 6414 lastRow = ranges[owner+1]-1; /* last row of this owner */ 6415 6416 /* Find the first index 'p' in [k,n) with i[p] belonging to next owner */ 6417 PetscCall(PetscSortedIntUpperBound(i1,k,n1,lastRow,&p)); 6418 6419 /* All entries in [k,p) belong to this remote owner */ 6420 if (nsend >= maxNsend) { /* Double the remote ranks arrays if not long enough */ 6421 PetscMPIInt *sendto2; 6422 PetscInt *nentries2; 6423 PetscInt maxNsend2 = (maxNsend <= size/2) ? 
maxNsend*2 : size; 6424 6425 PetscCall(PetscMalloc2(maxNsend2,&sendto2,maxNsend2,&nentries2)); 6426 PetscCall(PetscArraycpy(sendto2,sendto,maxNsend)); 6427 PetscCall(PetscArraycpy(nentries2,nentries2,maxNsend+1)); 6428 PetscCall(PetscFree2(sendto,nentries2)); 6429 sendto = sendto2; 6430 nentries = nentries2; 6431 maxNsend = maxNsend2; 6432 } 6433 sendto[nsend] = owner; 6434 nentries[nsend] = p - k; 6435 PetscCall(PetscCountCast(p-k,&nentries[nsend])); 6436 nsend++; 6437 k = p; 6438 } 6439 6440 /* Build 1st SF to know offsets on remote to send data */ 6441 PetscSF sf1; 6442 PetscInt nroots = 1,nroots2 = 0; 6443 PetscInt nleaves = nsend,nleaves2 = 0; 6444 PetscInt *offsets; 6445 PetscSFNode *iremote; 6446 6447 PetscCall(PetscSFCreate(comm,&sf1)); 6448 PetscCall(PetscMalloc1(nsend,&iremote)); 6449 PetscCall(PetscMalloc1(nsend,&offsets)); 6450 for (k=0; k<nsend; k++) { 6451 iremote[k].rank = sendto[k]; 6452 iremote[k].index = 0; 6453 nleaves2 += nentries[k]; 6454 PetscCheck(nleaves2 >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Number of SF leaves is too large for PetscInt"); 6455 } 6456 PetscCall(PetscSFSetGraph(sf1,nroots,nleaves,NULL,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER)); 6457 PetscCall(PetscSFFetchAndOpWithMemTypeBegin(sf1,MPIU_INT,PETSC_MEMTYPE_HOST,&nroots2/*rootdata*/,PETSC_MEMTYPE_HOST,nentries/*leafdata*/,PETSC_MEMTYPE_HOST,offsets/*leafupdate*/,MPI_SUM)); 6458 PetscCall(PetscSFFetchAndOpEnd(sf1,MPIU_INT,&nroots2,nentries,offsets,MPI_SUM)); /* Would nroots2 overflow, we check offsets[] below */ 6459 PetscCall(PetscSFDestroy(&sf1)); 6460 PetscAssert(nleaves2 == n1-rem,PETSC_COMM_SELF,PETSC_ERR_PLIB,"nleaves2 %" PetscInt_FMT " != number of remote entries %" PetscCount_FMT "",nleaves2,n1-rem); 6461 6462 /* Build 2nd SF to send remote COOs to their owner */ 6463 PetscSF sf2; 6464 nroots = nroots2; 6465 nleaves = nleaves2; 6466 PetscCall(PetscSFCreate(comm,&sf2)); 6467 PetscCall(PetscSFSetFromOptions(sf2)); 6468 PetscCall(PetscMalloc1(nleaves,&iremote)); 
  p = 0;
  /* Leaves are laid out contiguously per destination rank, starting at each rank's fetched offset */
  for (k=0; k<nsend; k++) {
    PetscCheck(offsets[k] >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Number of SF roots is too large for PetscInt");
    for (q=0; q<nentries[k]; q++,p++) {
      iremote[p].rank = sendto[k];
      iremote[p].index = offsets[k] + q;
    }
  }
  PetscCall(PetscSFSetGraph(sf2,nroots,nleaves,NULL,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER));

  /* sf2 only sends contiguous leafdata to contiguous rootdata. We record the permutation which will be used to fill leafdata */
  PetscCall(PetscArraycpy(Cperm1,perm1+rem,n1-rem));

  /* Send the remote COOs to their owner */
  PetscInt n2 = nroots,*i2,*j2; /* Buffers for received COOs from other ranks, along with a permutation array */
  PetscCount *perm2; /* Though PetscInt is enough for remote entries, we use PetscCount here as we want to reuse MatSplitEntries_Internal() */
  PetscCall(PetscMalloc3(n2,&i2,n2,&j2,n2,&perm2));
  PetscCall(PetscSFReduceWithMemTypeBegin(sf2,MPIU_INT,PETSC_MEMTYPE_HOST,i1+rem,PETSC_MEMTYPE_HOST,i2,MPI_REPLACE));
  PetscCall(PetscSFReduceEnd(sf2,MPIU_INT,i1+rem,i2,MPI_REPLACE));
  PetscCall(PetscSFReduceWithMemTypeBegin(sf2,MPIU_INT,PETSC_MEMTYPE_HOST,j1+rem,PETSC_MEMTYPE_HOST,j2,MPI_REPLACE));
  PetscCall(PetscSFReduceEnd(sf2,MPIU_INT,j1+rem,j2,MPI_REPLACE));

  PetscCall(PetscFree(offsets));
  PetscCall(PetscFree2(sendto,nentries));

  /* ---------------------------------------------------------------*/
  /* Sort received COOs by row along with the permutation array      */
  /* ---------------------------------------------------------------*/
  for (k=0; k<n2; k++) perm2[k] = k;
  PetscCall(PetscSortIntWithIntCountArrayPair(n2,i2,j2,perm2));

  /* ---------------------------------------------------------------*/
  /* Split received COOs into diag/offdiag portions                  */
  /* ---------------------------------------------------------------*/
  PetscCount *rowBegin2,*rowMid2,*rowEnd2;
  PetscCount *Ajmap2,*Aperm2,*Bjmap2,*Bperm2;
  PetscCount Annz2,Bnnz2,Atot2,Btot2;

  PetscCall(PetscCalloc3(m,&rowBegin2,m,&rowMid2,m,&rowEnd2));
  PetscCall(MatSplitEntries_Internal(mat,n2,i2,j2,perm2,rowBegin2,rowMid2,rowEnd2,&Atot2,&Aperm2,&Annz2,&Ajmap2,&Btot2,&Bperm2,&Bnnz2,&Bjmap2));

  /* --------------------------------------------------------------------------*/
  /* Merge local COOs with received COOs: diag with diag, offdiag with offdiag  */
  /* --------------------------------------------------------------------------*/
  PetscInt *Ai,*Bi;
  PetscInt *Aj,*Bj;

  PetscCall(PetscMalloc1(m+1,&Ai));
  PetscCall(PetscMalloc1(m+1,&Bi));
  PetscCall(PetscMalloc1(Annz1+Annz2,&Aj)); /* Since local and remote entries might have dups, we might allocate excess memory */
  PetscCall(PetscMalloc1(Bnnz1+Bnnz2,&Bj));

  PetscCount *Aimap1,*Bimap1,*Aimap2,*Bimap2;
  PetscCall(PetscMalloc1(Annz1,&Aimap1));
  PetscCall(PetscMalloc1(Bnnz1,&Bimap1));
  PetscCall(PetscMalloc1(Annz2,&Aimap2));
  PetscCall(PetscMalloc1(Bnnz2,&Bimap2));

  PetscCall(MatMergeEntries_Internal(mat,j1,j2,rowBegin1,rowMid1,rowBegin2,rowMid2,Ajmap1,Ajmap2,Aimap1,Aimap2,Ai,Aj));
  PetscCall(MatMergeEntries_Internal(mat,j1,j2,rowMid1, rowEnd1,rowMid2, rowEnd2,Bjmap1,Bjmap2,Bimap1,Bimap2,Bi,Bj));

  /* --------------------------------------------------------------------------*/
  /* Expand Ajmap1/Bjmap1 to make them based off nonzeros in A/B, since we      */
  /* expect nonzeros in A/B most likely have local contributing entries         */
  /* --------------------------------------------------------------------------*/
  PetscInt Annz = Ai[m]; /* unique nonzeros of the merged diag block */
  PetscInt Bnnz = Bi[m]; /* unique nonzeros of the merged offdiag block */
  PetscCount *Ajmap1_new,*Bjmap1_new;

  PetscCall(PetscMalloc1(Annz+1,&Ajmap1_new));
  PetscCall(PetscMalloc1(Bnnz+1,&Bjmap1_new));
  PetscCall(ExpandJmap_Internal(Annz1,Annz,Aimap1,Ajmap1,Ajmap1_new));
  PetscCall(ExpandJmap_Internal(Bnnz1,Bnnz,Bimap1,Bjmap1,Bjmap1_new));

  /* Intermediate local-set maps are no longer needed once expanded */
  PetscCall(PetscFree(Aimap1));
  PetscCall(PetscFree(Ajmap1));
  PetscCall(PetscFree(Bimap1));
  PetscCall(PetscFree(Bjmap1));
  PetscCall(PetscFree3(rowBegin1,rowMid1,rowEnd1));
  PetscCall(PetscFree3(rowBegin2,rowMid2,rowEnd2));
  PetscCall(PetscFree(perm1));
  PetscCall(PetscFree3(i2,j2,perm2));

  Ajmap1 = Ajmap1_new;
  Bjmap1 = Bjmap1_new;

  /* Reallocate Aj, Bj once we know actual numbers of unique nonzeros in A and B */
  if (Annz < Annz1 + Annz2) {
    PetscInt *Aj_new;
    PetscCall(PetscMalloc1(Annz,&Aj_new));
    PetscCall(PetscArraycpy(Aj_new,Aj,Annz));
    PetscCall(PetscFree(Aj));
    Aj = Aj_new;
  }

  if (Bnnz < Bnnz1 + Bnnz2) {
    PetscInt *Bj_new;
    PetscCall(PetscMalloc1(Bnnz,&Bj_new));
    PetscCall(PetscArraycpy(Bj_new,Bj,Bnnz));
    PetscCall(PetscFree(Bj));
    Bj = Bj_new;
  }

  /* --------------------------------------------------------------------------------*/
  /* Create new submatrices for on-process and off-process coupling                   */
  /* --------------------------------------------------------------------------------*/
  PetscScalar *Aa,*Ba;
  MatType rtype;
  Mat_SeqAIJ *a,*b;
  PetscCall(PetscCalloc1(Annz,&Aa)); /* Zero-initialized values; MatSetValuesCOO fills them later */
  PetscCall(PetscCalloc1(Bnnz,&Ba));
  /* make Aj[] local, i.e, based off the start column of the diagonal portion */
  if (cstart) {for (k=0; k<Annz; k++) Aj[k] -= cstart;}
  PetscCall(MatDestroy(&mpiaij->A));
  PetscCall(MatDestroy(&mpiaij->B));
  PetscCall(MatGetRootType_Private(mat,&rtype));
  PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,Ai,Aj,Aa,&mpiaij->A));
  PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,mat->cmap->N,Bi,Bj,Ba,&mpiaij->B));
  PetscCall(MatSetUpMultiply_MPIAIJ(mat));

  /* Transfer ownership of the CSR arrays to the submatrices */
  a = (Mat_SeqAIJ*)mpiaij->A->data;
  b = (Mat_SeqAIJ*)mpiaij->B->data;
  a->singlemalloc = b->singlemalloc = PETSC_FALSE; /* Let newmat own Ai,Aj,Aa,Bi,Bj,Ba */
  a->free_a = b->free_a = PETSC_TRUE;
  a->free_ij = b->free_ij = PETSC_TRUE;

  /* conversion must happen AFTER multiply setup */
  PetscCall(MatConvert(mpiaij->A,rtype,MAT_INPLACE_MATRIX,&mpiaij->A));
  PetscCall(MatConvert(mpiaij->B,rtype,MAT_INPLACE_MATRIX,&mpiaij->B));
  PetscCall(VecDestroy(&mpiaij->lvec));
  PetscCall(MatCreateVecs(mpiaij->B,&mpiaij->lvec,NULL));
  PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)mpiaij->lvec));

  /* Stash everything MatSetValuesCOO_MPIAIJ needs to scatter and accumulate values */
  mpiaij->coo_n = coo_n;
  mpiaij->coo_sf = sf2;
  mpiaij->sendlen = nleaves;
  mpiaij->recvlen = nroots;

  mpiaij->Annz = Annz;
  mpiaij->Bnnz = Bnnz;

  mpiaij->Annz2 = Annz2;
  mpiaij->Bnnz2 = Bnnz2;

  mpiaij->Atot1 = Atot1;
  mpiaij->Atot2 = Atot2;
  mpiaij->Btot1 = Btot1;
  mpiaij->Btot2 = Btot2;

  mpiaij->Ajmap1 = Ajmap1;
  mpiaij->Aperm1 = Aperm1;

  mpiaij->Bjmap1 = Bjmap1;
  mpiaij->Bperm1 = Bperm1;

  mpiaij->Aimap2 = Aimap2;
  mpiaij->Ajmap2 = Ajmap2;
  mpiaij->Aperm2 = Aperm2;

  mpiaij->Bimap2 = Bimap2;
  mpiaij->Bjmap2 = Bjmap2;
  mpiaij->Bperm2 = Bperm2;

  mpiaij->Cperm1 = Cperm1;

  /* Allocate in preallocation.
     If not used, it has zero cost on host */
  PetscCall(PetscMalloc2(mpiaij->sendlen,&mpiaij->sendbuf,mpiaij->recvlen,&mpiaij->recvbuf));
  PetscFunctionReturn(0);
}

/* MatSetValuesCOO_MPIAIJ - insert/add the values v[] into the matrix, using the COO
   maps (Ajmap/Aperm/Bjmap/Bperm/Cperm etc.) that MatSetPreallocationCOO_MPIAIJ() stored
   in Mat_MPIAIJ. Entries destined for other ranks are shipped through mpiaij->coo_sf,
   overlapped with the local insertion. */
static PetscErrorCode MatSetValuesCOO_MPIAIJ(Mat mat,const PetscScalar v[],InsertMode imode)
{
  Mat_MPIAIJ       *mpiaij = (Mat_MPIAIJ*)mat->data;
  Mat              A = mpiaij->A,B = mpiaij->B;
  PetscCount       Annz = mpiaij->Annz,Annz2 = mpiaij->Annz2,Bnnz = mpiaij->Bnnz,Bnnz2 = mpiaij->Bnnz2;
  PetscScalar      *Aa,*Ba;
  PetscScalar      *sendbuf = mpiaij->sendbuf;
  PetscScalar      *recvbuf = mpiaij->recvbuf;
  const PetscCount *Ajmap1 = mpiaij->Ajmap1,*Ajmap2 = mpiaij->Ajmap2,*Aimap2 = mpiaij->Aimap2;
  const PetscCount *Bjmap1 = mpiaij->Bjmap1,*Bjmap2 = mpiaij->Bjmap2,*Bimap2 = mpiaij->Bimap2;
  const PetscCount *Aperm1 = mpiaij->Aperm1,*Aperm2 = mpiaij->Aperm2,*Bperm1 = mpiaij->Bperm1,*Bperm2 = mpiaij->Bperm2;
  const PetscCount *Cperm1 = mpiaij->Cperm1;

  PetscFunctionBegin;
  PetscCall(MatSeqAIJGetArray(A,&Aa)); /* Might read and write matrix values */
  PetscCall(MatSeqAIJGetArray(B,&Ba));

  /* Pack entries to be sent to remote */
  for (PetscCount i=0; i<mpiaij->sendlen; i++) sendbuf[i] = v[Cperm1[i]];

  /* Send remote entries to their owner and overlap the communication with local computation */
  PetscCall(PetscSFReduceWithMemTypeBegin(mpiaij->coo_sf,MPIU_SCALAR,PETSC_MEMTYPE_HOST,sendbuf,PETSC_MEMTYPE_HOST,recvbuf,MPI_REPLACE));
  /* Add local entries to A and B */
  for (PetscCount i=0; i<Annz; i++) { /* All nonzeros in A are either zero'ed or added with a value (i.e., initialized) */
    PetscScalar sum = 0.0; /* Do partial summation first to improve numerical stability */
    for (PetscCount k=Ajmap1[i]; k<Ajmap1[i+1]; k++) sum += v[Aperm1[k]];
    Aa[i] = (imode == INSERT_VALUES? 0.0 : Aa[i]) + sum;
  }
  for (PetscCount i=0; i<Bnnz; i++) {
    PetscScalar sum = 0.0;
    for (PetscCount k=Bjmap1[i]; k<Bjmap1[i+1]; k++) sum += v[Bperm1[k]];
    Ba[i] = (imode == INSERT_VALUES? 0.0 : Ba[i]) + sum;
  }
  PetscCall(PetscSFReduceEnd(mpiaij->coo_sf,MPIU_SCALAR,sendbuf,recvbuf,MPI_REPLACE));

  /* Add received remote entries to A and B; Aimap2[]/Bimap2[] give the target nonzero slot,
     and remote contributions are accumulated on top of the local sums written above */
  for (PetscCount i=0; i<Annz2; i++) {
    for (PetscCount k=Ajmap2[i]; k<Ajmap2[i+1]; k++) Aa[Aimap2[i]] += recvbuf[Aperm2[k]];
  }
  for (PetscCount i=0; i<Bnnz2; i++) {
    for (PetscCount k=Bjmap2[i]; k<Bjmap2[i+1]; k++) Ba[Bimap2[i]] += recvbuf[Bperm2[k]];
  }
  PetscCall(MatSeqAIJRestoreArray(A,&Aa));
  PetscCall(MatSeqAIJRestoreArray(B,&Ba));
  PetscFunctionReturn(0);
}

/* ----------------------------------------------------------------*/

/*MC
   MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.

   Options Database Keys:
. -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()

   Level: beginner

   Notes:
    MatSetValues() may be called for this matrix type with a NULL argument for the numerical values,
    in this case the values associated with the rows and columns one passes in are set to zero
    in the matrix

    MatSetOptions(,MAT_STRUCTURE_ONLY,PETSC_TRUE) may be called for this matrix type.
    In this case no
    space is allocated for the nonzero entries and any entries passed with MatSetValues() are ignored

.seealso: `MatCreateAIJ()`
M*/

/* MatCreate_MPIAIJ - type constructor for MATMPIAIJ: allocates the Mat_MPIAIJ data,
   installs the operations table, and composes the type-specific methods that are
   looked up by name (PetscObjectQueryFunction) elsewhere in the library. */
PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
{
  Mat_MPIAIJ  *b;
  PetscMPIInt size;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B),&size));

  PetscCall(PetscNewLog(B,&b));
  B->data       = (void*)b;
  PetscCall(PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps)));
  B->assembled  = PETSC_FALSE;
  B->insertmode = NOT_SET_VALUES;
  b->size       = size;

  PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank));

  /* build cache for off array entries formed */
  PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash));

  b->donotstash  = PETSC_FALSE;
  b->colmap      = NULL;
  b->garray      = NULL;
  b->roworiented = PETSC_TRUE;

  /* stuff used for matrix vector multiply */
  b->lvec  = NULL;
  b->Mvctx = NULL;

  /* stuff for MatGetRow() */
  b->rowindices   = NULL;
  b->rowvalues    = NULL;
  b->getrowactive = PETSC_FALSE;

  /* flexible pointer used in CUSPARSE classes */
  b->spptr = NULL;

  /* compose the type-specific methods; the "_C" names are queried by the generic Mat code */
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetUseScalableIncreaseOverlap_C",MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatResetPreallocation_C",MatResetPreallocation_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijsell_C",MatConvert_MPIAIJ_MPIAIJSELL));
#if defined(PETSC_HAVE_CUDA)
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcusparse_C",MatConvert_MPIAIJ_MPIAIJCUSPARSE));
#endif
#if defined(PETSC_HAVE_KOKKOS_KERNELS)
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijkokkos_C",MatConvert_MPIAIJ_MPIAIJKokkos));
#endif
#if defined(PETSC_HAVE_MKL_SPARSE)
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijmkl_C",MatConvert_MPIAIJ_MPIAIJMKL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpibaij_C",MatConvert_MPIAIJ_MPIBAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpidense_C",MatConvert_MPIAIJ_MPIDense));
#if defined(PETSC_HAVE_ELEMENTAL)
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_elemental_C",MatConvert_MPIAIJ_Elemental));
#endif
#if defined(PETSC_HAVE_SCALAPACK)
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_scalapack_C",MatConvert_AIJ_ScaLAPACK));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_is_C",MatConvert_XAIJ_IS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisell_C",MatConvert_MPIAIJ_MPISELL));
#if defined(PETSC_HAVE_HYPRE)
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_hypre_C",MatConvert_AIJ_HYPRE));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_transpose_mpiaij_mpiaij_C",MatProductSetFromOptions_Transpose_AIJ_AIJ));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_is_mpiaij_C",MatProductSetFromOptions_IS_XAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaij_mpiaij_C",MatProductSetFromOptions_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatSetPreallocationCOO_C",MatSetPreallocationCOO_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatSetValuesCOO_C",MatSetValuesCOO_MPIAIJ));
  PetscCall(PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ));
  PetscFunctionReturn(0);
}

/*@C
     MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal"
       and "off-diagonal" part of the matrix in CSR format.

   Collective

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (Cannot be PETSC_DECIDE)
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
       calculated if N is given) For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.  i - row indices for "diagonal" portion of matrix; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
.  j - column indices, which must be local, i.e., based off the start column of the diagonal portion
.
a - matrix values 6805 . oi - row indices for "off-diagonal" portion of matrix; that is oi[0] = 0, oi[row] = oi[row-1] + number of elements in that row of the matrix 6806 . oj - column indices, which must be global, representing global columns in the MPIAIJ matrix 6807 - oa - matrix values 6808 6809 Output Parameter: 6810 . mat - the matrix 6811 6812 Level: advanced 6813 6814 Notes: 6815 The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user 6816 must free the arrays once the matrix has been destroyed and not before. 6817 6818 The i and j indices are 0 based 6819 6820 See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix 6821 6822 This sets local rows and cannot be used to set off-processor values. 6823 6824 Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a 6825 legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does 6826 not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because 6827 the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to 6828 keep track of the underlying array. Use MatSetOption(A,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all 6829 communication if it is known that only local entries will be set. 

.seealso: `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
          `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithArrays()`
@*/
PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
{
  Mat_MPIAIJ *maij;

  PetscFunctionBegin;
  PetscCheck(m >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
  PetscCheck(i[0] == 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
  PetscCheck(oi[0] == 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
  PetscCall(MatCreate(comm,mat));
  PetscCall(MatSetSizes(*mat,m,n,M,N));
  PetscCall(MatSetType(*mat,MATMPIAIJ));
  maij = (Mat_MPIAIJ*) (*mat)->data;

  /* the user arrays are adopted directly (not copied), so no preallocation is needed */
  (*mat)->preallocated = PETSC_TRUE;

  PetscCall(PetscLayoutSetUp((*mat)->rmap));
  PetscCall(PetscLayoutSetUp((*mat)->cmap));

  /* wrap the "diagonal" (local column indices) and "off-diagonal" (global column indices)
     CSR triples as the two sequential blocks of the MPIAIJ matrix */
  PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A));
  PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B));

  /* assembly here is purely local, so temporarily forbid off-process entries */
  PetscCall(MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE));
  PetscCall(MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY));
  PetscCall(MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE));
  PetscCall(MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE));
  PetscFunctionReturn(0);
}

/* Product data attached to C->product by the backend MatMat implementation below;
   holds the chain of intermediate sequential products and the COO machinery used
   to assemble their values into the result matrix. */
typedef struct {
  Mat       *mp;    /* intermediate products */
  PetscBool *mptmp; /* is the intermediate product temporary ?
*/
  PetscInt cp; /* number of intermediate products */

  /* support for MatGetBrowsOfAoCols_MPIAIJ for P_oth */
  PetscInt    *startsj_s,*startsj_r;
  PetscScalar *bufa;
  Mat         P_oth;

  /* may take advantage of merging product->B */
  Mat Bloc; /* B-local by merging diag and off-diag */

  /* cusparse does not have support to split between symbolic and numeric phases.
     When api_user is true, we don't need to update the numerical values
     of the temporary storage */
  PetscBool reusesym;

  /* support for COO values insertion */
  PetscScalar  *coo_v,*coo_w;   /* store on-process and off-process COO scalars, and used as MPI recv/send buffers respectively */
  PetscInt     **own;           /* own[i] points to address of on-process COO indices for Mat mp[i] */
  PetscInt     **off;           /* off[i] points to address of off-process COO indices for Mat mp[i] */
  PetscBool    hasoffproc;      /* if true, have off-process values insertion (i.e. AtB or PtAP) */
  PetscSF      sf;              /* used for non-local values insertion and memory malloc */
  PetscMemType mtype;

  /* customization */
  PetscBool abmerge;
  PetscBool P_oth_bind;
} MatMatMPIAIJBACKEND;

/* MatDestroy_MatMatMPIAIJBACKEND - destructor for the product data above; releases the
   communication buffers, every intermediate product matrix, and the COO index storage. */
PetscErrorCode MatDestroy_MatMatMPIAIJBACKEND(void *data)
{
  MatMatMPIAIJBACKEND *mmdata = (MatMatMPIAIJBACKEND*)data;
  PetscInt i;

  PetscFunctionBegin;
  PetscCall(PetscFree2(mmdata->startsj_s,mmdata->startsj_r));
  PetscCall(PetscFree(mmdata->bufa));
  PetscCall(PetscSFFree(mmdata->sf,mmdata->mtype,mmdata->coo_v));
  PetscCall(PetscSFFree(mmdata->sf,mmdata->mtype,mmdata->coo_w));
  PetscCall(MatDestroy(&mmdata->P_oth));
  PetscCall(MatDestroy(&mmdata->Bloc));
  PetscCall(PetscSFDestroy(&mmdata->sf));
  for (i = 0; i < mmdata->cp; i++) {
    PetscCall(MatDestroy(&mmdata->mp[i]));
  }
  PetscCall(PetscFree2(mmdata->mp,mmdata->mptmp));
  /* own[0] holds the single big index array shared by own[1..cp]; same for off[0] below */
  PetscCall(PetscFree(mmdata->own[0]));
  PetscCall(PetscFree(mmdata->own));
  PetscCall(PetscFree(mmdata->off[0]));
  PetscCall(PetscFree(mmdata->off));
  PetscCall(PetscFree(mmdata));
  PetscFunctionReturn(0);
}

/* Copy selected n entries with indices in idx[] of A to v[].
   If idx is NULL, copy the whole data array of A to v[]
*/
static PetscErrorCode MatSeqAIJCopySubArray(Mat A, PetscInt n, const PetscInt idx[], PetscScalar v[])
{
  PetscErrorCode (*f)(Mat,PetscInt,const PetscInt[],PetscScalar[]);

  PetscFunctionBegin;
  /* prefer a type-specific (possibly device-aware) implementation if one is composed on A */
  PetscCall(PetscObjectQueryFunction((PetscObject)A,"MatSeqAIJCopySubArray_C",&f));
  if (f) {
    PetscCall((*f)(A,n,idx,v));
  } else {
    const PetscScalar *vv;

    PetscCall(MatSeqAIJGetArrayRead(A,&vv));
    if (n && idx) {
      PetscScalar    *w  = v;
      const PetscInt *oi = idx;
      PetscInt       j;

      for (j = 0; j < n; j++) *w++ = vv[*oi++];
    } else {
      PetscCall(PetscArraycpy(v,vv,n));
    }
    PetscCall(MatSeqAIJRestoreArrayRead(A,&vv));
  }
  PetscFunctionReturn(0);
}

/* MatProductNumeric_MPIAIJBACKEND - numeric phase of the backend product: refresh the
   intermediate products, collect their values in COO order (scattering off-process
   contributions through mmdata->sf when needed), and insert them into C via MatSetValuesCOO(). */
static PetscErrorCode MatProductNumeric_MPIAIJBACKEND(Mat C)
{
  MatMatMPIAIJBACKEND *mmdata;
  PetscInt            i,n_d,n_o;

  PetscFunctionBegin;
  MatCheckProduct(C,1);
  PetscCheck(C->product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty");
  mmdata = (MatMatMPIAIJBACKEND*)C->product->data;
  if (!mmdata->reusesym) { /* update temporary matrices */
    if (mmdata->P_oth) {
      PetscCall(MatGetBrowsOfAoCols_MPIAIJ(C->product->A,C->product->B,MAT_REUSE_MATRIX,&mmdata->startsj_s,&mmdata->startsj_r,&mmdata->bufa,&mmdata->P_oth));
    }
    if (mmdata->Bloc) {
      PetscCall(MatMPIAIJGetLocalMatMerge(C->product->B,MAT_REUSE_MATRIX,NULL,&mmdata->Bloc));
    }
  }
  mmdata->reusesym = PETSC_FALSE; /* any later numeric call must refresh the temporaries */

  for (i = 0; i < mmdata->cp; i++) {
    PetscCheck(mmdata->mp[i]->ops->productnumeric,PetscObjectComm((PetscObject)mmdata->mp[i]),PETSC_ERR_PLIB,"Missing numeric op for %s",MatProductTypes[mmdata->mp[i]->product->type]);
    PetscCall((*mmdata->mp[i]->ops->productnumeric)(mmdata->mp[i]));
  }
  /* gather values of the non-temporary products: off-process entries go to coo_w, local ones to coo_v */
  for (i = 0, n_d = 0, n_o = 0; i < mmdata->cp; i++) {
    PetscInt noff = mmdata->off[i+1] - mmdata->off[i];

    if (mmdata->mptmp[i]) continue;
    if (noff) {
      PetscInt nown = mmdata->own[i+1] - mmdata->own[i];

      PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i],noff,mmdata->off[i],mmdata->coo_w + n_o));
      PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i],nown,mmdata->own[i],mmdata->coo_v + n_d));
      n_o += noff;
      n_d += nown;
    } else {
      Mat_SeqAIJ *mm = (Mat_SeqAIJ*)mmdata->mp[i]->data;

      PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i],mm->nz,NULL,mmdata->coo_v + n_d));
      n_d += mm->nz;
    }
  }
  if (mmdata->hasoffproc) { /* off-process insertion */
    PetscCall(PetscSFGatherBegin(mmdata->sf,MPIU_SCALAR,mmdata->coo_w,mmdata->coo_v+n_d));
    PetscCall(PetscSFGatherEnd(mmdata->sf,MPIU_SCALAR,mmdata->coo_w,mmdata->coo_v+n_d));
  }
  PetscCall(MatSetValuesCOO(C,mmdata->coo_v,INSERT_VALUES));
  PetscFunctionReturn(0);
}

/* Support for Pt * A, A * P, or Pt * A * P */
#define MAX_NUMBER_INTERMEDIATE 4
PetscErrorCode MatProductSymbolic_MPIAIJBACKEND(Mat C)
{
  Mat_Product            *product = C->product;
  Mat                    A,P,mp[MAX_NUMBER_INTERMEDIATE]; /* A, P and a series of intermediate matrices */
  Mat_MPIAIJ             *a,*p;
  MatMatMPIAIJBACKEND    *mmdata;
  ISLocalToGlobalMapping P_oth_l2g = NULL;
  IS                     glob = NULL;
  const char             *prefix;
  char                   pprefix[256];
  const PetscInt         *globidx,*P_oth_idx;
  PetscInt               i,j,cp,m,n,M,N,*coo_i,*coo_j;
  PetscCount             ncoo,ncoo_d,ncoo_o,ncoo_oown;
  PetscInt               cmapt[MAX_NUMBER_INTERMEDIATE],rmapt[MAX_NUMBER_INTERMEDIATE]; /* col/row map type for each Mat in mp[].
*/ 7013 /* type-0: consecutive, start from 0; type-1: consecutive with */ 7014 /* a base offset; type-2: sparse with a local to global map table */ 7015 const PetscInt *cmapa[MAX_NUMBER_INTERMEDIATE],*rmapa[MAX_NUMBER_INTERMEDIATE]; /* col/row local to global map array (table) for type-2 map type */ 7016 7017 MatProductType ptype; 7018 PetscBool mptmp[MAX_NUMBER_INTERMEDIATE],hasoffproc = PETSC_FALSE,iscuda,iskokk; 7019 PetscMPIInt size; 7020 7021 PetscFunctionBegin; 7022 MatCheckProduct(C,1); 7023 PetscCheck(!product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data not empty"); 7024 ptype = product->type; 7025 if (product->A->symmetric == PETSC_BOOL3_TRUE && ptype == MATPRODUCT_AtB) { 7026 ptype = MATPRODUCT_AB; 7027 product->symbolic_used_the_fact_A_is_symmetric = PETSC_TRUE; 7028 } 7029 switch (ptype) { 7030 case MATPRODUCT_AB: 7031 A = product->A; 7032 P = product->B; 7033 m = A->rmap->n; 7034 n = P->cmap->n; 7035 M = A->rmap->N; 7036 N = P->cmap->N; 7037 hasoffproc = PETSC_FALSE; /* will not scatter mat product values to other processes */ 7038 break; 7039 case MATPRODUCT_AtB: 7040 P = product->A; 7041 A = product->B; 7042 m = P->cmap->n; 7043 n = A->cmap->n; 7044 M = P->cmap->N; 7045 N = A->cmap->N; 7046 hasoffproc = PETSC_TRUE; 7047 break; 7048 case MATPRODUCT_PtAP: 7049 A = product->A; 7050 P = product->B; 7051 m = P->cmap->n; 7052 n = P->cmap->n; 7053 M = P->cmap->N; 7054 N = P->cmap->N; 7055 hasoffproc = PETSC_TRUE; 7056 break; 7057 default: 7058 SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Not for product type %s",MatProductTypes[ptype]); 7059 } 7060 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)C),&size)); 7061 if (size == 1) hasoffproc = PETSC_FALSE; 7062 7063 /* defaults */ 7064 for (i=0;i<MAX_NUMBER_INTERMEDIATE;i++) { 7065 mp[i] = NULL; 7066 mptmp[i] = PETSC_FALSE; 7067 rmapt[i] = -1; 7068 cmapt[i] = -1; 7069 rmapa[i] = NULL; 7070 cmapa[i] = NULL; 7071 } 7072 7073 /* customization */ 7074 
PetscCall(PetscNew(&mmdata)); 7075 mmdata->reusesym = product->api_user; 7076 if (ptype == MATPRODUCT_AB) { 7077 if (product->api_user) { 7078 PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatMatMult","Mat"); 7079 PetscCall(PetscOptionsBool("-matmatmult_backend_mergeB","Merge product->B local matrices","MatMatMult",mmdata->abmerge,&mmdata->abmerge,NULL)); 7080 PetscCall(PetscOptionsBool("-matmatmult_backend_pothbind","Bind P_oth to CPU","MatBindToCPU",mmdata->P_oth_bind,&mmdata->P_oth_bind,NULL)); 7081 PetscOptionsEnd(); 7082 } else { 7083 PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_AB","Mat"); 7084 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_mergeB","Merge product->B local matrices","MatMatMult",mmdata->abmerge,&mmdata->abmerge,NULL)); 7085 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_pothbind","Bind P_oth to CPU","MatBindToCPU",mmdata->P_oth_bind,&mmdata->P_oth_bind,NULL)); 7086 PetscOptionsEnd(); 7087 } 7088 } else if (ptype == MATPRODUCT_PtAP) { 7089 if (product->api_user) { 7090 PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatPtAP","Mat"); 7091 PetscCall(PetscOptionsBool("-matptap_backend_pothbind","Bind P_oth to CPU","MatBindToCPU",mmdata->P_oth_bind,&mmdata->P_oth_bind,NULL)); 7092 PetscOptionsEnd(); 7093 } else { 7094 PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_PtAP","Mat"); 7095 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_pothbind","Bind P_oth to CPU","MatBindToCPU",mmdata->P_oth_bind,&mmdata->P_oth_bind,NULL)); 7096 PetscOptionsEnd(); 7097 } 7098 } 7099 a = (Mat_MPIAIJ*)A->data; 7100 p = (Mat_MPIAIJ*)P->data; 7101 PetscCall(MatSetSizes(C,m,n,M,N)); 7102 PetscCall(PetscLayoutSetUp(C->rmap)); 7103 PetscCall(PetscLayoutSetUp(C->cmap)); 7104 PetscCall(MatSetType(C,((PetscObject)A)->type_name)); 7105 PetscCall(MatGetOptionsPrefix(C,&prefix)); 7106 7107 cp = 0; 7108 switch 
(ptype) { 7109 case MATPRODUCT_AB: /* A * P */ 7110 PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A,P,MAT_INITIAL_MATRIX,&mmdata->startsj_s,&mmdata->startsj_r,&mmdata->bufa,&mmdata->P_oth)); 7111 7112 /* A_diag * P_local (merged or not) */ 7113 if (mmdata->abmerge) { /* P's diagonal and off-diag blocks are merged to one matrix, then multiplied by A_diag */ 7114 /* P is product->B */ 7115 PetscCall(MatMPIAIJGetLocalMatMerge(P,MAT_INITIAL_MATRIX,&glob,&mmdata->Bloc)); 7116 PetscCall(MatProductCreate(a->A,mmdata->Bloc,NULL,&mp[cp])); 7117 PetscCall(MatProductSetType(mp[cp],MATPRODUCT_AB)); 7118 PetscCall(MatProductSetFill(mp[cp],product->fill)); 7119 PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp)); 7120 PetscCall(MatSetOptionsPrefix(mp[cp],prefix)); 7121 PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix)); 7122 mp[cp]->product->api_user = product->api_user; 7123 PetscCall(MatProductSetFromOptions(mp[cp])); 7124 PetscCheck(mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]); 7125 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7126 PetscCall(ISGetIndices(glob,&globidx)); 7127 rmapt[cp] = 1; 7128 cmapt[cp] = 2; 7129 cmapa[cp] = globidx; 7130 mptmp[cp] = PETSC_FALSE; 7131 cp++; 7132 } else { /* A_diag * P_diag and A_diag * P_off */ 7133 PetscCall(MatProductCreate(a->A,p->A,NULL,&mp[cp])); 7134 PetscCall(MatProductSetType(mp[cp],MATPRODUCT_AB)); 7135 PetscCall(MatProductSetFill(mp[cp],product->fill)); 7136 PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp)); 7137 PetscCall(MatSetOptionsPrefix(mp[cp],prefix)); 7138 PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix)); 7139 mp[cp]->product->api_user = product->api_user; 7140 PetscCall(MatProductSetFromOptions(mp[cp])); 7141 PetscCheck(mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for 
%s",MatProductTypes[mp[cp]->product->type]); 7142 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7143 rmapt[cp] = 1; 7144 cmapt[cp] = 1; 7145 mptmp[cp] = PETSC_FALSE; 7146 cp++; 7147 PetscCall(MatProductCreate(a->A,p->B,NULL,&mp[cp])); 7148 PetscCall(MatProductSetType(mp[cp],MATPRODUCT_AB)); 7149 PetscCall(MatProductSetFill(mp[cp],product->fill)); 7150 PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp)); 7151 PetscCall(MatSetOptionsPrefix(mp[cp],prefix)); 7152 PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix)); 7153 mp[cp]->product->api_user = product->api_user; 7154 PetscCall(MatProductSetFromOptions(mp[cp])); 7155 PetscCheck(mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]); 7156 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7157 rmapt[cp] = 1; 7158 cmapt[cp] = 2; 7159 cmapa[cp] = p->garray; 7160 mptmp[cp] = PETSC_FALSE; 7161 cp++; 7162 } 7163 7164 /* A_off * P_other */ 7165 if (mmdata->P_oth) { 7166 PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth,&P_oth_l2g)); /* make P_oth use local col ids */ 7167 PetscCall(ISLocalToGlobalMappingGetIndices(P_oth_l2g,&P_oth_idx)); 7168 PetscCall(MatSetType(mmdata->P_oth,((PetscObject)(a->B))->type_name)); 7169 PetscCall(MatBindToCPU(mmdata->P_oth,mmdata->P_oth_bind)); 7170 PetscCall(MatProductCreate(a->B,mmdata->P_oth,NULL,&mp[cp])); 7171 PetscCall(MatProductSetType(mp[cp],MATPRODUCT_AB)); 7172 PetscCall(MatProductSetFill(mp[cp],product->fill)); 7173 PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp)); 7174 PetscCall(MatSetOptionsPrefix(mp[cp],prefix)); 7175 PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix)); 7176 mp[cp]->product->api_user = product->api_user; 7177 PetscCall(MatProductSetFromOptions(mp[cp])); 7178 PetscCheck(mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for 
%s",MatProductTypes[mp[cp]->product->type]); 7179 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7180 rmapt[cp] = 1; 7181 cmapt[cp] = 2; 7182 cmapa[cp] = P_oth_idx; 7183 mptmp[cp] = PETSC_FALSE; 7184 cp++; 7185 } 7186 break; 7187 7188 case MATPRODUCT_AtB: /* (P^t * A): P_diag * A_loc + P_off * A_loc */ 7189 /* A is product->B */ 7190 PetscCall(MatMPIAIJGetLocalMatMerge(A,MAT_INITIAL_MATRIX,&glob,&mmdata->Bloc)); 7191 if (A == P) { /* when A==P, we can take advantage of the already merged mmdata->Bloc */ 7192 PetscCall(MatProductCreate(mmdata->Bloc,mmdata->Bloc,NULL,&mp[cp])); 7193 PetscCall(MatProductSetType(mp[cp],MATPRODUCT_AtB)); 7194 PetscCall(MatProductSetFill(mp[cp],product->fill)); 7195 PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp)); 7196 PetscCall(MatSetOptionsPrefix(mp[cp],prefix)); 7197 PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix)); 7198 mp[cp]->product->api_user = product->api_user; 7199 PetscCall(MatProductSetFromOptions(mp[cp])); 7200 PetscCheck(mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]); 7201 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7202 PetscCall(ISGetIndices(glob,&globidx)); 7203 rmapt[cp] = 2; 7204 rmapa[cp] = globidx; 7205 cmapt[cp] = 2; 7206 cmapa[cp] = globidx; 7207 mptmp[cp] = PETSC_FALSE; 7208 cp++; 7209 } else { 7210 PetscCall(MatProductCreate(p->A,mmdata->Bloc,NULL,&mp[cp])); 7211 PetscCall(MatProductSetType(mp[cp],MATPRODUCT_AtB)); 7212 PetscCall(MatProductSetFill(mp[cp],product->fill)); 7213 PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp)); 7214 PetscCall(MatSetOptionsPrefix(mp[cp],prefix)); 7215 PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix)); 7216 mp[cp]->product->api_user = product->api_user; 7217 PetscCall(MatProductSetFromOptions(mp[cp])); 7218 
PetscCheck(mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]); 7219 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7220 PetscCall(ISGetIndices(glob,&globidx)); 7221 rmapt[cp] = 1; 7222 cmapt[cp] = 2; 7223 cmapa[cp] = globidx; 7224 mptmp[cp] = PETSC_FALSE; 7225 cp++; 7226 PetscCall(MatProductCreate(p->B,mmdata->Bloc,NULL,&mp[cp])); 7227 PetscCall(MatProductSetType(mp[cp],MATPRODUCT_AtB)); 7228 PetscCall(MatProductSetFill(mp[cp],product->fill)); 7229 PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp)); 7230 PetscCall(MatSetOptionsPrefix(mp[cp],prefix)); 7231 PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix)); 7232 mp[cp]->product->api_user = product->api_user; 7233 PetscCall(MatProductSetFromOptions(mp[cp])); 7234 PetscCheck(mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]); 7235 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7236 rmapt[cp] = 2; 7237 rmapa[cp] = p->garray; 7238 cmapt[cp] = 2; 7239 cmapa[cp] = globidx; 7240 mptmp[cp] = PETSC_FALSE; 7241 cp++; 7242 } 7243 break; 7244 case MATPRODUCT_PtAP: 7245 PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A,P,MAT_INITIAL_MATRIX,&mmdata->startsj_s,&mmdata->startsj_r,&mmdata->bufa,&mmdata->P_oth)); 7246 /* P is product->B */ 7247 PetscCall(MatMPIAIJGetLocalMatMerge(P,MAT_INITIAL_MATRIX,&glob,&mmdata->Bloc)); 7248 PetscCall(MatProductCreate(a->A,mmdata->Bloc,NULL,&mp[cp])); 7249 PetscCall(MatProductSetType(mp[cp],MATPRODUCT_PtAP)); 7250 PetscCall(MatProductSetFill(mp[cp],product->fill)); 7251 PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp)); 7252 PetscCall(MatSetOptionsPrefix(mp[cp],prefix)); 7253 PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix)); 7254 mp[cp]->product->api_user = product->api_user; 7255 PetscCall(MatProductSetFromOptions(mp[cp])); 7256 
PetscCheck(mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]); 7257 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7258 PetscCall(ISGetIndices(glob,&globidx)); 7259 rmapt[cp] = 2; 7260 rmapa[cp] = globidx; 7261 cmapt[cp] = 2; 7262 cmapa[cp] = globidx; 7263 mptmp[cp] = PETSC_FALSE; 7264 cp++; 7265 if (mmdata->P_oth) { 7266 PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth,&P_oth_l2g)); 7267 PetscCall(ISLocalToGlobalMappingGetIndices(P_oth_l2g,&P_oth_idx)); 7268 PetscCall(MatSetType(mmdata->P_oth,((PetscObject)(a->B))->type_name)); 7269 PetscCall(MatBindToCPU(mmdata->P_oth,mmdata->P_oth_bind)); 7270 PetscCall(MatProductCreate(a->B,mmdata->P_oth,NULL,&mp[cp])); 7271 PetscCall(MatProductSetType(mp[cp],MATPRODUCT_AB)); 7272 PetscCall(MatProductSetFill(mp[cp],product->fill)); 7273 PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp)); 7274 PetscCall(MatSetOptionsPrefix(mp[cp],prefix)); 7275 PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix)); 7276 mp[cp]->product->api_user = product->api_user; 7277 PetscCall(MatProductSetFromOptions(mp[cp])); 7278 PetscCheck(mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]); 7279 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7280 mptmp[cp] = PETSC_TRUE; 7281 cp++; 7282 PetscCall(MatProductCreate(mmdata->Bloc,mp[1],NULL,&mp[cp])); 7283 PetscCall(MatProductSetType(mp[cp],MATPRODUCT_AtB)); 7284 PetscCall(MatProductSetFill(mp[cp],product->fill)); 7285 PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp)); 7286 PetscCall(MatSetOptionsPrefix(mp[cp],prefix)); 7287 PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix)); 7288 mp[cp]->product->api_user = product->api_user; 7289 PetscCall(MatProductSetFromOptions(mp[cp])); 7290 
PetscCheck(mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]); 7291 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7292 rmapt[cp] = 2; 7293 rmapa[cp] = globidx; 7294 cmapt[cp] = 2; 7295 cmapa[cp] = P_oth_idx; 7296 mptmp[cp] = PETSC_FALSE; 7297 cp++; 7298 } 7299 break; 7300 default: 7301 SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Not for product type %s",MatProductTypes[ptype]); 7302 } 7303 /* sanity check */ 7304 if (size > 1) for (i = 0; i < cp; i++) PetscCheck(rmapt[i] != 2 || hasoffproc,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected offproc map type for product %" PetscInt_FMT,i); 7305 7306 PetscCall(PetscMalloc2(cp,&mmdata->mp,cp,&mmdata->mptmp)); 7307 for (i = 0; i < cp; i++) { 7308 mmdata->mp[i] = mp[i]; 7309 mmdata->mptmp[i] = mptmp[i]; 7310 } 7311 mmdata->cp = cp; 7312 C->product->data = mmdata; 7313 C->product->destroy = MatDestroy_MatMatMPIAIJBACKEND; 7314 C->ops->productnumeric = MatProductNumeric_MPIAIJBACKEND; 7315 7316 /* memory type */ 7317 mmdata->mtype = PETSC_MEMTYPE_HOST; 7318 PetscCall(PetscObjectTypeCompareAny((PetscObject)C,&iscuda,MATSEQAIJCUSPARSE,MATMPIAIJCUSPARSE,"")); 7319 PetscCall(PetscObjectTypeCompareAny((PetscObject)C,&iskokk,MATSEQAIJKOKKOS,MATMPIAIJKOKKOS,"")); 7320 if (iscuda) mmdata->mtype = PETSC_MEMTYPE_CUDA; 7321 else if (iskokk) mmdata->mtype = PETSC_MEMTYPE_KOKKOS; 7322 7323 /* prepare coo coordinates for values insertion */ 7324 7325 /* count total nonzeros of those intermediate seqaij Mats 7326 ncoo_d: # of nonzeros of matrices that do not have offproc entries 7327 ncoo_o: # of nonzeros (of matrices that might have offproc entries) that will be inserted to remote procs 7328 ncoo_oown: # of nonzeros (of matrices that might have offproc entries) that will be inserted locally 7329 */ 7330 for (cp = 0, ncoo_d = 0, ncoo_o = 0, ncoo_oown = 0; cp < mmdata->cp; cp++) { 7331 Mat_SeqAIJ *mm = (Mat_SeqAIJ*)mp[cp]->data; 
7332 if (mptmp[cp]) continue; 7333 if (rmapt[cp] == 2 && hasoffproc) { /* the rows need to be scatter to all processes (might include self) */ 7334 const PetscInt *rmap = rmapa[cp]; 7335 const PetscInt mr = mp[cp]->rmap->n; 7336 const PetscInt rs = C->rmap->rstart; 7337 const PetscInt re = C->rmap->rend; 7338 const PetscInt *ii = mm->i; 7339 for (i = 0; i < mr; i++) { 7340 const PetscInt gr = rmap[i]; 7341 const PetscInt nz = ii[i+1] - ii[i]; 7342 if (gr < rs || gr >= re) ncoo_o += nz; /* this row is offproc */ 7343 else ncoo_oown += nz; /* this row is local */ 7344 } 7345 } else ncoo_d += mm->nz; 7346 } 7347 7348 /* 7349 ncoo: total number of nonzeros (including those inserted by remote procs) belonging to this proc 7350 7351 ncoo = ncoo_d + ncoo_oown + ncoo2, which ncoo2 is number of nonzeros inserted to me by other procs. 7352 7353 off[0] points to a big index array, which is shared by off[1,2,...]. Similarily, for own[0]. 7354 7355 off[p]: points to the segment for matrix mp[p], storing location of nonzeros that mp[p] will insert to others 7356 own[p]: points to the segment for matrix mp[p], storing location of nonzeros that mp[p] will insert locally 7357 so, off[p+1]-off[p] is the number of nonzeros that mp[p] will send to others. 7358 7359 coo_i/j/v[]: [ncoo] row/col/val of nonzeros belonging to this proc. 7360 Ex. coo_i[]: the beginning part (of size ncoo_d + ncoo_oown) stores i of local nonzeros, and the remaing part stores i of nonzeros I will receive. 
7361 */ 7362 PetscCall(PetscCalloc1(mmdata->cp+1,&mmdata->off)); /* +1 to make a csr-like data structure */ 7363 PetscCall(PetscCalloc1(mmdata->cp+1,&mmdata->own)); 7364 7365 /* gather (i,j) of nonzeros inserted by remote procs */ 7366 if (hasoffproc) { 7367 PetscSF msf; 7368 PetscInt ncoo2,*coo_i2,*coo_j2; 7369 7370 PetscCall(PetscMalloc1(ncoo_o,&mmdata->off[0])); 7371 PetscCall(PetscMalloc1(ncoo_oown,&mmdata->own[0])); 7372 PetscCall(PetscMalloc2(ncoo_o,&coo_i,ncoo_o,&coo_j)); /* to collect (i,j) of entries to be sent to others */ 7373 7374 for (cp = 0, ncoo_o = 0; cp < mmdata->cp; cp++) { 7375 Mat_SeqAIJ *mm = (Mat_SeqAIJ*)mp[cp]->data; 7376 PetscInt *idxoff = mmdata->off[cp]; 7377 PetscInt *idxown = mmdata->own[cp]; 7378 if (!mptmp[cp] && rmapt[cp] == 2) { /* row map is sparse */ 7379 const PetscInt *rmap = rmapa[cp]; 7380 const PetscInt *cmap = cmapa[cp]; 7381 const PetscInt *ii = mm->i; 7382 PetscInt *coi = coo_i + ncoo_o; 7383 PetscInt *coj = coo_j + ncoo_o; 7384 const PetscInt mr = mp[cp]->rmap->n; 7385 const PetscInt rs = C->rmap->rstart; 7386 const PetscInt re = C->rmap->rend; 7387 const PetscInt cs = C->cmap->rstart; 7388 for (i = 0; i < mr; i++) { 7389 const PetscInt *jj = mm->j + ii[i]; 7390 const PetscInt gr = rmap[i]; 7391 const PetscInt nz = ii[i+1] - ii[i]; 7392 if (gr < rs || gr >= re) { /* this is an offproc row */ 7393 for (j = ii[i]; j < ii[i+1]; j++) { 7394 *coi++ = gr; 7395 *idxoff++ = j; 7396 } 7397 if (!cmapt[cp]) { /* already global */ 7398 for (j = 0; j < nz; j++) *coj++ = jj[j]; 7399 } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */ 7400 for (j = 0; j < nz; j++) *coj++ = jj[j] + cs; 7401 } else { /* offdiag */ 7402 for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]]; 7403 } 7404 ncoo_o += nz; 7405 } else { /* this is a local row */ 7406 for (j = ii[i]; j < ii[i+1]; j++) *idxown++ = j; 7407 } 7408 } 7409 } 7410 mmdata->off[cp + 1] = idxoff; 7411 mmdata->own[cp + 1] = idxown; 7412 } 7413 7414 
PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)C),&mmdata->sf)); 7415 PetscCall(PetscSFSetGraphLayout(mmdata->sf,C->rmap,ncoo_o/*nleaves*/,NULL/*ilocal*/,PETSC_OWN_POINTER,coo_i)); 7416 PetscCall(PetscSFGetMultiSF(mmdata->sf,&msf)); 7417 PetscCall(PetscSFGetGraph(msf,&ncoo2/*nroots*/,NULL,NULL,NULL)); 7418 ncoo = ncoo_d + ncoo_oown + ncoo2; 7419 PetscCall(PetscMalloc2(ncoo,&coo_i2,ncoo,&coo_j2)); 7420 PetscCall(PetscSFGatherBegin(mmdata->sf,MPIU_INT,coo_i,coo_i2 + ncoo_d + ncoo_oown)); /* put (i,j) of remote nonzeros at back */ 7421 PetscCall(PetscSFGatherEnd(mmdata->sf,MPIU_INT,coo_i,coo_i2 + ncoo_d + ncoo_oown)); 7422 PetscCall(PetscSFGatherBegin(mmdata->sf,MPIU_INT,coo_j,coo_j2 + ncoo_d + ncoo_oown)); 7423 PetscCall(PetscSFGatherEnd(mmdata->sf,MPIU_INT,coo_j,coo_j2 + ncoo_d + ncoo_oown)); 7424 PetscCall(PetscFree2(coo_i,coo_j)); 7425 /* allocate MPI send buffer to collect nonzero values to be sent to remote procs */ 7426 PetscCall(PetscSFMalloc(mmdata->sf,mmdata->mtype,ncoo_o*sizeof(PetscScalar),(void**)&mmdata->coo_w)); 7427 coo_i = coo_i2; 7428 coo_j = coo_j2; 7429 } else { /* no offproc values insertion */ 7430 ncoo = ncoo_d; 7431 PetscCall(PetscMalloc2(ncoo,&coo_i,ncoo,&coo_j)); 7432 7433 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)C),&mmdata->sf)); 7434 PetscCall(PetscSFSetGraph(mmdata->sf,0,0,NULL,PETSC_OWN_POINTER,NULL,PETSC_OWN_POINTER)); 7435 PetscCall(PetscSFSetUp(mmdata->sf)); 7436 } 7437 mmdata->hasoffproc = hasoffproc; 7438 7439 /* gather (i,j) of nonzeros inserted locally */ 7440 for (cp = 0, ncoo_d = 0; cp < mmdata->cp; cp++) { 7441 Mat_SeqAIJ *mm = (Mat_SeqAIJ*)mp[cp]->data; 7442 PetscInt *coi = coo_i + ncoo_d; 7443 PetscInt *coj = coo_j + ncoo_d; 7444 const PetscInt *jj = mm->j; 7445 const PetscInt *ii = mm->i; 7446 const PetscInt *cmap = cmapa[cp]; 7447 const PetscInt *rmap = rmapa[cp]; 7448 const PetscInt mr = mp[cp]->rmap->n; 7449 const PetscInt rs = C->rmap->rstart; 7450 const PetscInt re = C->rmap->rend; 7451 const PetscInt 
cs = C->cmap->rstart; 7452 7453 if (mptmp[cp]) continue; 7454 if (rmapt[cp] == 1) { /* consecutive rows */ 7455 /* fill coo_i */ 7456 for (i = 0; i < mr; i++) { 7457 const PetscInt gr = i + rs; 7458 for (j = ii[i]; j < ii[i+1]; j++) coi[j] = gr; 7459 } 7460 /* fill coo_j */ 7461 if (!cmapt[cp]) { /* type-0, already global */ 7462 PetscCall(PetscArraycpy(coj,jj,mm->nz)); 7463 } else if (cmapt[cp] == 1) { /* type-1, local to global for consecutive columns of C */ 7464 for (j = 0; j < mm->nz; j++) coj[j] = jj[j] + cs; /* lid + col start */ 7465 } else { /* type-2, local to global for sparse columns */ 7466 for (j = 0; j < mm->nz; j++) coj[j] = cmap[jj[j]]; 7467 } 7468 ncoo_d += mm->nz; 7469 } else if (rmapt[cp] == 2) { /* sparse rows */ 7470 for (i = 0; i < mr; i++) { 7471 const PetscInt *jj = mm->j + ii[i]; 7472 const PetscInt gr = rmap[i]; 7473 const PetscInt nz = ii[i+1] - ii[i]; 7474 if (gr >= rs && gr < re) { /* local rows */ 7475 for (j = ii[i]; j < ii[i+1]; j++) *coi++ = gr; 7476 if (!cmapt[cp]) { /* type-0, already global */ 7477 for (j = 0; j < nz; j++) *coj++ = jj[j]; 7478 } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */ 7479 for (j = 0; j < nz; j++) *coj++ = jj[j] + cs; 7480 } else { /* type-2, local to global for sparse columns */ 7481 for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]]; 7482 } 7483 ncoo_d += nz; 7484 } 7485 } 7486 } 7487 } 7488 if (glob) { 7489 PetscCall(ISRestoreIndices(glob,&globidx)); 7490 } 7491 PetscCall(ISDestroy(&glob)); 7492 if (P_oth_l2g) { 7493 PetscCall(ISLocalToGlobalMappingRestoreIndices(P_oth_l2g,&P_oth_idx)); 7494 } 7495 PetscCall(ISLocalToGlobalMappingDestroy(&P_oth_l2g)); 7496 /* allocate an array to store all nonzeros (inserted locally or remotely) belonging to this proc */ 7497 PetscCall(PetscSFMalloc(mmdata->sf,mmdata->mtype,ncoo*sizeof(PetscScalar),(void**)&mmdata->coo_v)); 7498 7499 /* preallocate with COO data */ 7500 PetscCall(MatSetPreallocationCOO(C,ncoo,coo_i,coo_j)); 7501 
  PetscCall(PetscFree2(coo_i,coo_j));
  PetscFunctionReturn(0);
}

/*
   Selects the "backend" (device-friendly, COO-based) symbolic product implementation for
   MATPRODUCT_AB, MATPRODUCT_AtB and MATPRODUCT_PtAP.

   With PETSC_HAVE_DEVICE, the backend path is considered only when A and B have the same
   matrix type and neither operand is bound to the CPU; the user can still force the plain
   CPU path via options such as -matmatmult_backend_cpu (API call) or
   -mat_product_algorithm_backend_cpu (MatProduct interface). If the backend path is not
   selected, we fall back to MatProductSetFromOptions_MPIAIJ().
*/
PetscErrorCode MatProductSetFromOptions_MPIAIJBACKEND(Mat mat)
{
  Mat_Product *product = mat->product;
#if defined(PETSC_HAVE_DEVICE)
  PetscBool match = PETSC_FALSE;
  PetscBool usecpu = PETSC_FALSE;
#else
  PetscBool match = PETSC_TRUE;
#endif

  PetscFunctionBegin;
  MatCheckProduct(mat,1);
#if defined(PETSC_HAVE_DEVICE)
  /* backend path only makes sense when both operands live (or can live) on the device
     and share the same concrete type */
  if (!product->A->boundtocpu && !product->B->boundtocpu) {
    PetscCall(PetscObjectTypeCompare((PetscObject)product->B,((PetscObject)product->A)->type_name,&match));
  }
  if (match) { /* we can always fallback to the CPU if requested */
    switch (product->type) {
    case MATPRODUCT_AB:
      if (product->api_user) {
        PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatMatMult","Mat");
        PetscCall(PetscOptionsBool("-matmatmult_backend_cpu","Use CPU code","MatMatMult",usecpu,&usecpu,NULL));
        PetscOptionsEnd();
      } else {
        PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatProduct_AB","Mat");
        PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu","Use CPU code","MatMatMult",usecpu,&usecpu,NULL));
        PetscOptionsEnd();
      }
      break;
    case MATPRODUCT_AtB:
      if (product->api_user) {
        PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatTransposeMatMult","Mat");
        PetscCall(PetscOptionsBool("-mattransposematmult_backend_cpu","Use CPU code","MatTransposeMatMult",usecpu,&usecpu,NULL));
        PetscOptionsEnd();
      } else {
        PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatProduct_AtB","Mat");
        PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu","Use CPU code","MatTransposeMatMult",usecpu,&usecpu,NULL));
        PetscOptionsEnd();
      }
      break;
    case MATPRODUCT_PtAP:
      if (product->api_user) {
        PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatPtAP","Mat");
        PetscCall(PetscOptionsBool("-matptap_backend_cpu","Use CPU code","MatPtAP",usecpu,&usecpu,NULL));
        PetscOptionsEnd();
      } else {
        PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatProduct_PtAP","Mat");
        PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu","Use CPU code","MatPtAP",usecpu,&usecpu,NULL));
        PetscOptionsEnd();
      }
      break;
    default:
      break;
    }
    match = (PetscBool)!usecpu;
  }
#endif
  if (match) {
    switch (product->type) {
    case MATPRODUCT_AB:
    case MATPRODUCT_AtB:
    case MATPRODUCT_PtAP:
      mat->ops->productsymbolic = MatProductSymbolic_MPIAIJBACKEND;
      break;
    default:
      break;
    }
  }
  /* fallback to MPIAIJ ops */
  if (!mat->ops->productsymbolic) PetscCall(MatProductSetFromOptions_MPIAIJ(mat));
  PetscFunctionReturn(0);
}

/*
   Produces a set of block column indices of the matrix row, one for each block represented in the original row

   n - the number of block indices in cc[]
   cc - the block indices (must be large enough to contain the indices)

   Note: relies on the column indices returned by MatGetRow() being sorted in ascending
   order, so equal block indices are adjacent and duplicates are skipped by the
   cc[cnt] < idx[j]/bs test. An empty row yields *n = 0 (cnt stays -1).
*/
static inline PetscErrorCode MatCollapseRow(Mat Amat,PetscInt row,PetscInt bs,PetscInt *n,PetscInt *cc)
{
  PetscInt cnt = -1,nidx,j;
  const PetscInt *idx;

  PetscFunctionBegin;
  PetscCall(MatGetRow(Amat,row,&nidx,&idx,NULL));
  if (nidx) {
    cnt = 0;
    cc[cnt] = idx[0]/bs;
    for (j=1; j<nidx; j++) {
      /* append a new block index only when it differs from the last one recorded */
      if (cc[cnt] < idx[j]/bs) cc[++cnt] = idx[j]/bs;
    }
  }
  PetscCall(MatRestoreRow(Amat,row,&nidx,&idx,NULL));
  *n = cnt+1;
  PetscFunctionReturn(0);
}

/*
   Produces a set of block column indices of the matrix block row, one for each block represented in the original set of rows

   ncollapsed - the number of block indices
   collapsed - the block indices
                (must be large enough to contain the indices)

   Note: w0/w1/w2 are caller-provided workspaces. The result in *collapsed aliases one of
   the workspaces (cprev after the final swap), so it must not be freed separately and is
   only valid until the workspaces are reused.
*/
static inline PetscErrorCode MatCollapseRows(Mat Amat,PetscInt start,PetscInt bs,PetscInt *w0,PetscInt *w1,PetscInt *w2,PetscInt *ncollapsed,PetscInt **collapsed)
{
  PetscInt i,nprev,*cprev = w0,ncur = 0,*ccur = w1,*merged = w2,*cprevtmp;

  PetscFunctionBegin;
  PetscCall(MatCollapseRow(Amat,start,bs,&nprev,cprev));
  for (i=start+1; i<start+bs; i++) {
    PetscCall(MatCollapseRow(Amat,i,bs,&ncur,ccur));
    PetscCall(PetscMergeIntArray(nprev,cprev,ncur,ccur,&nprev,&merged));
    /* ping-pong: the merged result becomes the "previous" set for the next row */
    cprevtmp = cprev; cprev = merged; merged = cprevtmp;
  }
  *ncollapsed = nprev;
  if (collapsed) *collapsed = cprev;
  PetscFunctionReturn(0);
}

/* -------------------------------------------------------------------------- */
/*
   MatCreateGraph_Simple_AIJ - create simple scalar matrix (graph) from potentially blocked matrix

   Input Parameters:
+  Amat - matrix
.  symmetrize - make the result symmetric
-  scale - scale with diagonal

   Output Parameter:
.  a_Gmat - output scalar graph >= 0

*/
PETSC_INTERN PetscErrorCode MatCreateGraph_Simple_AIJ(Mat Amat, PetscBool symmetrize, PetscBool scale, Mat *a_Gmat)
{
  PetscInt Istart,Iend,Ii,jj,kk,ncols,nloc,NN,MM,bs;
  MPI_Comm comm;
  Mat Gmat;
  PetscBool ismpiaij,isseqaij;
  Mat a, b, c;
  MatType jtype;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)Amat,&comm));
  PetscCall(MatGetOwnershipRange(Amat, &Istart, &Iend));
  PetscCall(MatGetSize(Amat, &MM, &NN));
  PetscCall(MatGetBlockSize(Amat, &bs));
  nloc = (Iend-Istart)/bs; /* number of local block rows = rows of the scalar graph */

  PetscCall(PetscObjectBaseTypeCompare((PetscObject)Amat,MATSEQAIJ,&isseqaij));
  PetscCall(PetscObjectBaseTypeCompare((PetscObject)Amat,MATMPIAIJ,&ismpiaij));
  PetscCheck(isseqaij || ismpiaij,comm,PETSC_ERR_USER,"Require (MPI)AIJ matrix type");

  /* TODO GPU: these calls are potentially expensive if matrices are large and we want to use the GPU */
  /* A solution consists in providing a new API, MatAIJGetCollapsedAIJ, and each class can provide a fast
     implementation */
  if (bs > 1) {
    PetscCall(MatGetType(Amat,&jtype));
    PetscCall(MatCreate(comm, &Gmat));
    PetscCall(MatSetType(Gmat, jtype));
    PetscCall(MatSetSizes(Gmat,nloc,nloc,PETSC_DETERMINE,PETSC_DETERMINE));
    PetscCall(MatSetBlockSizes(Gmat, 1, 1));
    if (isseqaij || ((Mat_MPIAIJ*)Amat->data)->garray) {
      /* Fast path: assumes every bs x bs block of Amat is fully dense; if any block is
         found to be non-dense, fall through to the general (old) path via goto old_bs */
      PetscInt *d_nnz, *o_nnz;
      MatScalar *aa,val,AA[4096];
      PetscInt *aj,*ai,AJ[4096],nc;
      if (isseqaij) { a = Amat; b = NULL; }
      else {
        Mat_MPIAIJ *d = (Mat_MPIAIJ*)Amat->data;
        a = d->A; b = d->B;
      }
      PetscCall(PetscInfo(Amat,"New bs>1 Graph. nloc=%" PetscInt_FMT "\n",nloc));
      PetscCall(PetscMalloc2(nloc, &d_nnz,isseqaij ? 0 : nloc, &o_nnz));
      /* count block nonzeros per block row for preallocation, verifying dense blocks */
      for (c=a, kk=0 ; c && kk<2 ; c=b, kk++){
        PetscInt *nnz = (c==a) ? d_nnz : o_nnz, nmax=0;
        const PetscInt *cols;
        for (PetscInt brow=0,jj,ok=1,j0; brow < nloc*bs; brow += bs) { // block rows
          PetscCall(MatGetRow(c,brow,&jj,&cols,NULL));
          nnz[brow/bs] = jj/bs;
          if (jj%bs) ok = 0; /* row length not a multiple of bs => blocks not dense */
          if (cols) j0 = cols[0];
          else j0 = -1;
          PetscCall(MatRestoreRow(c,brow,&jj,&cols,NULL));
          if (nnz[brow/bs]>nmax) nmax = nnz[brow/bs];
          for (PetscInt ii=1; ii < bs && nnz[brow/bs] ; ii++) { // check for non-dense blocks
            PetscCall(MatGetRow(c,brow+ii,&jj,&cols,NULL));
            if (jj%bs) ok = 0;
            if ((cols && j0 != cols[0]) || (!cols && j0 != -1)) ok = 0;
            if (nnz[brow/bs] != jj/bs) ok = 0;
            PetscCall(MatRestoreRow(c,brow+ii,&jj,&cols,NULL));
          }
          if (!ok) {
            PetscCall(PetscFree2(d_nnz,o_nnz));
            goto old_bs; /* non-dense block found: use the general path below */
          }
        }
        PetscCheck(nmax<4096,PETSC_COMM_SELF,PETSC_ERR_USER,"Buffer %" PetscInt_FMT " too small 4096.",nmax);
      }
      PetscCall(MatSeqAIJSetPreallocation(Gmat,0,d_nnz));
      PetscCall(MatMPIAIJSetPreallocation(Gmat,0,d_nnz,0,o_nnz));
      PetscCall(PetscFree2(d_nnz,o_nnz));
      // diag
      for (PetscInt brow=0,n,grow; brow < nloc*bs; brow += bs) { // block rows
        Mat_SeqAIJ *aseq = (Mat_SeqAIJ*)a->data;
        ai = aseq->i;
        n = ai[brow+1] - ai[brow];
        aj = aseq->j + ai[brow];
        for (int k=0; k<n; k += bs) { // block columns
          AJ[k/bs] = aj[k]/bs + Istart/bs; // diag starts at (Istart,Istart)
          val = 0;
          for (int ii=0; ii<bs; ii++) { // rows in block
            aa = aseq->a + ai[brow+ii] + k;
            for (int jj=0; jj<bs; jj++) { // columns in block
              val += PetscAbs(PetscRealPart(aa[jj])); // a sort of norm
            }
          }
          AA[k/bs] = val;
        }
        grow = Istart/bs + brow/bs;
        PetscCall(MatSetValues(Gmat,1,&grow,n/bs,AJ,AA,INSERT_VALUES));
      }
      // off-diag
      if (ismpiaij) {
        Mat_MPIAIJ *aij = (Mat_MPIAIJ*)Amat->data;
        const PetscScalar *vals;
        const PetscInt *cols, *garray = aij->garray;
        PetscCheck(garray,PETSC_COMM_SELF,PETSC_ERR_USER,"No garray ?");
        for (PetscInt brow=0,grow; brow < nloc*bs; brow += bs) { // block rows
          PetscCall(MatGetRow(b,brow,&ncols,&cols,NULL));
          for (int k=0,cidx=0 ; k < ncols ; k += bs, cidx++) {
            AA[k/bs] = 0;
            AJ[cidx] = garray[cols[k]]/bs; /* map compressed off-diag column to global block column */
          }
          nc = ncols/bs;
          PetscCall(MatRestoreRow(b,brow,&ncols,&cols,NULL));
          for (int ii=0; ii<bs; ii++) { // rows in block
            PetscCall(MatGetRow(b,brow+ii,&ncols,&cols,&vals));
            for (int k=0; k<ncols; k += bs) {
              for (int jj=0; jj<bs; jj++) { // cols in block
                AA[k/bs] += PetscAbs(PetscRealPart(vals[k+jj]));
              }
            }
            PetscCall(MatRestoreRow(b,brow+ii,&ncols,&cols,&vals));
          }
          grow = Istart/bs + brow/bs;
          PetscCall(MatSetValues(Gmat,1,&grow,nc,AJ,AA,INSERT_VALUES));
        }
      }
      PetscCall(MatAssemblyBegin(Gmat,MAT_FINAL_ASSEMBLY));
      PetscCall(MatAssemblyEnd(Gmat,MAT_FINAL_ASSEMBLY));
    } else {
      const PetscScalar *vals;
      const PetscInt *idx;
      PetscInt *d_nnz, *o_nnz,*w0,*w1,*w2;
      old_bs:
      /*
        Determine the preallocation needed for the scalar matrix derived from the vector matrix.
      */
      PetscCall(PetscInfo(Amat,"OLD bs>1 CreateGraph\n"));
      PetscCall(PetscMalloc2(nloc, &d_nnz,isseqaij ? 0 : nloc, &o_nnz));
      if (isseqaij) {
        PetscInt max_d_nnz;
        /*
          Determine exact preallocation count for (sequential) scalar matrix
        */
        PetscCall(MatSeqAIJGetMaxRowNonzeros(Amat,&max_d_nnz));
        max_d_nnz = PetscMin(nloc,bs*max_d_nnz);
        PetscCall(PetscMalloc3(max_d_nnz, &w0,max_d_nnz, &w1,max_d_nnz, &w2));
        for (Ii = 0, jj = 0; Ii < Iend; Ii += bs, jj++) {
          PetscCall(MatCollapseRows(Amat,Ii,bs,w0,w1,w2,&d_nnz[jj],NULL));
        }
        PetscCall(PetscFree3(w0,w1,w2));
      } else if (ismpiaij) {
        Mat Daij,Oaij;
        const PetscInt *garray;
        PetscInt max_d_nnz;
        PetscCall(MatMPIAIJGetSeqAIJ(Amat,&Daij,&Oaij,&garray));
        /*
          Determine exact preallocation count for diagonal block portion of scalar matrix
        */
        PetscCall(MatSeqAIJGetMaxRowNonzeros(Daij,&max_d_nnz));
        max_d_nnz = PetscMin(nloc,bs*max_d_nnz);
        PetscCall(PetscMalloc3(max_d_nnz, &w0,max_d_nnz, &w1,max_d_nnz, &w2));
        for (Ii = 0, jj = 0; Ii < Iend - Istart; Ii += bs, jj++) {
          PetscCall(MatCollapseRows(Daij,Ii,bs,w0,w1,w2,&d_nnz[jj],NULL));
        }
        PetscCall(PetscFree3(w0,w1,w2));
        /*
          Over estimate (usually grossly over), preallocation count for off-diagonal portion of scalar matrix
        */
        for (Ii = 0, jj = 0; Ii < Iend - Istart; Ii += bs, jj++) {
          o_nnz[jj] = 0;
          for (kk=0; kk<bs; kk++) { /* rows that get collapsed to a single row */
            PetscCall(MatGetRow(Oaij,Ii+kk,&ncols,NULL,NULL));
            o_nnz[jj] += ncols;
            PetscCall(MatRestoreRow(Oaij,Ii+kk,&ncols,NULL,NULL));
          }
          /* clamp to the number of off-process columns of the scalar graph */
          if (o_nnz[jj] > (NN/bs-nloc)) o_nnz[jj] = NN/bs-nloc;
        }
      } else SETERRQ(comm,PETSC_ERR_USER,"Require AIJ matrix type");
      /* get scalar copy (norms) of matrix */
      PetscCall(MatSeqAIJSetPreallocation(Gmat,0,d_nnz));
      PetscCall(MatMPIAIJSetPreallocation(Gmat,0,d_nnz,0,o_nnz));
      PetscCall(PetscFree2(d_nnz,o_nnz));
      for (Ii = Istart; Ii < Iend; Ii++) {
        PetscInt dest_row = Ii/bs;
        PetscCall(MatGetRow(Amat,Ii,&ncols,&idx,&vals));
        for (jj=0; jj<ncols; jj++) {
          PetscInt dest_col = idx[jj]/bs;
          PetscScalar sv = PetscAbs(PetscRealPart(vals[jj]));
          /* ADD_VALUES accumulates the |.| of all entries of a block into one graph entry */
          PetscCall(MatSetValues(Gmat,1,&dest_row,1,&dest_col,&sv,ADD_VALUES));
        }
        PetscCall(MatRestoreRow(Amat,Ii,&ncols,&idx,&vals));
      }
      PetscCall(MatAssemblyBegin(Gmat,MAT_FINAL_ASSEMBLY));
      PetscCall(MatAssemblyEnd(Gmat,MAT_FINAL_ASSEMBLY));
    }
  } else {
    /* TODO GPU: optimization proposal, each class provides fast implementation of this
       procedure via MatAbs API */
    /* just copy scalar matrix & abs() */
    PetscCall(MatDuplicate(Amat, MAT_COPY_VALUES, &Gmat));
    if (isseqaij) { a = Gmat; b = NULL; }
    else {
      Mat_MPIAIJ *d = (Mat_MPIAIJ*)Gmat->data;
      a = d->A; b = d->B;
    }
    /* abs */
    for (c=a, kk=0 ; c && kk<2 ; c=b, kk++){
      MatInfo info;
      PetscScalar *avals;
      PetscCall(MatGetInfo(c,MAT_LOCAL,&info));
      PetscCall(MatSeqAIJGetArray(c,&avals));
      for (int jj = 0; jj<info.nz_used; jj++) avals[jj] = PetscAbsScalar(avals[jj]);
      PetscCall(MatSeqAIJRestoreArray(c,&avals));
    }
  }
  if (symmetrize) {
    PetscBool isset,issym;
    PetscCall(MatIsSymmetricKnown(Amat,&isset,&issym));
    if (!isset || !issym) {
      Mat matTrans;
      PetscCall(MatTranspose(Gmat, MAT_INITIAL_MATRIX, &matTrans));
      PetscCall(MatAXPY(Gmat, 1.0, matTrans, Gmat->structurally_symmetric ? SAME_NONZERO_PATTERN : DIFFERENT_NONZERO_PATTERN));
      PetscCall(MatDestroy(&matTrans));
    }
    PetscCall(MatSetOption(Gmat,MAT_SYMMETRIC,PETSC_TRUE));
  } else {
    PetscCall(MatPropagateSymmetryOptions(Amat, Gmat));
  }
  if (scale) {
    /* scale c for all diagonal values = 1 or -1 */
    Vec diag;
    PetscCall(MatCreateVecs(Gmat, &diag, NULL));
    PetscCall(MatGetDiagonal(Gmat, diag));
    PetscCall(VecReciprocal(diag));
    PetscCall(VecSqrtAbs(diag));
    PetscCall(MatDiagonalScale(Gmat, diag, diag));
    PetscCall(VecDestroy(&diag));
  }
  PetscCall(MatViewFromOptions(Gmat, NULL, "-mat_graph_view"));
  *a_Gmat = Gmat;
  PetscFunctionReturn(0);
}

/* -------------------------------------------------------------------------- */
/*@C
   MatFilter_AIJ - filter values with small absolute values
   With vfilter < 0 does nothing so should not be called.

   Collective on Mat

   Input Parameters:
+  Gmat - the graph
-  vfilter - threshold parameter [0,1)

   Output Parameter:
.  filteredG - output filtered scalar graph

   Level: developer

   Notes:
   This is called before graph coarseners are called.
   This could go into Mat, move 'symm' to GAMG

.seealso: `PCGAMGSetThreshold()`
@*/
PETSC_INTERN PetscErrorCode MatFilter_AIJ(Mat Gmat,PetscReal vfilter, Mat *filteredG)
{
  PetscInt Istart,Iend,ncols,nnz0,nnz1, NN, MM, nloc;
  Mat tGmat;
  MPI_Comm comm;
  const PetscScalar *vals;
  const PetscInt *idx;
  PetscInt *d_nnz, *o_nnz, kk, *garray = NULL, *AJ, maxcols=0;
  MatScalar *AA; // this is checked in graph
  PetscBool isseqaij;
  Mat a, b, c;
  MatType jtype;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)Gmat,&comm));
  PetscCall(PetscObjectBaseTypeCompare((PetscObject)Gmat,MATSEQAIJ,&isseqaij));
  PetscCall(MatGetType(Gmat,&jtype));
  PetscCall(MatCreate(comm, &tGmat));
  PetscCall(MatSetType(tGmat, jtype));

  /* TODO GPU: this can be called when filter = 0 -> Probably provide MatAIJThresholdCompress that compresses the entries below a threshold?
     Also, if the matrix is symmetric, can we skip this
     operation? It can be very expensive on large matrices. */

  // global sizes
  PetscCall(MatGetSize(Gmat, &MM, &NN));
  PetscCall(MatGetOwnershipRange(Gmat, &Istart, &Iend));
  nloc = Iend - Istart;
  PetscCall(PetscMalloc2(nloc, &d_nnz,nloc, &o_nnz));
  if (isseqaij) { a = Gmat; b = NULL; }
  else {
    Mat_MPIAIJ *d = (Mat_MPIAIJ*)Gmat->data;
    a = d->A; b = d->B;
    garray = d->garray; /* compressed-to-global column map for the off-diagonal part */
  }
  /* Determine upper bound on non-zeros needed in new filtered matrix */
  for (PetscInt row=0; row < nloc; row++) {
    PetscCall(MatGetRow(a,row,&ncols,NULL,NULL));
    d_nnz[row] = ncols;
    if (ncols>maxcols) maxcols=ncols;
    PetscCall(MatRestoreRow(a,row,&ncols,NULL,NULL));
  }
  if (b) {
    for (PetscInt row=0; row < nloc; row++) {
      PetscCall(MatGetRow(b,row,&ncols,NULL,NULL));
      o_nnz[row] = ncols;
      if (ncols>maxcols) maxcols=ncols;
      PetscCall(MatRestoreRow(b,row,&ncols,NULL,NULL));
    }
  }
  PetscCall(MatSetSizes(tGmat,nloc,nloc,MM,MM));
  PetscCall(MatSetBlockSizes(tGmat, 1, 1));
  PetscCall(MatSeqAIJSetPreallocation(tGmat,0,d_nnz));
  PetscCall(MatMPIAIJSetPreallocation(tGmat,0,d_nnz,0,o_nnz));
  /* all insertions below target locally owned rows only */
  PetscCall(MatSetOption(tGmat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE));
  PetscCall(PetscFree2(d_nnz,o_nnz));
  /* scratch buffers sized by the widest local row, reused for every row */
  PetscCall(PetscMalloc2(maxcols, &AA,maxcols, &AJ));
  nnz0 = nnz1 = 0; /* nnz0: entries scanned, nnz1: entries kept */
  for (c=a, kk=0 ; c && kk<2 ; c=b, kk++){ /* first pass diagonal block, second off-diagonal */
    for (PetscInt row=0, grow=Istart, ncol_row, jj ; row < nloc; row++,grow++) {
      PetscCall(MatGetRow(c,row,&ncols,&idx,&vals));
      for (ncol_row=jj=0; jj<ncols; jj++,nnz0++) {
        PetscScalar sv = PetscAbs(PetscRealPart(vals[jj]));
        if (PetscRealPart(sv) > vfilter) {
          nnz1++;
          PetscInt cid = idx[jj] + Istart; //diag
          if (c!=a) cid = garray[idx[jj]]; /* off-diag: map compressed column to global */
          AA[ncol_row] = vals[jj];
          AJ[ncol_row] = cid;
          ncol_row++;
        }
      }
      PetscCall(MatRestoreRow(c,row,&ncols,&idx,&vals));
      PetscCall(MatSetValues(tGmat,1,&grow,ncol_row,AJ,AA,INSERT_VALUES));
    }
  }
  PetscCall(PetscFree2(AA,AJ));
  PetscCall(MatAssemblyBegin(tGmat,MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(tGmat,MAT_FINAL_ASSEMBLY));
  PetscCall(MatPropagateSymmetryOptions(Gmat,tGmat)); /* Normal Mat options are not relevant ? */

  PetscCall(PetscInfo(tGmat,"\t %g%% nnz after filtering, with threshold %g, %g nnz ave. (N=%" PetscInt_FMT ", max row size %d)\n",
                      (!nnz0) ? 1. : 100.*(double)nnz1/(double)nnz0, (double)vfilter,
                      (!nloc) ? 1. : (double)nnz0/(double)nloc,MM,(int)maxcols));

  *filteredG = tGmat;
  PetscCall(MatViewFromOptions(tGmat, NULL, "-mat_filter_graph_view"));
  PetscFunctionReturn(0);
}

/*
    Special version for direct calls from Fortran
*/
#include <petsc/private/fortranimpl.h>

/* Change these macros so can be used in void function */
/* Identical to PetscCallVoid, except it assigns to *_ierr */
#undef PetscCall
#define PetscCall(...) do {                                       \
    PetscErrorCode ierr_msv_mpiaij = __VA_ARGS__;                 \
    if (PetscUnlikely(ierr_msv_mpiaij)) {                         \
      *_ierr = PetscError(PETSC_COMM_SELF,__LINE__,PETSC_FUNCTION_NAME,__FILE__,ierr_msv_mpiaij,PETSC_ERROR_REPEAT," "); \
      return;                                                     \
    }                                                             \
  } while (0)

#undef SETERRQ
#define SETERRQ(comm,ierr,...) do {                               \
    *_ierr = PetscError(comm,__LINE__,PETSC_FUNCTION_NAME,__FILE__,ierr,PETSC_ERROR_INITIAL,__VA_ARGS__); \
    return;                                                       \
  } while (0)

#if defined(PETSC_HAVE_FORTRAN_CAPS)
#define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
#elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
#define matsetvaluesmpiaij_ matsetvaluesmpiaij
#else
#endif
/*
   Fortran-callable MatSetValues for MATMPIAIJ. Mirrors the C-side insertion logic: values
   for locally owned rows go directly into the diagonal (A) or off-diagonal (B) SeqAIJ part
   via the MatSetValues_SeqAIJ_{A,B}_Private() macros; rows owned by other processes are
   stashed for communication at assembly (unless donotstash is set). Errors are reported
   through *_ierr using the redefined PetscCall/SETERRQ macros above.

   NOTE: the local variable names (rp1/ap1/rmax1/..., rp2/ap2/..., bm, am, inserted, ...)
   are required verbatim by the private insertion macros; do not rename them.
*/
PETSC_EXTERN void matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
{
  Mat mat = *mmat;
  PetscInt m = *mm, n = *mn;
  InsertMode addv = *maddv;
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar value;

  MatCheckPreallocated(mat,1);
  if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
  else PetscCheck(mat->insertmode == addv,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
  {
    PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
    PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
    PetscBool roworiented = aij->roworiented;

    /* Some Variables required in the macro */
    Mat A = aij->A;
    Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
    PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
    MatScalar *aa;
    PetscBool ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
    Mat B = aij->B;
    Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data;
    PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
    MatScalar *ba;
    /* This variable below is only for the PETSC_HAVE_VIENNACL or PETSC_HAVE_CUDA cases, but we define it in all cases because we
     * cannot use "#if defined" inside a macro. */
    PETSC_UNUSED PetscBool inserted = PETSC_FALSE;

    PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
    PetscInt nonew = a->nonew;
    MatScalar *ap1,*ap2;

    PetscFunctionBegin;
    PetscCall(MatSeqAIJGetArray(A,&aa));
    PetscCall(MatSeqAIJGetArray(B,&ba));
    for (i=0; i<m; i++) {
      if (im[i] < 0) continue; /* negative rows are ignored by convention */
      PetscCheck(im[i] < mat->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT,im[i],mat->rmap->N-1);
      if (im[i] >= rstart && im[i] < rend) { /* locally owned row */
        row = im[i] - rstart;
        lastcol1 = -1;
        rp1 = aj + ai[row];
        ap1 = aa + ai[row];
        rmax1 = aimax[row];
        nrow1 = ailen[row];
        low1 = 0;
        high1 = nrow1;
        lastcol2 = -1;
        rp2 = bj + bi[row];
        ap2 = ba + bi[row];
        rmax2 = bimax[row];
        nrow2 = bilen[row];
        low2 = 0;
        high2 = nrow2;

        for (j=0; j<n; j++) {
          if (roworiented) value = v[i*n+j];
          else value = v[i+j*m];
          if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
          if (in[j] >= cstart && in[j] < cend) { /* column in the diagonal block */
            col = in[j] - cstart;
            MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
          } else if (in[j] < 0) continue;
          else if (PetscUnlikelyDebug(in[j] >= mat->cmap->N)) {
            /* extra brace on SETERRQ() is required for --with-errorchecking=0 - due to the next 'else' clause */
            SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT,in[j],mat->cmap->N-1);
          } else { /* column in the off-diagonal block */
            if (mat->was_assembled) {
              if (!aij->colmap) {
                PetscCall(MatCreateColmap_MPIAIJ_Private(mat));
              }
#if defined(PETSC_USE_CTABLE)
              PetscCall(PetscTableFind(aij->colmap,in[j]+1,&col));
              col--;
#else
              col = aij->colmap[in[j]] - 1;
#endif
              if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
                /* new off-diag column not in colmap: disassemble and insert with global index */
                PetscCall(MatDisAssemble_MPIAIJ(mat));
                col = in[j];
                /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
                B = aij->B;
                b = (Mat_SeqAIJ*)B->data;
                bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
                rp2 = bj + bi[row];
                ap2 = ba + bi[row];
                rmax2 = bimax[row];
                nrow2 = bilen[row];
                low2 = 0;
                high2 = nrow2;
                bm = aij->B->rmap->n;
                ba = b->a;
                inserted = PETSC_FALSE;
              }
            } else col = in[j];
            MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
          }
        }
      } else if (!aij->donotstash) { /* row owned elsewhere: stash for assembly-time communication */
        if (roworiented) {
          PetscCall(MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
        } else {
          PetscCall(MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
        }
      }
    }
    PetscCall(MatSeqAIJRestoreArray(A,&aa));
    PetscCall(MatSeqAIJRestoreArray(B,&ba));
  }
  PetscFunctionReturnVoid();
}

/* Undefining these here since they were redefined from their original definition above! No
 * other PETSc functions should be defined past this point, as it is impossible to recover the
 * original definitions */
#undef PetscCall
#undef SETERRQ