1 #include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/ 2 #include <petsc/private/vecimpl.h> 3 #include <petsc/private/sfimpl.h> 4 #include <petsc/private/isimpl.h> 5 #include <petscblaslapack.h> 6 #include <petscsf.h> 7 #include <petsc/private/hashmapi.h> 8 9 PetscErrorCode MatGetRowIJ_MPIAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *m,const PetscInt *ia[],const PetscInt *ja[],PetscBool *done) 10 { 11 Mat B; 12 13 PetscFunctionBegin; 14 PetscCall(MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&B)); 15 PetscCall(PetscObjectCompose((PetscObject)A,"MatGetRowIJ_MPIAIJ",(PetscObject)B)); 16 PetscCall(MatGetRowIJ(B,oshift,symmetric,inodecompressed,m,ia,ja,done)); 17 PetscCall(MatDestroy(&B)); 18 PetscFunctionReturn(0); 19 } 20 21 PetscErrorCode MatRestoreRowIJ_MPIAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *m,const PetscInt *ia[],const PetscInt *ja[],PetscBool *done) 22 { 23 Mat B; 24 25 PetscFunctionBegin; 26 PetscCall(PetscObjectQuery((PetscObject)A,"MatGetRowIJ_MPIAIJ",(PetscObject*)&B)); 27 PetscCall(MatRestoreRowIJ(B,oshift,symmetric,inodecompressed,m,ia,ja,done)); 28 PetscCall(PetscObjectCompose((PetscObject)A,"MatGetRowIJ_MPIAIJ",NULL)); 29 PetscFunctionReturn(0); 30 } 31 32 /*MC 33 MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices. 34 35 This matrix type is identical to MATSEQAIJ when constructed with a single process communicator, 36 and MATMPIAIJ otherwise. As a result, for single process communicators, 37 MatSeqAIJSetPreallocation is supported, and similarly MatMPIAIJSetPreallocation() is supported 38 for communicators controlling multiple processes. It is recommended that you call both of 39 the above preallocation routines for simplicity. 40 41 Options Database Keys: 42 . 
-mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions() 43 44 Developer Notes: 45 Subclasses include MATAIJCUSPARSE, MATAIJPERM, MATAIJSELL, MATAIJMKL, MATAIJCRL, and also automatically switches over to use inodes when 46 enough exist. 47 48 Level: beginner 49 50 .seealso: `MatCreateAIJ()`, `MatCreateSeqAIJ()`, `MATSEQAIJ`, `MATMPIAIJ` 51 M*/ 52 53 /*MC 54 MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices. 55 56 This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator, 57 and MATMPIAIJCRL otherwise. As a result, for single process communicators, 58 MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported 59 for communicators controlling multiple processes. It is recommended that you call both of 60 the above preallocation routines for simplicity. 61 62 Options Database Keys: 63 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions() 64 65 Level: beginner 66 67 .seealso: `MatCreateMPIAIJCRL`, `MATSEQAIJCRL`, `MATMPIAIJCRL`, `MATSEQAIJCRL`, `MATMPIAIJCRL` 68 M*/ 69 70 static PetscErrorCode MatBindToCPU_MPIAIJ(Mat A,PetscBool flg) 71 { 72 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 73 74 PetscFunctionBegin; 75 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_VIENNACL) 76 A->boundtocpu = flg; 77 #endif 78 if (a->A) PetscCall(MatBindToCPU(a->A,flg)); 79 if (a->B) PetscCall(MatBindToCPU(a->B,flg)); 80 81 /* In addition to binding the diagonal and off-diagonal matrices, bind the local vectors used for matrix-vector products. 82 * This maybe seems a little odd for a MatBindToCPU() call to do, but it makes no sense for the binding of these vectors 83 * to differ from the parent matrix. 
/* Set row/column block sizes on both the diagonal (A) and off-diagonal (B) blocks;
   B always has column block size 1 since its columns are a compressed set of off-process columns. */
PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ*)M->data;

  PetscFunctionBegin;
  if (mat->A) {
    PetscCall(MatSetBlockSizes(mat->A,rbs,cbs));
    PetscCall(MatSetBlockSizes(mat->B,rbs,1));
  }
  PetscFunctionReturn(0);
}

/* Build an IS (global row numbering) of the local rows that contain at least one stored
   nonzero value.  Returns *keptrows = NULL when no rank has an all-zero row.
   Pass 1 counts zero rows (cnt); pass 2 collects the kept rows. */
PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
{
  Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
  Mat_SeqAIJ      *a = (Mat_SeqAIJ*)mat->A->data;
  Mat_SeqAIJ      *b = (Mat_SeqAIJ*)mat->B->data;
  const PetscInt  *ia,*ib;
  const MatScalar *aa,*bb,*aav,*bav;
  PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
  PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;

  PetscFunctionBegin;
  *keptrows = NULL;

  ia = a->i;
  ib = b->i;
  PetscCall(MatSeqAIJGetArrayRead(mat->A,&aav));
  PetscCall(MatSeqAIJGetArrayRead(mat->B,&bav));
  /* first pass: count the rows whose stored entries are all exactly zero (cnt) */
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) {
      cnt++;
      goto ok1;            /* structurally empty row */
    }
    aa = aav + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) goto ok1;   /* found a nonzero in the diagonal block */
    }
    bb = bav + ib[i];
    for (j=0; j <nb; j++) {
      if (bb[j] != 0.0) goto ok1;   /* found a nonzero in the off-diagonal block */
    }
    cnt++;                 /* all stored values were zero */
ok1:;
  }
  /* n0rows = global number of zero rows; when zero, every row is kept and NULL signals that */
  PetscCall(MPIU_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M)));
  if (!n0rows) {
    PetscCall(MatSeqAIJRestoreArrayRead(mat->A,&aav));
    PetscCall(MatSeqAIJRestoreArrayRead(mat->B,&bav));
    PetscFunctionReturn(0);
  }
  PetscCall(PetscMalloc1(M->rmap->n-cnt,&rows));
  cnt = 0;
  /* second pass: record the global index of every row that has a nonzero */
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) continue;
    aa = aav + ia[i];
    for (j=0; j<na;j++) {
      if (aa[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
    bb = bav + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
ok2:;
  }
  PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows));
  PetscCall(MatSeqAIJRestoreArrayRead(mat->A,&aav));
  PetscCall(MatSeqAIJRestoreArrayRead(mat->B,&bav));
  PetscFunctionReturn(0);
}
/* Set the diagonal of Y from vector D.  When Y is assembled with congruent row/column
   layouts the diagonal lives entirely in the local diagonal block, so delegate to it;
   otherwise fall back to the generic (slower) implementation. */
PetscErrorCode MatDiagonalSet_MPIAIJ(Mat Y,Vec D,InsertMode is)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*) Y->data;
  PetscBool cong;

  PetscFunctionBegin;
  PetscCall(MatHasCongruentLayouts(Y,&cong));
  if (Y->assembled && cong) {
    PetscCall(MatDiagonalSet(aij->A,D,is));
  } else {
    PetscCall(MatDiagonalSet_Default(Y,D,is));
  }
  PetscFunctionReturn(0);
}

/* Return an IS (global numbering) of the local rows with a zero/missing diagonal entry.
   The diagonal of an MPIAIJ matrix is in the sequential diagonal block aij->A. */
PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)M->data;
  PetscInt i,rstart,nrows,*rows;

  PetscFunctionBegin;
  *zrows = NULL;
  PetscCall(MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows));
  PetscCall(MatGetOwnershipRange(M,&rstart,NULL));
  for (i=0; i<nrows; i++) rows[i] += rstart;   /* shift local row indices to global */
  PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows));
  PetscFunctionReturn(0);
}

/* Compute a per-column reduction (norm, sum, or mean) over all rows of the matrix.
   Each rank accumulates its local contributions into a length-n (global columns) work
   array — diagonal-block entries at A->cmap->rstart + local column, off-diagonal entries
   via the garray local-to-global map — then a single Allreduce combines the ranks. */
PetscErrorCode MatGetColumnReductions_MPIAIJ(Mat A,PetscInt type,PetscReal *reductions)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)A->data;
  PetscInt i,m,n,*garray = aij->garray;
  Mat_SeqAIJ *a_aij = (Mat_SeqAIJ*) aij->A->data;
  Mat_SeqAIJ *b_aij = (Mat_SeqAIJ*) aij->B->data;
  PetscReal *work;
  const PetscScalar *dummy;

  PetscFunctionBegin;
  PetscCall(MatGetSize(A,&m,&n));
  PetscCall(PetscCalloc1(n,&work));
  /* get/restore cycles below only force any device copies of the values to the host;
     the raw a_aij->a / b_aij->a arrays are then read directly */
  PetscCall(MatSeqAIJGetArrayRead(aij->A,&dummy));
  PetscCall(MatSeqAIJRestoreArrayRead(aij->A,&dummy));
  PetscCall(MatSeqAIJGetArrayRead(aij->B,&dummy));
  PetscCall(MatSeqAIJRestoreArrayRead(aij->B,&dummy));
  if (type == NORM_2) {
    /* accumulate |a_ij|^2; the square root is taken after the global reduction */
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
    }
  } else if (type == NORM_1) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
    }
  } else if (type == NORM_INFINITY) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
    }
  } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] += PetscRealPart(a_aij->a[i]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] += PetscRealPart(b_aij->a[i]);
    }
  } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] += PetscImaginaryPart(a_aij->a[i]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] += PetscImaginaryPart(b_aij->a[i]);
    }
  } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown reduction type");
  /* infinity norm combines with MAX, everything else with SUM */
  if (type == NORM_INFINITY) {
    PetscCall(MPIU_Allreduce(work,reductions,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A)));
  } else {
    PetscCall(MPIU_Allreduce(work,reductions,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A)));
  }
  PetscCall(PetscFree(work));
  if (type == NORM_2) {
    for (i=0; i<n; i++) reductions[i] = PetscSqrtReal(reductions[i]);
  } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) {
    for (i=0; i<n; i++) reductions[i] /= m;   /* divide column sums by global row count */
  }
  PetscFunctionReturn(0);
}
== REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) { 261 for (i=0; i<n; i++) reductions[i] /= m; 262 } 263 PetscFunctionReturn(0); 264 } 265 266 PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A,IS *is) 267 { 268 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 269 IS sis,gis; 270 const PetscInt *isis,*igis; 271 PetscInt n,*iis,nsis,ngis,rstart,i; 272 273 PetscFunctionBegin; 274 PetscCall(MatFindOffBlockDiagonalEntries(a->A,&sis)); 275 PetscCall(MatFindNonzeroRows(a->B,&gis)); 276 PetscCall(ISGetSize(gis,&ngis)); 277 PetscCall(ISGetSize(sis,&nsis)); 278 PetscCall(ISGetIndices(sis,&isis)); 279 PetscCall(ISGetIndices(gis,&igis)); 280 281 PetscCall(PetscMalloc1(ngis+nsis,&iis)); 282 PetscCall(PetscArraycpy(iis,igis,ngis)); 283 PetscCall(PetscArraycpy(iis+ngis,isis,nsis)); 284 n = ngis + nsis; 285 PetscCall(PetscSortRemoveDupsInt(&n,iis)); 286 PetscCall(MatGetOwnershipRange(A,&rstart,NULL)); 287 for (i=0; i<n; i++) iis[i] += rstart; 288 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)A),n,iis,PETSC_OWN_POINTER,is)); 289 290 PetscCall(ISRestoreIndices(sis,&isis)); 291 PetscCall(ISRestoreIndices(gis,&igis)); 292 PetscCall(ISDestroy(&sis)); 293 PetscCall(ISDestroy(&gis)); 294 PetscFunctionReturn(0); 295 } 296 297 /* 298 Local utility routine that creates a mapping from the global column 299 number to the local number in the off-diagonal part of the local 300 storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at 301 a slightly higher hash table cost; without it it is not scalable (each processor 302 has an order N integer array but is fast to access. 
/*
    Local utility routine that creates a mapping from the global column
  number to the local number in the off-diagonal part of the local
  storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
  a slightly higher hash table cost; without it it is not scalable (each processor
  has an order N integer array) but is fast to access.
*/
PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
  PetscInt n = aij->B->cmap->n,i;

  PetscFunctionBegin;
  PetscCheck(!n || aij->garray,PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
#if defined(PETSC_USE_CTABLE)
  /* store global+1 -> local+1 so that 0 can serve as the "not found" value */
  PetscCall(PetscTableCreate(n,mat->cmap->N+1,&aij->colmap));
  for (i=0; i<n; i++) {
    PetscCall(PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES));
  }
#else
  /* dense array over all global columns; entry is local index + 1, 0 means "not present" */
  PetscCall(PetscCalloc1(mat->cmap->N+1,&aij->colmap));
  PetscCall(PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt)));
  for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
#endif
  PetscFunctionReturn(0);
}

/* Insert (row,col,value) into the diagonal block.  Relies on caller-scope variables
   (rp1/ap1/nrow1/low1/high1/lastcol1/nonew/...) set up by MatSetValues_MPIAIJ.
   Binary-search-assisted insertion into the sorted row; reallocates the row when full. */
#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv,orow,ocol) \
{ \
  if (col <= lastcol1) low1 = 0; \
  else high1 = nrow1; \
  lastcol1 = col;\
  while (high1-low1 > 5) { \
    t = (low1+high1)/2; \
    if (rp1[t] > col) high1 = t; \
    else low1 = t; \
  } \
  for (_i=low1; _i<high1; _i++) { \
    if (rp1[_i] > col) break; \
    if (rp1[_i] == col) { \
      if (addv == ADD_VALUES) { \
        ap1[_i] += value; \
        /* Not sure LogFlops will slow down the code or not */ \
        (void)PetscLogFlops(1.0); \
      } \
      else ap1[_i] = value; \
      goto a_noinsert; \
    } \
  } \
  if (value == 0.0 && ignorezeroentries && row != col) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
  if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \
  PetscCheck(nonew != -1,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
  MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
  N = nrow1++ - 1; a->nz++; high1++; \
  /* shift up all the later entries in this row */ \
  PetscCall(PetscArraymove(rp1+_i+1,rp1+_i,N-_i+1));\
  PetscCall(PetscArraymove(ap1+_i+1,ap1+_i,N-_i+1));\
  rp1[_i] = col; \
  ap1[_i] = value; \
  A->nonzerostate++;\
  a_noinsert: ; \
  ailen[row] = nrow1; \
}
/* Insert (row,col,value) into the off-diagonal block.  Mirror of the A_Private macro,
   operating on the caller-scope rp2/ap2/nrow2/... variables. */
#define MatSetValues_SeqAIJ_B_Private(row,col,value,addv,orow,ocol) \
{ \
  if (col <= lastcol2) low2 = 0; \
  else high2 = nrow2; \
  lastcol2 = col; \
  while (high2-low2 > 5) { \
    t = (low2+high2)/2; \
    if (rp2[t] > col) high2 = t; \
    else low2 = t; \
  } \
  for (_i=low2; _i<high2; _i++) { \
    if (rp2[_i] > col) break; \
    if (rp2[_i] == col) { \
      if (addv == ADD_VALUES) { \
        ap2[_i] += value; \
        (void)PetscLogFlops(1.0); \
      } \
      else ap2[_i] = value; \
      goto b_noinsert; \
    } \
  } \
  if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
  if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
  PetscCheck(nonew != -1,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
  MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
  N = nrow2++ - 1; b->nz++; high2++; \
  /* shift up all the later entries in this row */ \
  PetscCall(PetscArraymove(rp2+_i+1,rp2+_i,N-_i+1));\
  PetscCall(PetscArraymove(ap2+_i+1,ap2+_i,N-_i+1));\
  rp2[_i] = col; \
  ap2[_i] = value; \
  B->nonzerostate++; \
  b_noinsert: ; \
  bilen[row] = nrow2; \
}

/* Overwrite the values of one locally-owned row (global index) with the array v, which
   is ordered by ascending GLOBAL column: [off-diag left of the diagonal block | diagonal
   block | off-diag right of the diagonal block].  Only the values change; the nonzero
   pattern of the row must already match. */
PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
  PetscInt l,*garray = mat->garray,diag;
  PetscScalar *aa,*ba;

  PetscFunctionBegin;
  /* code only works for square matrices A */

  /* find size of row to the left of the diagonal part */
  PetscCall(MatGetOwnershipRange(A,&diag,NULL));
  row = row - diag;   /* convert global row to local row */
  for (l=0; l<b->i[row+1]-b->i[row]; l++) {
    if (garray[b->j[b->i[row]+l]] > diag) break;   /* first off-diag column right of the diagonal block */
  }
  if (l) {
    PetscCall(MatSeqAIJGetArray(mat->B,&ba));
    PetscCall(PetscArraycpy(ba+b->i[row],v,l));
    PetscCall(MatSeqAIJRestoreArray(mat->B,&ba));
  }

  /* diagonal part */
  if (a->i[row+1]-a->i[row]) {
    PetscCall(MatSeqAIJGetArray(mat->A,&aa));
    PetscCall(PetscArraycpy(aa+a->i[row],v+l,(a->i[row+1]-a->i[row])));
    PetscCall(MatSeqAIJRestoreArray(mat->A,&aa));
  }

  /* right of diagonal part */
  if (b->i[row+1]-b->i[row]-l) {
    PetscCall(MatSeqAIJGetArray(mat->B,&ba));
    PetscCall(PetscArraycpy(ba+b->i[row]+l,v+l+a->i[row+1]-a->i[row],b->i[row+1]-b->i[row]-l));
    PetscCall(MatSeqAIJRestoreArray(mat->B,&ba));
  }
  PetscFunctionReturn(0);
}
/* Insert or add an m-by-n logically dense block of values at global indices (im,in).
   Locally owned rows go directly into the diagonal block (A) or the off-diagonal block
   (B, via the colmap global-to-local lookup once assembled); rows owned by other ranks
   are stashed and communicated during assembly. */
PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar value = 0.0;
  PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
  PetscBool roworiented = aij->roworiented;

  /* Some Variables required in the macro */
  Mat A = aij->A;
  Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
  PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
  PetscBool ignorezeroentries = a->ignorezeroentries;
  Mat B = aij->B;
  Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data;
  PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
  MatScalar *aa,*ba;
  /* scratch used by the MatSetValues_SeqAIJ_{A,B}_Private macros */
  PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
  PetscInt nonew;
  MatScalar *ap1,*ap2;

  PetscFunctionBegin;
  PetscCall(MatSeqAIJGetArray(A,&aa));
  PetscCall(MatSeqAIJGetArray(B,&ba));
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;   /* negative rows are silently ignored */
    PetscCheck(im[i] < mat->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT,im[i],mat->rmap->N-1);
    if (im[i] >= rstart && im[i] < rend) {     /* locally owned row */
      row = im[i] - rstart;
      /* prime the macro scratch state for both blocks for this row */
      lastcol1 = -1;
      rp1 = aj + ai[row];
      ap1 = aa + ai[row];
      rmax1 = aimax[row];
      nrow1 = ailen[row];
      low1 = 0;
      high1 = nrow1;
      lastcol2 = -1;
      rp2 = bj + bi[row];
      ap2 = ba + bi[row];
      rmax2 = bimax[row];
      nrow2 = bilen[row];
      low2 = 0;
      high2 = nrow2;

      for (j=0; j<n; j++) {
        if (v) value = roworiented ? v[i*n+j] : v[i+j*m];
        if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
        if (in[j] >= cstart && in[j] < cend) {       /* column lands in the diagonal block */
          col = in[j] - cstart;
          nonew = a->nonew;
          MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
        } else if (in[j] < 0) {
          continue;   /* negative columns are silently ignored */
        } else {
          PetscCheck(in[j] < mat->cmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT,in[j],mat->cmap->N-1);
          if (mat->was_assembled) {
            if (!aij->colmap) {
              PetscCall(MatCreateColmap_MPIAIJ_Private(mat));
            }
#if defined(PETSC_USE_CTABLE)
            PetscCall(PetscTableFind(aij->colmap,in[j]+1,&col)); /* map global col ids to local ones */
            col--;
#else
            col = aij->colmap[in[j]] - 1;
#endif
            if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) { /* col < 0 means in[j] is a new col for B */
              PetscCall(MatDisAssemble_MPIAIJ(mat));               /* Change aij->B from reduced/local format to expanded/global format */
              col = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
              B = aij->B;
              b = (Mat_SeqAIJ*)B->data;
              bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
              rp2 = bj + bi[row];
              ap2 = ba + bi[row];
              rmax2 = bimax[row];
              nrow2 = bilen[row];
              low2 = 0;
              high2 = nrow2;
              bm = aij->B->rmap->n;
              ba = b->a;
            } else if (col < 0 && !(ignorezeroentries && value == 0.0)) {
              if (1 == ((Mat_SeqAIJ*)(aij->B->data))->nonew) {
                PetscCall(PetscInfo(mat,"Skipping of insertion of new nonzero location in off-diagonal portion of matrix %g(%" PetscInt_FMT ",%" PetscInt_FMT ")\n",(double)PetscRealPart(value),im[i],in[j]));
              } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", im[i], in[j]);
            }
          } else col = in[j];   /* before first assembly B uses global column indices */
          nonew = b->nonew;
          MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
        }
      }
    } else {    /* off-process row: stash for communication during assembly */
      PetscCheck(!mat->nooffprocentries,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!aij->donotstash) {
        mat->assembled = PETSC_FALSE;
        if (roworiented) {
          PetscCall(MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
        } else {
          PetscCall(MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
        }
      }
    }
  }
  PetscCall(MatSeqAIJRestoreArray(A,&aa)); /* aa, bb might have been free'd due to reallocation above. But we don't access them here */
  PetscCall(MatSeqAIJRestoreArray(B,&ba));
  PetscFunctionReturn(0);
}
!(ignorezeroentries && value == 0.0)) { 516 if (1 == ((Mat_SeqAIJ*)(aij->B->data))->nonew) { 517 PetscCall(PetscInfo(mat,"Skipping of insertion of new nonzero location in off-diagonal portion of matrix %g(%" PetscInt_FMT ",%" PetscInt_FMT ")\n",(double)PetscRealPart(value),im[i],in[j])); 518 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", im[i], in[j]); 519 } 520 } else col = in[j]; 521 nonew = b->nonew; 522 MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]); 523 } 524 } 525 } else { 526 PetscCheck(!mat->nooffprocentries,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 527 if (!aij->donotstash) { 528 mat->assembled = PETSC_FALSE; 529 if (roworiented) { 530 PetscCall(MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)))); 531 } else { 532 PetscCall(MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)))); 533 } 534 } 535 } 536 } 537 PetscCall(MatSeqAIJRestoreArray(A,&aa)); /* aa, bb might have been free'd due to reallocation above. But we don't access them here */ 538 PetscCall(MatSeqAIJRestoreArray(B,&ba)); 539 PetscFunctionReturn(0); 540 } 541 542 /* 543 This function sets the j and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix. 544 The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like). 545 No off-processor parts off the matrix are allowed here and mat->was_assembled has to be PETSC_FALSE. 
546 */ 547 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat_Symbolic(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[]) 548 { 549 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 550 Mat A = aij->A; /* diagonal part of the matrix */ 551 Mat B = aij->B; /* offdiagonal part of the matrix */ 552 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 553 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 554 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,col; 555 PetscInt *ailen = a->ilen,*aj = a->j; 556 PetscInt *bilen = b->ilen,*bj = b->j; 557 PetscInt am = aij->A->rmap->n,j; 558 PetscInt diag_so_far = 0,dnz; 559 PetscInt offd_so_far = 0,onz; 560 561 PetscFunctionBegin; 562 /* Iterate over all rows of the matrix */ 563 for (j=0; j<am; j++) { 564 dnz = onz = 0; 565 /* Iterate over all non-zero columns of the current row */ 566 for (col=mat_i[j]; col<mat_i[j+1]; col++) { 567 /* If column is in the diagonal */ 568 if (mat_j[col] >= cstart && mat_j[col] < cend) { 569 aj[diag_so_far++] = mat_j[col] - cstart; 570 dnz++; 571 } else { /* off-diagonal entries */ 572 bj[offd_so_far++] = mat_j[col]; 573 onz++; 574 } 575 } 576 ailen[j] = dnz; 577 bilen[j] = onz; 578 } 579 PetscFunctionReturn(0); 580 } 581 582 /* 583 This function sets the local j, a and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix. 584 The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like). 585 No off-processor parts off the matrix are allowed here, they are set at a later point by MatSetValues_MPIAIJ. 586 Also, mat->was_assembled has to be false, otherwise the statement aj[rowstart_diag+dnz_row] = mat_j[col] - cstart; 587 would not be true and the more complex MatSetValues_MPIAIJ has to be used. 
/*
   Sets the local j, a and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ matrix.
   The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
   No off-processor parts of the matrix are allowed here, they are set at a later point by MatSetValues_MPIAIJ.
   Also, mat->was_assembled has to be false, otherwise the statement aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
   would not be true and the more complex MatSetValues_MPIAIJ has to be used.
*/
PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[],const PetscScalar mat_a[])
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
  Mat A = aij->A; /* diagonal part of the matrix */
  Mat B = aij->B; /* offdiagonal part of the matrix */
  Mat_SeqAIJ *aijd =(Mat_SeqAIJ*)(aij->A)->data,*aijo=(Mat_SeqAIJ*)(aij->B)->data;
  Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
  Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data;
  PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend;
  PetscInt *ailen = a->ilen,*aj = a->j;
  PetscInt *bilen = b->ilen,*bj = b->j;
  PetscInt am = aij->A->rmap->n,j;
  PetscInt *full_diag_i=aijd->i,*full_offd_i=aijo->i; /* These variables can also include non-local elements, which are set at a later point. */
  PetscInt col,dnz_row,onz_row,rowstart_diag,rowstart_offd;
  PetscScalar *aa = a->a,*ba = b->a;

  PetscFunctionBegin;
  /* Iterate over all rows of the matrix */
  for (j=0; j<am; j++) {
    dnz_row = onz_row = 0;
    rowstart_offd = full_offd_i[j];
    rowstart_diag = full_diag_i[j];
    /* Iterate over all non-zero columns of the current row */
    for (col=mat_i[j]; col<mat_i[j+1]; col++) {
      /* If column is in the diagonal */
      if (mat_j[col] >= cstart && mat_j[col] < cend) {
        aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
        aa[rowstart_diag+dnz_row] = mat_a[col];
        dnz_row++;
      } else { /* off-diagonal entries */
        bj[rowstart_offd+onz_row] = mat_j[col];
        ba[rowstart_offd+onz_row] = mat_a[col];
        onz_row++;
      }
    }
    ailen[j] = dnz_row;
    bilen[j] = onz_row;
  }
  PetscFunctionReturn(0);
}

/* Retrieve an m-by-n block of values at global indices (idxm,idxn).  Only rows owned by
   this rank are supported; columns outside the diagonal block are looked up in the
   colmap — entries not stored in B are returned as 0.0. */
PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
  PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* negative row */
    PetscCheck(idxm[i] < mat->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT,idxm[i],mat->rmap->N-1);
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* negative column */
        PetscCheck(idxn[j] < mat->cmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT,idxn[j],mat->cmap->N-1);
        if (idxn[j] >= cstart && idxn[j] < cend) {
          col = idxn[j] - cstart;
          PetscCall(MatGetValues(aij->A,1,&row,1,&col,v+i*n+j));
        } else {
          if (!aij->colmap) {
            PetscCall(MatCreateColmap_MPIAIJ_Private(mat));
          }
#if defined(PETSC_USE_CTABLE)
          PetscCall(PetscTableFind(aij->colmap,idxn[j]+1,&col));
          col--;
#else
          col = aij->colmap[idxn[j]] - 1;
#endif
          /* col < 0 or a stale colmap entry means the column is not stored locally */
          if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
          else {
            PetscCall(MatGetValues(aij->B,1,&row,1,&col,v+i*n+j));
          }
        }
      }
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
  }
  PetscFunctionReturn(0);
}
/* Start assembly: launch the communication that routes stashed off-process entries
   to their owning ranks.  A no-op when stashing is disabled. */
PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
  PetscInt nstash,reallocs;

  PetscFunctionBegin;
  if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);

  PetscCall(MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range));
  PetscCall(MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs));
  PetscCall(PetscInfo(aij->A,"Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n",nstash,reallocs));
  PetscFunctionReturn(0);
}

/* Finish assembly: drain the stash into local blocks, assemble the diagonal and
   off-diagonal blocks, perform the collective disassemble/reassemble handshake, and
   update the global nonzero state. */
PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
  PetscMPIInt n;
  PetscInt i,j,rstart,ncols,flg;
  PetscInt *row,*col;
  PetscBool other_disassembled;
  PetscScalar *val;

  /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */

  PetscFunctionBegin;
  if (!aij->donotstash && !mat->nooffprocentries) {
    /* receive stashed entries from other ranks until the stream is exhausted */
    while (1) {
      PetscCall(MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg));
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else ncols = n-i;
        /* Now assemble all these values with a single function call */
        PetscCall(MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode));
        i = j;
      }
    }
    PetscCall(MatStashScatterEnd_Private(&mat->stash));
  }
#if defined(PETSC_HAVE_DEVICE)
  if (mat->offloadmask == PETSC_OFFLOAD_CPU) aij->A->offloadmask = PETSC_OFFLOAD_CPU;
  /* We call MatBindToCPU() on aij->A and aij->B here, because if MatBindToCPU_MPIAIJ() is called before assembly, it cannot bind these. */
  if (mat->boundtocpu) {
    PetscCall(MatBindToCPU(aij->A,PETSC_TRUE));
    PetscCall(MatBindToCPU(aij->B,PETSC_TRUE));
  }
#endif
  PetscCall(MatAssemblyBegin(aij->A,mode));
  PetscCall(MatAssemblyEnd(aij->A,mode));

  /* determine if any processor has disassembled, if so we must
     also disassemble ourself, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
    PetscCall(MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)mat)));
    if (mat->was_assembled && !other_disassembled) { /* mat on this rank has reduced off-diag B with local col ids, but globaly it does not */
      PetscCall(MatDisAssemble_MPIAIJ(mat));
    }
  }
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    PetscCall(MatSetUpMultiply_MPIAIJ(mat));
  }
  PetscCall(MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE));
#if defined(PETSC_HAVE_DEVICE)
  if (mat->offloadmask == PETSC_OFFLOAD_CPU && aij->B->offloadmask != PETSC_OFFLOAD_UNALLOCATED) aij->B->offloadmask = PETSC_OFFLOAD_CPU;
#endif
  PetscCall(MatAssemblyBegin(aij->B,mode));
  PetscCall(MatAssemblyEnd(aij->B,mode));

  /* cached row workspace and diagonal are stale after assembly */
  PetscCall(PetscFree2(aij->rowvalues,aij->rowindices));

  aij->rowvalues = NULL;

  PetscCall(VecDestroy(&aij->diag));

  /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
  if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
    PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
    PetscCall(MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat)));
  }
#if defined(PETSC_HAVE_DEVICE)
  mat->offloadmask = PETSC_OFFLOAD_BOTH;
#endif
  PetscFunctionReturn(0);
}
/* Zero every stored value of the matrix (both diagonal and off-diagonal blocks);
   the nonzero pattern is left intact. */
PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
{
  Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  PetscCall(MatZeroEntries(l->A));
  PetscCall(MatZeroEntries(l->B));
  PetscFunctionReturn(0);
}

/* Zero the given global rows, optionally placing `diag` on the diagonal and fixing the
   right-hand side b so that the solution keeps x in those rows.  Three paths:
   (1) diag != 0 with congruent layouts: the diagonal entry lives in the local block;
   (2) diag != 0 otherwise: zero first, then insert the diagonal with MatSetValues
       (temporarily allowing new nonzeros unless the pattern is to be kept);
   (3) diag == 0: plain zeroing.  Ends with a collective update of A->nonzerostate. */
PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data;
  PetscObjectState sA, sB;
  PetscInt *lrows;
  PetscInt r, len;
  PetscBool cong, lch, gch;

  PetscFunctionBegin;
  /* get locally owned rows */
  PetscCall(MatZeroRowsMapLocal_Private(A,N,rows,&len,&lrows));
  PetscCall(MatHasCongruentLayouts(A,&cong));
  /* fix right hand side if needed */
  if (x && b) {
    const PetscScalar *xx;
    PetscScalar *bb;

    PetscCheck(cong,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Need matching row/col layout");
    PetscCall(VecGetArrayRead(x, &xx));
    PetscCall(VecGetArray(b, &bb));
    for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
    PetscCall(VecRestoreArrayRead(x, &xx));
    PetscCall(VecRestoreArray(b, &bb));
  }

  /* snapshot the pattern state so we can detect whether zeroing changed it */
  sA = mat->A->nonzerostate;
  sB = mat->B->nonzerostate;

  if (diag != 0.0 && cong) {
    PetscCall(MatZeroRows(mat->A, len, lrows, diag, NULL, NULL));
    PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL));
  } else if (diag != 0.0) { /* non-square or non congruent layouts -> if keepnonzeropattern is false, we allow for new insertion */
    Mat_SeqAIJ *aijA = (Mat_SeqAIJ*)mat->A->data;
    Mat_SeqAIJ *aijB = (Mat_SeqAIJ*)mat->B->data;
    PetscInt nnwA, nnwB;
    PetscBool nnzA, nnzB;

    nnwA = aijA->nonew;
    nnwB = aijB->nonew;
    nnzA = aijA->keepnonzeropattern;
    nnzB = aijB->keepnonzeropattern;
    if (!nnzA) {
      PetscCall(PetscInfo(mat->A,"Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on diagonal block.\n"));
      aijA->nonew = 0;
    }
    if (!nnzB) {
      PetscCall(PetscInfo(mat->B,"Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on off-diagonal block.\n"));
      aijB->nonew = 0;
    }
    /* Must zero here before the next loop */
    PetscCall(MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL));
    PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL));
    for (r = 0; r < len; ++r) {
      const PetscInt row = lrows[r] + A->rmap->rstart;
      if (row >= A->cmap->N) continue;   /* no diagonal position exists for this row */
      PetscCall(MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES));
    }
    aijA->nonew = nnwA;   /* restore the original insertion policy */
    aijB->nonew = nnwB;
  } else {
    PetscCall(MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL));
    PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL));
  }
  PetscCall(PetscFree(lrows));
  PetscCall(MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY));

  /* reduce nonzerostate */
  lch = (PetscBool)(sA != mat->A->nonzerostate || sB != mat->B->nonzerostate);
  PetscCall(MPIU_Allreduce(&lch,&gch,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)A)));
  if (gch) A->nonzerostate++;
  PetscFunctionReturn(0);
}
      const PetscInt row = lrows[r] + A->rmap->rstart;
      /* skip rows whose global index has no matching column (rectangular case) */
      if (row >= A->cmap->N) continue;
      PetscCall(MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES));
    }
    /* restore the saved nonew flags */
    aijA->nonew = nnwA;
    aijB->nonew = nnwB;
  } else {
    /* diag == 0.0: just zero the rows in both blocks */
    PetscCall(MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL));
    PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL));
  }
  PetscCall(PetscFree(lrows));
  PetscCall(MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY));

  /* reduce nonzerostate: bump the parallel state if any rank's sub-block pattern changed */
  lch = (PetscBool)(sA != mat->A->nonzerostate || sB != mat->B->nonzerostate);
  PetscCall(MPIU_Allreduce(&lch,&gch,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)A)));
  if (gch) A->nonzerostate++;
  PetscFunctionReturn(0);
}

/* Zero the given global rows AND the matching columns, optionally placing diag on the diagonal and updating b from x */
PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
  PetscMPIInt        n = A->rmap->n;
  PetscInt           i,j,r,m,len = 0;
  PetscInt          *lrows,*owners = A->rmap->range;
  PetscMPIInt        p = 0;
  PetscSFNode       *rrows;
  PetscSF            sf;
  const PetscScalar *xx;
  PetscScalar       *bb,*mask,*aij_a;
  Vec                xmask,lmask;
  Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
  const PetscInt    *aj, *ii,*ridx;
  PetscScalar       *aa;

  PetscFunctionBegin;
  /* Create SF where leaves are input rows and roots are owned rows */
  PetscCall(PetscMalloc1(n, &lrows));
  for (r = 0; r < n; ++r) lrows[r] = -1;   /* -1 marks "row not flagged for zeroing" */
  PetscCall(PetscMalloc1(N, &rrows));
  for (r = 0; r < N; ++r) {
    const PetscInt idx = rows[r];
    PetscCheck(idx >= 0 && A->rmap->N > idx,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %" PetscInt_FMT " out of range [0,%" PetscInt_FMT ")",idx,A->rmap->N);
    if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
      PetscCall(PetscLayoutFindOwner(A->rmap,idx,&p));
    }
    rrows[r].rank  = p;
    rrows[r].index = rows[r] - owners[p];
  }

  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject) A), &sf));
  PetscCall(PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER));
  /* Collect flags for rows to be zeroed */
  /* NOTE(review): casts away const on rows for the reduce source buffer; MPI_LOR over the row indices
     only distinguishes >= 0 (flagged) from the -1 sentinel in lrows */
  PetscCall(PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR));
  PetscCall(PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR));
  PetscCall(PetscSFDestroy(&sf));
  /* Compress and put in row numbers */
  for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
  /* zero diagonal part of matrix */
  PetscCall(MatZeroRowsColumns(l->A,len,lrows,diag,x,b));
  /* handle off diagonal part of matrix: build a 0/1 mask of zeroed rows and scatter it to ghost columns */
  PetscCall(MatCreateVecs(A,&xmask,NULL));
  PetscCall(VecDuplicate(l->lvec,&lmask));
  PetscCall(VecGetArray(xmask,&bb));
  for (i=0; i<len; i++) bb[lrows[i]] = 1;
  PetscCall(VecRestoreArray(xmask,&bb));
  PetscCall(VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD));
  PetscCall(VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD));
  PetscCall(VecDestroy(&xmask));
  if (x && b) { /* this code is buggy when the row and column layout don't match */
    PetscBool cong;

    PetscCall(MatHasCongruentLayouts(A,&cong));
    PetscCheck(cong,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Need matching row/col layout");
    PetscCall(VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD));
    PetscCall(VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD));
    PetscCall(VecGetArrayRead(l->lvec,&xx));
    PetscCall(VecGetArray(b,&bb));
  }
  PetscCall(VecGetArray(lmask,&mask));
  /* remove zeroed rows of off diagonal matrix */
  PetscCall(MatSeqAIJGetArray(l->B,&aij_a));
  ii = aij->i;
  for (i=0; i<len; i++) {
    PetscCall(PetscArrayzero(aij_a + ii[lrows[i]],ii[lrows[i]+1] - ii[lrows[i]]));
  }
  /* loop over all elements of off process part of matrix zeroing removed columns*/
  if (aij->compressedrow.use) {
    m =
aij->compressedrow.nrows;
    ii   = aij->compressedrow.i;
    ridx = aij->compressedrow.rindex;   /* maps compressed row slots back to true local rows */
    for (i=0; i<m; i++) {
      n  = ii[i+1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij_a + ii[i];

      for (j=0; j<n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          /* column is zeroed: move its contribution to the rhs, then clear the entry */
          if (b) bb[*ridx] -= *aa*xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
      ridx++;
    }
  } else { /* do not use compressed row format */
    m = l->B->rmap->n;
    for (i=0; i<m; i++) {
      n  = ii[i+1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij_a + ii[i];
      for (j=0; j<n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          if (b) bb[i] -= *aa*xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
    }
  }
  if (x && b) {
    PetscCall(VecRestoreArray(b,&bb));
    PetscCall(VecRestoreArrayRead(l->lvec,&xx));
  }
  PetscCall(MatSeqAIJRestoreArray(l->B,&aij_a));
  PetscCall(VecRestoreArray(lmask,&mask));
  PetscCall(VecDestroy(&lmask));
  PetscCall(PetscFree(lrows));

  /* only change matrix nonzero state if pattern was allowed to be changed */
  if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) {
    PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
    PetscCall(MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A)));
  }
  PetscFunctionReturn(0);
}

/* y = A*x: overlaps the ghost-value scatter with the local diagonal-block product */
PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
  PetscInt    nt;
  VecScatter  Mvctx = a->Mvctx;

  PetscFunctionBegin;
  PetscCall(VecGetLocalSize(xx,&nt));
  PetscCheck(nt == A->cmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%" PetscInt_FMT ") and xx (%" PetscInt_FMT ")",A->cmap->n,nt);
  /* start communicating ghost values, compute yy = A_diag*xx meanwhile, then add B*ghosts */
  PetscCall(VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD));
  PetscCall((*a->A->ops->mult)(a->A,xx,yy));
  PetscCall(VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD));
  PetscCall((*a->B->ops->multadd)(a->B,a->lvec,yy,yy));
  PetscFunctionReturn(0);
}

/* xx = D^{-1}-type diagonal-block action: forwards to the sequential diagonal block */
PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  PetscCall(MatMultDiagonalBlock(a->A,bb,xx));
  PetscFunctionReturn(0);
}

/* zz = yy + A*xx, with the same scatter/compute overlap as MatMult_MPIAIJ */
PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
  VecScatter  Mvctx = a->Mvctx;

  PetscFunctionBegin;
  PetscCall(VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD));
  PetscCall((*a->A->ops->multadd)(a->A,xx,yy,zz));
  PetscCall(VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD));
  PetscCall((*a->B->ops->multadd)(a->B,a->lvec,zz,zz));
  PetscFunctionReturn(0);
}

/* yy = A^T*xx: local transposes first, then a reverse scatter accumulates the off-diagonal contributions */
PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  /* do nondiagonal part */
  PetscCall((*a->B->ops->multtranspose)(a->B,xx,a->lvec));
  /* do local part */
  PetscCall((*a->A->ops->multtranspose)(a->A,xx,yy));
  /* add partial results together */
  PetscCall(VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE));
  PetscCall(VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE));
  PetscFunctionReturn(0);
}

/* Test whether Bmat equals Amat^T (within tol); result f is collective over Amat's communicator */
PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool *f)
{
  MPI_Comm    comm;
  Mat_MPIAIJ *Aij = (Mat_MPIAIJ*) Amat->data, *Bij;
  Mat         Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
  IS          Me,Notme;
  PetscInt    M,N,first,last,*notme,i;
  PetscBool   lf;
  PetscMPIInt size;

  PetscFunctionBegin;
  /* Easy test: symmetric diagonal block */
  Bij = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A;
  PetscCall(MatIsTranspose(Adia,Bdia,tol,&lf));
  PetscCall(MPIU_Allreduce(&lf,f,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)Amat)));
  if (!*f) PetscFunctionReturn(0);
PetscCall(PetscObjectGetComm((PetscObject)Amat,&comm)); 1037 PetscCallMPI(MPI_Comm_size(comm,&size)); 1038 if (size == 1) PetscFunctionReturn(0); 1039 1040 /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. */ 1041 PetscCall(MatGetSize(Amat,&M,&N)); 1042 PetscCall(MatGetOwnershipRange(Amat,&first,&last)); 1043 PetscCall(PetscMalloc1(N-last+first,¬me)); 1044 for (i=0; i<first; i++) notme[i] = i; 1045 for (i=last; i<M; i++) notme[i-last+first] = i; 1046 PetscCall(ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme)); 1047 PetscCall(ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me)); 1048 PetscCall(MatCreateSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs)); 1049 Aoff = Aoffs[0]; 1050 PetscCall(MatCreateSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs)); 1051 Boff = Boffs[0]; 1052 PetscCall(MatIsTranspose(Aoff,Boff,tol,f)); 1053 PetscCall(MatDestroyMatrices(1,&Aoffs)); 1054 PetscCall(MatDestroyMatrices(1,&Boffs)); 1055 PetscCall(ISDestroy(&Me)); 1056 PetscCall(ISDestroy(&Notme)); 1057 PetscCall(PetscFree(notme)); 1058 PetscFunctionReturn(0); 1059 } 1060 1061 PetscErrorCode MatIsSymmetric_MPIAIJ(Mat A,PetscReal tol,PetscBool *f) 1062 { 1063 PetscFunctionBegin; 1064 PetscCall(MatIsTranspose_MPIAIJ(A,A,tol,f)); 1065 PetscFunctionReturn(0); 1066 } 1067 1068 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1069 { 1070 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1071 1072 PetscFunctionBegin; 1073 /* do nondiagonal part */ 1074 PetscCall((*a->B->ops->multtranspose)(a->B,xx,a->lvec)); 1075 /* do local part */ 1076 PetscCall((*a->A->ops->multtransposeadd)(a->A,xx,yy,zz)); 1077 /* add partial results together */ 1078 PetscCall(VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE)); 1079 PetscCall(VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE)); 1080 PetscFunctionReturn(0); 1081 } 1082 1083 /* 1084 This only works correctly for square matrices where the subblock A->A is the 1085 
   diagonal block
*/
/* Extract the diagonal of A into v; requires matching row/column ownership so the diagonal lives in a->A */
PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  PetscCheck(A->rmap->N == A->cmap->N,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
  PetscCheck(A->rmap->rstart == A->cmap->rstart && A->rmap->rend == A->cmap->rend,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
  PetscCall(MatGetDiagonal(a->A,v));
  PetscFunctionReturn(0);
}

/* Scale every stored entry by aa: purely local, applied to both blocks */
PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  PetscCall(MatScale(a->A,aa));
  PetscCall(MatScale(a->B,aa));
  PetscFunctionReturn(0);
}

/* Free COO stuff; must match allocation methods in MatSetPreallocationCOO_MPIAIJ() */
PETSC_INTERN PetscErrorCode MatResetPreallocationCOO_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;

  PetscFunctionBegin;
  PetscCall(PetscSFDestroy(&aij->coo_sf));
  /* pass-1 maps/permutations */
  PetscCall(PetscFree(aij->Aperm1));
  PetscCall(PetscFree(aij->Bperm1));
  PetscCall(PetscFree(aij->Ajmap1));
  PetscCall(PetscFree(aij->Bjmap1));

  /* pass-2 maps/permutations */
  PetscCall(PetscFree(aij->Aimap2));
  PetscCall(PetscFree(aij->Bimap2));
  PetscCall(PetscFree(aij->Aperm2));
  PetscCall(PetscFree(aij->Bperm2));
  PetscCall(PetscFree(aij->Ajmap2));
  PetscCall(PetscFree(aij->Bjmap2));

  PetscCall(PetscFree2(aij->sendbuf,aij->recvbuf));
  PetscCall(PetscFree(aij->Cperm1));
  PetscFunctionReturn(0);
}

/* Destroy the MPIAIJ matrix: release sub-blocks, communication objects, COO data, and composed functions */
PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT,mat->rmap->N,mat->cmap->N);
#endif
  PetscCall(MatStashDestroy_Private(&mat->stash));
  PetscCall(VecDestroy(&aij->diag));
  PetscCall(MatDestroy(&aij->A));
  PetscCall(MatDestroy(&aij->B));
  /* colmap stores the global-to-local column mapping; its representation depends on PETSC_USE_CTABLE */
#if defined(PETSC_USE_CTABLE)
  PetscCall(PetscTableDestroy(&aij->colmap));
#else
  PetscCall(PetscFree(aij->colmap));
#endif
  PetscCall(PetscFree(aij->garray));
  PetscCall(VecDestroy(&aij->lvec));
  PetscCall(VecScatterDestroy(&aij->Mvctx));
  PetscCall(PetscFree2(aij->rowvalues,aij->rowindices));
  PetscCall(PetscFree(aij->ld));

  /* Free COO */
  PetscCall(MatResetPreallocationCOO_MPIAIJ(mat));

  PetscCall(PetscFree(mat->data));

  /* may be created by MatCreateMPIAIJSumSeqAIJSymbolic */
  PetscCall(PetscObjectCompose((PetscObject)mat,"MatMergeSeqsToMPI",NULL));

  /* clear the type name and all composed method slots so the object can be re-typed */
  PetscCall(PetscObjectChangeTypeName((PetscObject)mat,NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatResetPreallocation_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpibaij_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL));
#if defined(PETSC_HAVE_CUDA)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijcusparse_C",NULL));
#endif
#if defined(PETSC_HAVE_KOKKOS_KERNELS)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijkokkos_C",NULL));
#endif

PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpidense_C",NULL)); 1180 #if defined(PETSC_HAVE_ELEMENTAL) 1181 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_elemental_C",NULL)); 1182 #endif 1183 #if defined(PETSC_HAVE_SCALAPACK) 1184 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_scalapack_C",NULL)); 1185 #endif 1186 #if defined(PETSC_HAVE_HYPRE) 1187 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_hypre_C",NULL)); 1188 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_transpose_mpiaij_mpiaij_C",NULL)); 1189 #endif 1190 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_is_C",NULL)); 1191 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_is_mpiaij_C",NULL)); 1192 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaij_mpiaij_C",NULL)); 1193 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetUseScalableIncreaseOverlap_C",NULL)); 1194 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijperm_C",NULL)); 1195 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijsell_C",NULL)); 1196 #if defined(PETSC_HAVE_MKL_SPARSE) 1197 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijmkl_C",NULL)); 1198 #endif 1199 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijcrl_C",NULL)); 1200 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_is_C",NULL)); 1201 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisell_C",NULL)); 1202 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatSetPreallocationCOO_C",NULL)); 1203 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatSetValuesCOO_C",NULL)); 1204 PetscFunctionReturn(0); 1205 } 1206 1207 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer 
viewer)
{
  Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ        *A = (Mat_SeqAIJ*)aij->A->data;
  Mat_SeqAIJ        *B = (Mat_SeqAIJ*)aij->B->data;
  const PetscInt    *garray = aij->garray;   /* local-to-global map for B's columns */
  const PetscScalar *aa,*ba;
  PetscInt           header[4],M,N,m,rs,cs,nz,cnt,i,ja,jb;
  PetscInt          *rowlens;
  PetscInt          *colidxs;
  PetscScalar       *matvals;

  PetscFunctionBegin;
  PetscCall(PetscViewerSetUp(viewer));

  M  = mat->rmap->N;
  N  = mat->cmap->N;
  m  = mat->rmap->n;
  rs = mat->rmap->rstart;
  cs = mat->cmap->rstart;
  nz = A->nz + B->nz;   /* local nonzero count (diagonal + off-diagonal blocks) */

  /* write matrix header */
  header[0] = MAT_FILE_CLASSID;
  header[1] = M; header[2] = N; header[3] = nz;
  /* rank 0 receives the global nonzero count into header[3]; other ranks keep their local value unused */
  PetscCallMPI(MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat)));
  PetscCall(PetscViewerBinaryWrite(viewer,header,4,PETSC_INT));

  /* fill in and store row lengths */
  PetscCall(PetscMalloc1(m,&rowlens));
  for (i=0; i<m; i++) rowlens[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
  PetscCall(PetscViewerBinaryWriteAll(viewer,rowlens,m,rs,M,PETSC_INT));
  PetscCall(PetscFree(rowlens));

  /* fill in and store column indices: emit B entries left of the diagonal block, then A, then remaining B,
     so each row's global column indices come out in ascending order */
  PetscCall(PetscMalloc1(nz,&colidxs));
  for (cnt=0, i=0; i<m; i++) {
    for (jb=B->i[i]; jb<B->i[i+1]; jb++) {
      if (garray[B->j[jb]] > cs) break;
      colidxs[cnt++] = garray[B->j[jb]];
    }
    for (ja=A->i[i]; ja<A->i[i+1]; ja++)
      colidxs[cnt++] = A->j[ja] + cs;
    for (; jb<B->i[i+1]; jb++)
      colidxs[cnt++] = garray[B->j[jb]];
  }
  PetscCheck(cnt == nz,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt_FMT,cnt,nz);
  PetscCall(PetscViewerBinaryWriteAll(viewer,colidxs,nz,PETSC_DETERMINE,PETSC_DETERMINE,PETSC_INT));
  PetscCall(PetscFree(colidxs));

  /* fill in and store nonzero values */
  PetscCall(MatSeqAIJGetArrayRead(aij->A,&aa));
  PetscCall(MatSeqAIJGetArrayRead(aij->B,&ba));
1260 PetscCall(PetscMalloc1(nz,&matvals)); 1261 for (cnt=0, i=0; i<m; i++) { 1262 for (jb=B->i[i]; jb<B->i[i+1]; jb++) { 1263 if (garray[B->j[jb]] > cs) break; 1264 matvals[cnt++] = ba[jb]; 1265 } 1266 for (ja=A->i[i]; ja<A->i[i+1]; ja++) 1267 matvals[cnt++] = aa[ja]; 1268 for (; jb<B->i[i+1]; jb++) 1269 matvals[cnt++] = ba[jb]; 1270 } 1271 PetscCall(MatSeqAIJRestoreArrayRead(aij->A,&aa)); 1272 PetscCall(MatSeqAIJRestoreArrayRead(aij->B,&ba)); 1273 PetscCheck(cnt == nz,PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt_FMT,cnt,nz); 1274 PetscCall(PetscViewerBinaryWriteAll(viewer,matvals,nz,PETSC_DETERMINE,PETSC_DETERMINE,PETSC_SCALAR)); 1275 PetscCall(PetscFree(matvals)); 1276 1277 /* write block size option to the viewer's .info file */ 1278 PetscCall(MatView_Binary_BlockSizes(mat,viewer)); 1279 PetscFunctionReturn(0); 1280 } 1281 1282 #include <petscdraw.h> 1283 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 1284 { 1285 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1286 PetscMPIInt rank = aij->rank,size = aij->size; 1287 PetscBool isdraw,iascii,isbinary; 1288 PetscViewer sviewer; 1289 PetscViewerFormat format; 1290 1291 PetscFunctionBegin; 1292 PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw)); 1293 PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii)); 1294 PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary)); 1295 if (iascii) { 1296 PetscCall(PetscViewerGetFormat(viewer,&format)); 1297 if (format == PETSC_VIEWER_LOAD_BALANCE) { 1298 PetscInt i,nmax = 0,nmin = PETSC_MAX_INT,navg = 0,*nz,nzlocal = ((Mat_SeqAIJ*) (aij->A->data))->nz + ((Mat_SeqAIJ*) (aij->B->data))->nz; 1299 PetscCall(PetscMalloc1(size,&nz)); 1300 PetscCallMPI(MPI_Allgather(&nzlocal,1,MPIU_INT,nz,1,MPIU_INT,PetscObjectComm((PetscObject)mat))); 1301 for (i=0; i<(PetscInt)size; i++) { 1302 nmax = PetscMax(nmax,nz[i]); 1303 nmin = 
PetscMin(nmin,nz[i]);
        navg += nz[i];
      }
      PetscCall(PetscFree(nz));
      navg = navg/size;
      PetscCall(PetscViewerASCIIPrintf(viewer,"Load Balance - Nonzeros: Min %" PetscInt_FMT " avg %" PetscInt_FMT " max %" PetscInt_FMT "\n",nmin,navg,nmax));
      PetscFunctionReturn(0);
    }
    PetscCall(PetscViewerGetFormat(viewer,&format));
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo   info;
      PetscInt *inodes=NULL;

      PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank));
      PetscCall(MatGetInfo(mat,MAT_LOCAL,&info));
      PetscCall(MatInodeGetInodeSizes(aij->A,NULL,&inodes,NULL));
      /* synchronized printing so each rank's lines appear in rank order */
      PetscCall(PetscViewerASCIIPushSynchronized(viewer));
      if (!inodes) {
        PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %g, not using I-node routines\n",
                                                     rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory));
      } else {
        PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %g, using I-node routines\n",
                                                     rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory));
      }
      PetscCall(MatGetInfo(aij->A,MAT_LOCAL,&info));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %" PetscInt_FMT " \n",rank,(PetscInt)info.nz_used));
      PetscCall(MatGetInfo(aij->B,MAT_LOCAL,&info));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %" PetscInt_FMT " \n",rank,(PetscInt)info.nz_used));
      PetscCall(PetscViewerFlush(viewer));
      PetscCall(PetscViewerASCIIPopSynchronized(viewer));
      PetscCall(PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n"));
      PetscCall(VecScatterView(aij->Mvctx,viewer));
      PetscFunctionReturn(0);
    } else if (format ==
PETSC_VIEWER_ASCII_INFO) {
      PetscInt inodecount,inodelimit,*inodes;
      PetscCall(MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit));
      if (inodes) {
        PetscCall(PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %" PetscInt_FMT " nodes, limit used is %" PetscInt_FMT "\n",inodecount,inodelimit));
      } else {
        PetscCall(PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n"));
      }
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
  } else if (isbinary) {
    if (size == 1) {
      PetscCall(PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name));
      PetscCall(MatView(aij->A,viewer));
    } else {
      PetscCall(MatView_MPIAIJ_Binary(mat,viewer));
    }
    PetscFunctionReturn(0);
  } else if (iascii && size == 1) {
    /* NOTE(review): unreachable — the iascii case is fully handled by the first branch of this chain */
    PetscCall(PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name));
    PetscCall(MatView(aij->A,viewer));
    PetscFunctionReturn(0);
  } else if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;
    PetscCall(PetscViewerDrawGetDraw(viewer,0,&draw));
    PetscCall(PetscDrawIsNull(draw,&isnull));
    if (isnull) PetscFunctionReturn(0);
  }

  { /* assemble the entire matrix onto first processor */
    Mat A = NULL, Av;
    IS  isrow,iscol;

    /* rank 0 requests all rows/cols; every other rank requests none */
    PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->rmap->N : 0,0,1,&isrow));
    PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->cmap->N : 0,0,1,&iscol));
    PetscCall(MatCreateSubMatrix(mat,isrow,iscol,MAT_INITIAL_MATRIX,&A));
    PetscCall(MatMPIAIJGetSeqAIJ(A,&Av,NULL,NULL));
    /* The commented code uses MatCreateSubMatrices instead */
    /*
      Mat *AA, A = NULL, Av;
      IS  isrow,iscol;

      PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ?
mat->rmap->N : 0,0,1,&isrow));
      PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->cmap->N : 0,0,1,&iscol));
      PetscCall(MatCreateSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&AA));
      if (rank == 0) {
        PetscCall(PetscObjectReference((PetscObject)AA[0]));
        A    = AA[0];
        Av   = AA[0];
      }
      PetscCall(MatDestroySubMatrices(1,&AA));
    */
    PetscCall(ISDestroy(&iscol));
    PetscCall(ISDestroy(&isrow));
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    PetscCall(PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer));
    if (rank == 0) {
      if (((PetscObject)mat)->name) {
        PetscCall(PetscObjectSetName((PetscObject)Av,((PetscObject)mat)->name));
      }
      PetscCall(MatView_SeqAIJ(Av,sviewer));
    }
    PetscCall(PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer));
    PetscCall(PetscViewerFlush(viewer));
    PetscCall(MatDestroy(&A));
  }
  PetscFunctionReturn(0);
}

/* Top-level MatView for MPIAIJ: forwards supported viewer types to the worker routine */
PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
{
  PetscBool iascii,isdraw,issocket,isbinary;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket));
  if (iascii || isdraw || isbinary || issocket) {
    PetscCall(MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer));
  }
  PetscFunctionReturn(0);
}

/* Block SOR/Jacobi-like relaxation: local sweeps on the diagonal block with ghost updates between iterations */
PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data;
  Vec         bb1 = NULL;
  PetscBool   hasop;

  PetscFunctionBegin;
  if (flag == SOR_APPLY_UPPER) {
    PetscCall((*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx));
    PetscFunctionReturn(0);
  }

  /* bb1 (work rhs) is needed unless a single zero-initial-guess sweep suffices */
  if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) {
    PetscCall(VecDuplicate(bb,&bb1));
  }

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      /* first sweep needs no ghost values since x starts at zero */
      PetscCall((*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx));
      its--;
    }

    while (its--) {
      PetscCall(VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD));
      PetscCall(VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD));

      /* update rhs: bb1 = bb - B*x */
      PetscCall(VecScale(mat->lvec,-1.0));
      PetscCall((*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1));

      /* local sweep */
      PetscCall((*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx));
    }
  } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      PetscCall((*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx));
      its--;
    }
    while (its--) {
      PetscCall(VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD));
      PetscCall(VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD));

      /* update rhs: bb1 = bb - B*x */
      PetscCall(VecScale(mat->lvec,-1.0));
      PetscCall((*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1));

      /* local sweep */
      PetscCall((*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx));
    }
  } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      PetscCall((*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx));
      its--;
    }
    while (its--) {
      PetscCall(VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD));
      PetscCall(VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD));

      /* update rhs: bb1 = bb - B*x */
      PetscCall(VecScale(mat->lvec,-1.0));
      PetscCall((*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1));

      /* local sweep */
      PetscCall((*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx));
    }
  } else if (flag & SOR_EISENSTAT) {
    Vec xx1;

    PetscCall(VecDuplicate(bb,&xx1));
    PetscCall((*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx));

    PetscCall(VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD));
    PetscCall(VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD));
    /* lazily build and cache the diagonal used by the Eisenstat trick */
    if (!mat->diag) {
      PetscCall(MatCreateVecs(matin,&mat->diag,NULL));
      PetscCall(MatGetDiagonal(matin,mat->diag));
    }
    PetscCall(MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop));
    if (hasop) {
      PetscCall(MatMultDiagonalBlock(matin,xx,bb1));
    } else {
      PetscCall(VecPointwiseMult(bb1,mat->diag,xx));
    }
    PetscCall(VecAYPX(bb1,(omega-2.0)/omega,bb));

    PetscCall(MatMultAdd(mat->B,mat->lvec,bb1,bb1));

    /* local sweep */
    PetscCall((*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1));
    PetscCall(VecAXPY(xx,1.0,xx1));
    PetscCall(VecDestroy(&xx1));
  } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported");

  PetscCall(VecDestroy(&bb1));

  matin->factorerrortype = mat->A->factorerrortype;
  PetscFunctionReturn(0);
}

/* B = P_row * A * P_col: permute the parallel matrix according to the row/column permutations rowp/colp */
PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
{
  Mat             aA,aB,Aperm;
  const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj;
  PetscScalar    *aa,*ba;
  PetscInt        i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest;
  PetscSF         rowsf,sf;
  IS              parcolp = NULL;
  PetscBool       done;

  PetscFunctionBegin;
  PetscCall(MatGetLocalSize(A,&m,&n));
  PetscCall(ISGetIndices(rowp,&rwant));
  PetscCall(ISGetIndices(colp,&cwant));
  PetscCall(PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest));

  /* Invert row permutation to find out where my rows should go */
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf));
  PetscCall(PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant));
  PetscCall(PetscSFSetFromOptions(rowsf));
  for (i=0; i<m; i++) work[i] = A->rmap->rstart + i;
  PetscCall(PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPI_REPLACE));
  PetscCall(PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPI_REPLACE));

  /* Invert column permutation to find out where my columns should go */
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A),&sf));
  PetscCall(PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant));
  PetscCall(PetscSFSetFromOptions(sf));
  for (i=0; i<n; i++) work[i] = A->cmap->rstart + i;
  PetscCall(PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPI_REPLACE));
  PetscCall(PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPI_REPLACE));
  PetscCall(PetscSFDestroy(&sf));

  PetscCall(ISRestoreIndices(rowp,&rwant));
  PetscCall(ISRestoreIndices(colp,&cwant));
  PetscCall(MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols));

  /* Find out where my gcols should go: broadcast destination columns to the ghost column list */
  PetscCall(MatGetSize(aB,NULL,&ng));
  PetscCall(PetscMalloc1(ng,&gcdest));
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A),&sf));
  PetscCall(PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols));
  PetscCall(PetscSFSetFromOptions(sf));
  PetscCall(PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest,MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest,MPI_REPLACE));
  PetscCall(PetscSFDestroy(&sf));

  /* count diagonal/off-diagonal nonzeros of each permuted row for preallocation of the result */
  PetscCall(PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz));
  PetscCall(MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done));
  PetscCall(MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done));
  for (i=0; i<m; i++) {
    PetscInt    row = rdest[i];
    PetscMPIInt rowner;
    PetscCall(PetscLayoutFindOwner(A->rmap,row,&rowner));
    for (j=ai[i]; j<ai[i+1]; j++) {
      PetscInt    col = cdest[aj[j]];
      PetscMPIInt cowner;
      PetscCall(PetscLayoutFindOwner(A->cmap,col,&cowner)); /* Could build an index for the columns to eliminate this search */
      if (rowner == cowner) dnnz[i]++;
      else onnz[i]++;
    }
    for (j=bi[i]; j<bi[i+1]; j++) {
      PetscInt    col = gcdest[bj[j]];
      PetscMPIInt cowner;
      PetscCall(PetscLayoutFindOwner(A->cmap,col,&cowner));
      if (rowner == cowner) dnnz[i]++;
      else onnz[i]++;
    }
  }
  /* ship the counts to the ranks that own the destination rows */
  PetscCall(PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz,MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz,MPI_REPLACE));
  PetscCall(PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz,MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz,MPI_REPLACE));
  PetscCall(PetscSFDestroy(&rowsf));

  PetscCall(MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm));
  PetscCall(MatSeqAIJGetArray(aA,&aa));
  PetscCall(MatSeqAIJGetArray(aB,&ba));
  for (i=0; i<m; i++) {
    PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */
    PetscInt  j0,rowlen;
    rowlen = ai[i+1] - ai[i];
    for (j0=j=0; j<rowlen; j0=j) { /* rowlen could be larger than number of rows m, so sum in batches */
      for (; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]];
      PetscCall(MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES));
    }
    rowlen = bi[i+1] - bi[i];
    for (j0=j=0; j<rowlen; j0=j) {
      for (; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] =
gcdest[bj[bi[i]+j]]; 1614 PetscCall(MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES)); 1615 } 1616 } 1617 PetscCall(MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY)); 1618 PetscCall(MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY)); 1619 PetscCall(MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done)); 1620 PetscCall(MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done)); 1621 PetscCall(MatSeqAIJRestoreArray(aA,&aa)); 1622 PetscCall(MatSeqAIJRestoreArray(aB,&ba)); 1623 PetscCall(PetscFree4(dnnz,onnz,tdnnz,tonnz)); 1624 PetscCall(PetscFree3(work,rdest,cdest)); 1625 PetscCall(PetscFree(gcdest)); 1626 if (parcolp) PetscCall(ISDestroy(&colp)); 1627 *B = Aperm; 1628 PetscFunctionReturn(0); 1629 } 1630 1631 PetscErrorCode MatGetGhosts_MPIAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[]) 1632 { 1633 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1634 1635 PetscFunctionBegin; 1636 PetscCall(MatGetSize(aij->B,NULL,nghosts)); 1637 if (ghosts) *ghosts = aij->garray; 1638 PetscFunctionReturn(0); 1639 } 1640 1641 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1642 { 1643 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1644 Mat A = mat->A,B = mat->B; 1645 PetscLogDouble isend[5],irecv[5]; 1646 1647 PetscFunctionBegin; 1648 info->block_size = 1.0; 1649 PetscCall(MatGetInfo(A,MAT_LOCAL,info)); 1650 1651 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1652 isend[3] = info->memory; isend[4] = info->mallocs; 1653 1654 PetscCall(MatGetInfo(B,MAT_LOCAL,info)); 1655 1656 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1657 isend[3] += info->memory; isend[4] += info->mallocs; 1658 if (flag == MAT_LOCAL) { 1659 info->nz_used = isend[0]; 1660 info->nz_allocated = isend[1]; 1661 info->nz_unneeded = isend[2]; 1662 info->memory = isend[3]; 1663 info->mallocs = isend[4]; 1664 } else if (flag == MAT_GLOBAL_MAX) { 1665 
/* (MatGetInfo_MPIAIJ continued) MAT_GLOBAL_MAX / MAT_GLOBAL_SUM reduce the five
   local counters across all ranks of the matrix's communicator. */
PetscCall(MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_MAX,PetscObjectComm((PetscObject)matin))); 1666 1667 info->nz_used = irecv[0]; 1668 info->nz_allocated = irecv[1]; 1669 info->nz_unneeded = irecv[2]; 1670 info->memory = irecv[3]; 1671 info->mallocs = irecv[4]; 1672 } else if (flag == MAT_GLOBAL_SUM) { 1673 PetscCall(MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_SUM,PetscObjectComm((PetscObject)matin))); 1674 1675 info->nz_used = irecv[0]; 1676 info->nz_allocated = irecv[1]; 1677 info->nz_unneeded = irecv[2]; 1678 info->memory = irecv[3]; 1679 info->mallocs = irecv[4]; 1680 } 1681 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1682 info->fill_ratio_needed = 0; 1683 info->factor_mallocs = 0; 1684 PetscFunctionReturn(0); 1685 } 1686 
/* MatSetOption_MPIAIJ: options that affect local storage are forwarded to the
   sequential blocks a->A and a->B; purely parallel flags (donotstash,
   submat_singleis, ...) are recorded on the MPI wrapper itself. */
1687 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg) 1688 { 1689 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1690 1691 PetscFunctionBegin; 1692 switch (op) { 1693 case MAT_NEW_NONZERO_LOCATIONS: 1694 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1695 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1696 case MAT_KEEP_NONZERO_PATTERN: 1697 case MAT_NEW_NONZERO_LOCATION_ERR: 1698 case MAT_USE_INODES: 1699 case MAT_IGNORE_ZERO_ENTRIES: 1700 case MAT_FORM_EXPLICIT_TRANSPOSE: 1701 MatCheckPreallocated(A,1); 1702 PetscCall(MatSetOption(a->A,op,flg)); 1703 PetscCall(MatSetOption(a->B,op,flg)); 1704 break; 1705 case MAT_ROW_ORIENTED: 1706 MatCheckPreallocated(A,1); 1707 a->roworiented = flg; 1708 1709 PetscCall(MatSetOption(a->A,op,flg)); 1710 PetscCall(MatSetOption(a->B,op,flg)); 1711 break; 1712 case MAT_FORCE_DIAGONAL_ENTRIES: 1713 case MAT_SORTED_FULL: 1714 PetscCall(PetscInfo(A,"Option %s ignored\n",MatOptions[op])); 1715 break; 1716 case MAT_IGNORE_OFF_PROC_ENTRIES: 1717 a->donotstash = flg; 1718 break; 1719 /* Symmetry flags are handled directly by MatSetOption() and they don't affect preallocation */ 1720 case MAT_SPD: 1721 case MAT_SYMMETRIC: 1722 case MAT_STRUCTURALLY_SYMMETRIC: 1723 case MAT_HERMITIAN: 1724 
case MAT_SYMMETRY_ETERNAL: 1725 case MAT_STRUCTURAL_SYMMETRY_ETERNAL: 1726 case MAT_SPD_ETERNAL: 1727 /* if the diagonal matrix is square it inherits some of the properties above */ 1728 break; 1729 case MAT_SUBMAT_SINGLEIS: 1730 A->submat_singleis = flg; 1731 break; 1732 case MAT_STRUCTURE_ONLY: 1733 /* The option is handled directly by MatSetOption() */ 1734 break; 1735 default: 1736 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1737 } 1738 PetscFunctionReturn(0); 1739 } 1740 
/* MatGetRow_MPIAIJ: assemble one locally owned row by merging the row of the
   diagonal block A (local column numbering shifted by cstart) with the row of the
   off-diagonal block B (compressed columns mapped through garray), producing
   globally sorted column indices. The workspace rowvalues/rowindices is sized to
   the longest local row on first use; only one row may be active at a time. */
1741 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1742 { 1743 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1744 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1745 PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart; 1746 PetscInt nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend; 1747 PetscInt *cmap,*idx_p; 1748 1749 PetscFunctionBegin; 1750 PetscCheck(!mat->getrowactive,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1751 mat->getrowactive = PETSC_TRUE; 1752 1753 if (!mat->rowvalues && (idx || v)) { 1754 /* 1755 allocate enough space to hold information from the longest row. 
1756 */ 1757 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1758 PetscInt max = 1,tmp; 1759 for (i=0; i<matin->rmap->n; i++) { 1760 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1761 if (max < tmp) max = tmp; 1762 } 1763 PetscCall(PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices)); 1764 } 1765 1766 PetscCheck(row >= rstart && row < rend,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows"); 1767 lrow = row - rstart; 1768 
/* Fetch the row from both blocks; pvA/pcA etc. are NULL when the caller did not
   ask for values/indices, so the sequential getrow can skip that work. */
1769 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1770 if (!v) {pvA = NULL; pvB = NULL;} 1771 if (!idx) {pcA = NULL; if (!v) pcB = NULL;} 1772 PetscCall((*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA)); 1773 PetscCall((*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB)); 1774 nztot = nzA + nzB; 1775 1776 cmap = mat->garray; 1777 if (v || idx) { 1778 if (nztot) { 1779 /* Sort by increasing column numbers, assuming A and B already sorted */ 1780 PetscInt imark = -1; 1781 if (v) { 1782 *v = v_p = mat->rowvalues; 1783 for (i=0; i<nzB; i++) { 1784 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1785 else break; 1786 } 1787 imark = i; 1788 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1789 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1790 } 1791 if (idx) { 1792 *idx = idx_p = mat->rowindices; 1793 if (imark > -1) { 1794 for (i=0; i<imark; i++) { 1795 idx_p[i] = cmap[cworkB[i]]; 1796 } 1797 } else { 1798 for (i=0; i<nzB; i++) { 1799 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1800 else break; 1801 } 1802 imark = i; 1803 } 1804 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1805 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1806 } 1807 } else { 1808 if (idx) *idx = NULL; 1809 if (v) *v = NULL; 1810 } 1811 } 1812 *nz = nztot; 1813 PetscCall((*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA)); 1814 PetscCall((*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB)); 1815 PetscFunctionReturn(0); 1816 } 1817 
/* MatRestoreRow_MPIAIJ: clears the single-row-active flag; the workspace is
   retained for the next MatGetRow() call and freed with the matrix. */
1818 PetscErrorCode MatRestoreRow_MPIAIJ(Mat 
mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1819 { 1820 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1821 1822 PetscFunctionBegin; 1823 PetscCheck(aij->getrowactive,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1824 aij->getrowactive = PETSC_FALSE; 1825 PetscFunctionReturn(0); 1826 } 1827 
/* MatNorm_MPIAIJ: Frobenius norm sums squares of both blocks and allreduces;
   1-norm accumulates per-global-column sums (garray maps B's compressed columns);
   infinity norm takes the max over local row sums then allreduces with MAX.
   The 2-norm is unsupported for sparse parallel matrices. */
1828 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1829 { 1830 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1831 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1832 PetscInt i,j,cstart = mat->cmap->rstart; 1833 PetscReal sum = 0.0; 1834 const MatScalar *v,*amata,*bmata; 1835 1836 PetscFunctionBegin; 1837 if (aij->size == 1) { 1838 PetscCall(MatNorm(aij->A,type,norm)); 1839 } else { 1840 PetscCall(MatSeqAIJGetArrayRead(aij->A,&amata)); 1841 PetscCall(MatSeqAIJGetArrayRead(aij->B,&bmata)); 1842 if (type == NORM_FROBENIUS) { 1843 v = amata; 1844 for (i=0; i<amat->nz; i++) { 1845 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1846 } 1847 v = bmata; 1848 for (i=0; i<bmat->nz; i++) { 1849 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1850 } 1851 PetscCall(MPIU_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat))); 1852 *norm = PetscSqrtReal(*norm); 1853 PetscCall(PetscLogFlops(2.0*amat->nz+2.0*bmat->nz)); 1854 } else if (type == NORM_1) { /* max column norm */ 1855 PetscReal *tmp,*tmp2; 1856 PetscInt *jj,*garray = aij->garray; 1857 PetscCall(PetscCalloc1(mat->cmap->N+1,&tmp)); 1858 PetscCall(PetscMalloc1(mat->cmap->N+1,&tmp2)); 1859 *norm = 0.0; 1860 v = amata; jj = amat->j; 1861 for (j=0; j<amat->nz; j++) { 1862 tmp[cstart + *jj++] += PetscAbsScalar(*v); v++; 1863 } 1864 v = bmata; jj = bmat->j; 1865 for (j=0; j<bmat->nz; j++) { 1866 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 1867 } 1868 PetscCall(MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat))); 1869 for (j=0; j<mat->cmap->N; j++) { 1870 if (tmp2[j] > *norm) *norm = 
tmp2[j]; 1871 } 1872 PetscCall(PetscFree(tmp)); 1873 PetscCall(PetscFree(tmp2)); 1874 PetscCall(PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0))); 1875 } else if (type == NORM_INFINITY) { /* max row norm */ 1876 PetscReal ntemp = 0.0; 1877 for (j=0; j<aij->A->rmap->n; j++) { 1878 v = amata + amat->i[j]; 1879 sum = 0.0; 1880 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 1881 sum += PetscAbsScalar(*v); v++; 1882 } 1883 v = bmata + bmat->i[j]; 1884 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 1885 sum += PetscAbsScalar(*v); v++; 1886 } 1887 if (sum > ntemp) ntemp = sum; 1888 } 1889 PetscCall(MPIU_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat))); 1890 PetscCall(PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0))); 1891 } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm"); 1892 PetscCall(MatSeqAIJRestoreArrayRead(aij->A,&amata)); 1893 PetscCall(MatSeqAIJRestoreArrayRead(aij->B,&bmata)); 1894 } 1895 PetscFunctionReturn(0); 1896 } 1897 
/* MatTranspose_MPIAIJ: builds B = A^T. For MAT_INITIAL_MATRIX (or in-place) it
   first computes the preallocation of the transpose: column counts of the local
   diagonal block give d_nnz, while counts of the off-diagonal block are reduced
   over a PetscSF onto the owning processes to give o_nnz. The diagonal block is
   then transposed locally (fast path, no MatSetValues) and the off-diagonal part
   is inserted one column-row at a time via MatSetValues. */
1898 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout) 1899 { 1900 Mat_MPIAIJ *a =(Mat_MPIAIJ*)A->data,*b; 1901 Mat_SeqAIJ *Aloc =(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data,*sub_B_diag; 1902 PetscInt M = A->rmap->N,N=A->cmap->N,ma,na,mb,nb,row,*cols,*cols_tmp,*B_diag_ilen,i,ncol,A_diag_ncol; 1903 const PetscInt *ai,*aj,*bi,*bj,*B_diag_i; 1904 Mat B,A_diag,*B_diag; 1905 const MatScalar *pbv,*bv; 1906 1907 PetscFunctionBegin; 1908 if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A,*matout)); 1909 ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n; 1910 ai = Aloc->i; aj = Aloc->j; 1911 bi = Bloc->i; bj = Bloc->j; 1912 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 1913 PetscInt *d_nnz,*g_nnz,*o_nnz; 1914 PetscSFNode *oloc; 1915 PETSC_UNUSED PetscSF sf; 1916 1917 PetscCall(PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc)); 1918 /* compute d_nnz for preallocation */ 1919 
PetscCall(PetscArrayzero(d_nnz,na)); 1920 for (i=0; i<ai[ma]; i++) d_nnz[aj[i]]++; 1921 /* compute local off-diagonal contributions */ 1922 PetscCall(PetscArrayzero(g_nnz,nb)); 1923 for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++; 1924 /* map those to global */ 1925 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A),&sf)); 1926 PetscCall(PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray)); 1927 PetscCall(PetscSFSetFromOptions(sf)); 1928 PetscCall(PetscArrayzero(o_nnz,na)); 1929 PetscCall(PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM)); 1930 PetscCall(PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM)); 1931 PetscCall(PetscSFDestroy(&sf)); 1932 1933 PetscCall(MatCreate(PetscObjectComm((PetscObject)A),&B)); 1934 PetscCall(MatSetSizes(B,A->cmap->n,A->rmap->n,N,M)); 1935 PetscCall(MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs))); 1936 PetscCall(MatSetType(B,((PetscObject)A)->type_name)); 1937 PetscCall(MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz)); 1938 PetscCall(PetscFree4(d_nnz,o_nnz,g_nnz,oloc)); 1939 } else { 1940 B = *matout; 1941 PetscCall(MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE)); 1942 } 1943 1944 b = (Mat_MPIAIJ*)B->data; 1945 A_diag = a->A; 1946 B_diag = &b->A; 1947 sub_B_diag = (Mat_SeqAIJ*)(*B_diag)->data; 1948 A_diag_ncol = A_diag->cmap->N; 1949 B_diag_ilen = sub_B_diag->ilen; 1950 B_diag_i = sub_B_diag->i; 1951 1952 /* Set ilen for diagonal of B */ 1953 for (i=0; i<A_diag_ncol; i++) { 1954 B_diag_ilen[i] = B_diag_i[i+1] - B_diag_i[i]; 1955 } 1956 1957 /* Transpose the diagonal part of the matrix. In contrast to the offdiagonal part, this can be done 1958 very quickly (=without using MatSetValues), because all writes are local. 
*/ 1959 PetscCall(MatTransposeSetPrecursor(A_diag,*B_diag)); 1960 PetscCall(MatTranspose(A_diag,MAT_REUSE_MATRIX,B_diag)); 1961 1962 /* copy over the B part */ 1963 PetscCall(PetscMalloc1(bi[mb],&cols)); 1964 PetscCall(MatSeqAIJGetArrayRead(a->B,&bv)); 1965 pbv = bv; 1966 row = A->rmap->rstart; 1967 for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]]; 1968 cols_tmp = cols; 1969 for (i=0; i<mb; i++) { 1970 ncol = bi[i+1]-bi[i]; 1971 PetscCall(MatSetValues(B,ncol,cols_tmp,1,&row,pbv,INSERT_VALUES)); 1972 row++; 1973 pbv += ncol; cols_tmp += ncol; 1974 } 1975 PetscCall(PetscFree(cols)); 1976 PetscCall(MatSeqAIJRestoreArrayRead(a->B,&bv)); 1977 1978 PetscCall(MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY)); 1979 PetscCall(MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY)); 1980 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) { 1981 *matout = B; 1982 } else { 1983 PetscCall(MatHeaderMerge(A,&B)); 1984 } 1985 PetscFunctionReturn(0); 1986 } 1987 
/* MatDiagonalScale_MPIAIJ: computes mat = diag(ll)*mat*diag(rr). The right
   scaling of the off-diagonal block needs ghost values of rr, gathered into
   aij->lvec by the Mvctx scatter; the scatter is overlapped with the left
   scaling and the diagonal-block work below. */
1988 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 1989 { 1990 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1991 Mat a = aij->A,b = aij->B; 1992 PetscInt s1,s2,s3; 1993 1994 PetscFunctionBegin; 1995 PetscCall(MatGetLocalSize(mat,&s2,&s3)); 1996 if (rr) { 1997 PetscCall(VecGetLocalSize(rr,&s1)); 1998 PetscCheck(s1==s3,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 1999 /* Overlap communication with computation. 
*/ 2000 PetscCall(VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD)); 2001 } 2002 if (ll) { 2003 PetscCall(VecGetLocalSize(ll,&s1)); 2004 PetscCheck(s1==s2,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 2005 PetscCall((*b->ops->diagonalscale)(b,ll,NULL)); 2006 } 2007 /* scale the diagonal block */ 2008 PetscCall((*a->ops->diagonalscale)(a,ll,rr)); 2009 2010 if (rr) { 2011 /* Do a scatter end and then right scale the off-diagonal block */ 2012 PetscCall(VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD)); 2013 PetscCall((*b->ops->diagonalscale)(b,NULL,aij->lvec)); 2014 } 2015 PetscFunctionReturn(0); 2016 } 2017 
/* MatSetUnfactored_MPIAIJ: only the diagonal block carries factorization state. */
2018 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 2019 { 2020 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2021 2022 PetscFunctionBegin; 2023 PetscCall(MatSetUnfactored(a->A)); 2024 PetscFunctionReturn(0); 2025 } 2026 
/* MatEqual_MPIAIJ: matrices are equal iff both the diagonal and the off-diagonal
   blocks agree on every rank (logical AND across the communicator). */
2027 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool *flag) 2028 { 2029 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 2030 Mat a,b,c,d; 2031 PetscBool flg; 2032 2033 PetscFunctionBegin; 2034 a = matA->A; b = matA->B; 2035 c = matB->A; d = matB->B; 2036 2037 PetscCall(MatEqual(a,c,&flg)); 2038 if (flg) { 2039 PetscCall(MatEqual(b,d,&flg)); 2040 } 2041 PetscCall(MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A))); 2042 PetscFunctionReturn(0); 2043 } 2044 
/* MatCopy_MPIAIJ: fast block-wise copy only when the nonzero patterns match and
   both matrices use this same implementation; otherwise fall back to the generic
   (value-by-value) MatCopy_Basic. */
2045 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 2046 { 2047 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2048 Mat_MPIAIJ *b = (Mat_MPIAIJ*)B->data; 2049 2050 PetscFunctionBegin; 2051 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. 
*/ 2052 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 2053 /* because of the column compression in the off-processor part of the matrix a->B, 2054 the number of columns in a->B and b->B may be different, hence we cannot call 2055 the MatCopy() directly on the two parts. If need be, we can provide a more 2056 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 2057 then copying the submatrices */ 2058 PetscCall(MatCopy_Basic(A,B,str)); 2059 } else { 2060 PetscCall(MatCopy(a->A,b->A,str)); 2061 PetscCall(MatCopy(a->B,b->B,str)); 2062 } 2063 PetscCall(PetscObjectStateIncrease((PetscObject)B)); 2064 PetscFunctionReturn(0); 2065 } 2066 
/* MatSetUp_MPIAIJ: default setup just triggers default preallocation. */
2067 PetscErrorCode MatSetUp_MPIAIJ(Mat A) 2068 { 2069 PetscFunctionBegin; 2070 PetscCall(MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,NULL,PETSC_DEFAULT,NULL)); 2071 PetscFunctionReturn(0); 2072 } 2073 2074 /* 2075 Computes the number of nonzeros per row needed for preallocation when X and Y 2076 have different nonzero structure. 
2077 */ 2078 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz) 2079 { 2080 PetscInt i,j,k,nzx,nzy; 2081 2082 PetscFunctionBegin; 2083 /* Set the number of nonzeros in the new matrix */ 2084 for (i=0; i<m; i++) { 2085 const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i]; 2086 nzx = xi[i+1] - xi[i]; 2087 nzy = yi[i+1] - yi[i]; 2088 nnz[i] = 0; 2089 for (j=0,k=0; j<nzx; j++) { /* Point in X */ 2090 for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */ 2091 if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++; /* Skip duplicate */ 2092 nnz[i]++; 2093 } 2094 for (; k<nzy; k++) nnz[i]++; 2095 } 2096 PetscFunctionReturn(0); 2097 } 2098 2099 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */ 2100 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz) 2101 { 2102 PetscInt m = Y->rmap->N; 2103 Mat_SeqAIJ *x = (Mat_SeqAIJ*)X->data; 2104 Mat_SeqAIJ *y = (Mat_SeqAIJ*)Y->data; 2105 2106 PetscFunctionBegin; 2107 PetscCall(MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz)); 2108 PetscFunctionReturn(0); 2109 } 2110 
/* MatAXPY_MPIAIJ: Y += a*X. Same pattern: block-wise AXPY. Subset pattern: generic
   basic AXPY. Different pattern: build a merged-pattern matrix B with exact
   preallocation, accumulate into it, then replace Y's guts via MatHeaderMerge. */
2111 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2112 { 2113 Mat_MPIAIJ *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data; 2114 2115 PetscFunctionBegin; 2116 if (str == SAME_NONZERO_PATTERN) { 2117 PetscCall(MatAXPY(yy->A,a,xx->A,str)); 2118 PetscCall(MatAXPY(yy->B,a,xx->B,str)); 2119 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 2120 PetscCall(MatAXPY_Basic(Y,a,X,str)); 2121 } else { 2122 Mat B; 2123 PetscInt *nnz_d,*nnz_o; 2124 2125 PetscCall(PetscMalloc1(yy->A->rmap->N,&nnz_d)); 2126 PetscCall(PetscMalloc1(yy->B->rmap->N,&nnz_o)); 2127 PetscCall(MatCreate(PetscObjectComm((PetscObject)Y),&B)); 
2128 PetscCall(PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name)); 2129 PetscCall(MatSetLayouts(B,Y->rmap,Y->cmap)); 2130 PetscCall(MatSetType(B,((PetscObject)Y)->type_name)); 2131 PetscCall(MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d)); 2132 PetscCall(MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o)); 2133 PetscCall(MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o)); 2134 PetscCall(MatAXPY_BasicWithPreallocation(B,Y,a,X,str)); 2135 PetscCall(MatHeaderMerge(Y,&B)); 2136 PetscCall(PetscFree(nnz_d)); 2137 PetscCall(PetscFree(nnz_o)); 2138 } 2139 PetscFunctionReturn(0); 2140 } 2141 2142 PETSC_INTERN PetscErrorCode MatConjugate_SeqAIJ(Mat); 2143 
/* MatConjugate_MPIAIJ: complex conjugation is elementwise, so conjugating both
   sequential blocks suffices; a no-op for real scalars. */
2144 PetscErrorCode MatConjugate_MPIAIJ(Mat mat) 2145 { 2146 PetscFunctionBegin; 2147 if (PetscDefined(USE_COMPLEX)) { 2148 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2149 2150 PetscCall(MatConjugate_SeqAIJ(aij->A)); 2151 PetscCall(MatConjugate_SeqAIJ(aij->B)); 2152 } 2153 PetscFunctionReturn(0); 2154 } 2155 
/* MatRealPart_MPIAIJ / MatImaginaryPart_MPIAIJ: elementwise, delegate to blocks. */
2156 PetscErrorCode MatRealPart_MPIAIJ(Mat A) 2157 { 2158 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2159 2160 PetscFunctionBegin; 2161 PetscCall(MatRealPart(a->A)); 2162 PetscCall(MatRealPart(a->B)); 2163 PetscFunctionReturn(0); 2164 } 2165 2166 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 2167 { 2168 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2169 2170 PetscFunctionBegin; 2171 PetscCall(MatImaginaryPart(a->A)); 2172 PetscCall(MatImaginaryPart(a->B)); 2173 PetscFunctionReturn(0); 2174 } 2175 
/* MatGetRowMaxAbs_MPIAIJ: per-row max of |entry|. Computes the row max of the
   diagonal block (into vA, with local indices shifted to global) and of the
   off-diagonal block (into vB, indices mapped through garray), then takes the
   larger per row; ties are broken toward the smaller global column index. */
2176 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A,Vec v,PetscInt idx[]) 2177 { 2178 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2179 PetscInt i,*idxb = NULL,m = A->rmap->n; 2180 PetscScalar *va,*vv; 2181 Vec vB,vA; 2182 const PetscScalar *vb; 2183 2184 PetscFunctionBegin; 2185 PetscCall(VecCreateSeq(PETSC_COMM_SELF,m,&vA)); 2186 PetscCall(MatGetRowMaxAbs(a->A,vA,idx)); 2187 2188 PetscCall(VecGetArrayWrite(vA,&va)); 2189 if (idx) { 2190 for (i=0; i<m; i++) { 2191 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2192 } 
2193 } 2194 2195 PetscCall(VecCreateSeq(PETSC_COMM_SELF,m,&vB)); 2196 PetscCall(PetscMalloc1(m,&idxb)); 2197 PetscCall(MatGetRowMaxAbs(a->B,vB,idxb)); 2198 2199 PetscCall(VecGetArrayWrite(v,&vv)); 2200 PetscCall(VecGetArrayRead(vB,&vb)); 2201 for (i=0; i<m; i++) { 2202 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 2203 vv[i] = vb[i]; 2204 if (idx) idx[i] = a->garray[idxb[i]]; 2205 } else { 2206 vv[i] = va[i]; 2207 if (idx && PetscAbsScalar(va[i]) == PetscAbsScalar(vb[i]) && idxb[i] != -1 && idx[i] > a->garray[idxb[i]]) 2208 idx[i] = a->garray[idxb[i]]; 2209 } 2210 } 2211 PetscCall(VecRestoreArrayWrite(v,&vv)); /* BUGFIX: vv was obtained from v (VecGetArrayWrite(v,&vv) above), not from vA; restoring against vA corrupted both vectors' array state */ 2212 PetscCall(VecRestoreArrayWrite(vA,&va)); 2213 PetscCall(VecRestoreArrayRead(vB,&vb)); 2214 PetscCall(PetscFree(idxb)); 2215 PetscCall(VecDestroy(&vA)); 2216 PetscCall(VecDestroy(&vB)); 2217 PetscFunctionReturn(0); 2218 } 2219 
/* MatGetRowMinAbs_MPIAIJ: per-row min of |entry|, counting the implicit zeros of
   the compressed off-diagonal block: if a row of B is not dense over the
   off-process columns, there is an implicit 0.0 whose global column index is the
   first "hole" in cmap. Special cases: one rank owning all columns delegates to
   the diagonal block; zero local columns yields 0.0 with index -1. */
2220 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2221 { 2222 Mat_MPIAIJ *mat = (Mat_MPIAIJ*) A->data; 2223 PetscInt m = A->rmap->n,n = A->cmap->n; 2224 PetscInt cstart = A->cmap->rstart,cend = A->cmap->rend; 2225 PetscInt *cmap = mat->garray; 2226 PetscInt *diagIdx, *offdiagIdx; 2227 Vec diagV, offdiagV; 2228 PetscScalar *a, *diagA, *offdiagA; 2229 const PetscScalar *ba,*bav; 2230 PetscInt r,j,col,ncols,*bi,*bj; 2231 Mat B = mat->B; 2232 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 2233 2234 PetscFunctionBegin; 2235 /* When a process holds entire A and other processes have no entry */ 2236 if (A->cmap->N == n) { 2237 PetscCall(VecGetArrayWrite(v,&diagA)); 2238 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF,1,m,diagA,&diagV)); 2239 PetscCall(MatGetRowMinAbs(mat->A,diagV,idx)); 2240 PetscCall(VecDestroy(&diagV)); 2241 PetscCall(VecRestoreArrayWrite(v,&diagA)); 2242 PetscFunctionReturn(0); 2243 } else if (n == 0) { 2244 if (m) { 2245 PetscCall(VecGetArrayWrite(v,&a)); 2246 for (r = 0; r < m; r++) {a[r] = 0.0; if (idx) idx[r] = -1;} 2247 PetscCall(VecRestoreArrayWrite(v,&a)); 2248 } 2249 PetscFunctionReturn(0); 
/* (MatGetRowMinAbs_MPIAIJ continued) */
2250 } 2251 2252 PetscCall(PetscMalloc2(m,&diagIdx,m,&offdiagIdx)); 2253 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV)); 2254 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV)); 2255 PetscCall(MatGetRowMinAbs(mat->A, diagV, diagIdx)); 2256 2257 /* Get offdiagIdx[] for implicit 0.0 */ 2258 PetscCall(MatSeqAIJGetArrayRead(B,&bav)); 2259 ba = bav; 2260 bi = b->i; 2261 bj = b->j; 2262 PetscCall(VecGetArrayWrite(offdiagV, &offdiagA)); 2263 for (r = 0; r < m; r++) { 2264 ncols = bi[r+1] - bi[r]; 2265 if (ncols == A->cmap->N - n) { /* Brow is dense */ 2266 offdiagA[r] = *ba; offdiagIdx[r] = cmap[0]; 2267 } else { /* Brow is sparse: an implicit 0.0 exists, so the minimum absolute value is at most 0.0 */ 2268 offdiagA[r] = 0.0; 2269 2270 /* Find first hole in the cmap */ 2271 for (j=0; j<ncols; j++) { 2272 col = cmap[bj[j]]; /* global column number = cmap[B column number] */ 2273 if (col > j && j < cstart) { 2274 offdiagIdx[r] = j; /* global column number of first implicit 0.0 */ 2275 break; 2276 } else if (col > j + n && j >= cstart) { 2277 offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */ 2278 break; 2279 } 2280 } 2281 if (j == ncols && ncols < A->cmap->N - n) { 2282 /* a hole is outside compressed Bcols */ 2283 if (ncols == 0) { 2284 if (cstart) { 2285 offdiagIdx[r] = 0; 2286 } else offdiagIdx[r] = cend; 2287 } else { /* ncols > 0 */ 2288 offdiagIdx[r] = cmap[ncols-1] + 1; 2289 if (offdiagIdx[r] == cstart) offdiagIdx[r] += n; 2290 } 2291 } 2292 } 2293 2294 for (j=0; j<ncols; j++) { 2295 if (PetscAbsScalar(offdiagA[r]) > PetscAbsScalar(*ba)) {offdiagA[r] = *ba; offdiagIdx[r] = cmap[*bj];} 2296 ba++; bj++; 2297 } 2298 } 2299 
/* Merge: per row pick the smaller of the diagonal-block and off-diagonal minima;
   on a tie prefer the smaller global column index. */
2300 PetscCall(VecGetArrayWrite(v, &a)); 2301 PetscCall(VecGetArrayRead(diagV, (const PetscScalar**)&diagA)); 2302 for (r = 0; r < m; ++r) { 2303 if (PetscAbsScalar(diagA[r]) < PetscAbsScalar(offdiagA[r])) { 2304 a[r] = diagA[r]; 2305 if (idx) idx[r] = cstart + diagIdx[r]; 2306 } else if (PetscAbsScalar(diagA[r]) == PetscAbsScalar(offdiagA[r])) { 
2307 a[r] = diagA[r]; 2308 if (idx) { 2309 if (cstart + diagIdx[r] <= offdiagIdx[r]) { 2310 idx[r] = cstart + diagIdx[r]; 2311 } else idx[r] = offdiagIdx[r]; 2312 } 2313 } else { 2314 a[r] = offdiagA[r]; 2315 if (idx) idx[r] = offdiagIdx[r]; 2316 } 2317 } 2318 PetscCall(MatSeqAIJRestoreArrayRead(B,&bav)); 2319 PetscCall(VecRestoreArrayWrite(v, &a)); 2320 PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar**)&diagA)); 2321 PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA)); 2322 PetscCall(VecDestroy(&diagV)); 2323 PetscCall(VecDestroy(&offdiagV)); 2324 PetscCall(PetscFree2(diagIdx, offdiagIdx)); 2325 PetscFunctionReturn(0); 2326 } 2327 
/* MatGetRowMin_MPIAIJ: per-row (signed, real-part) minimum, same structure as
   MatGetRowMinAbs above: diagonal block handled by the sequential routine,
   off-diagonal block scanned with implicit zeros accounted for via the first
   hole in the compressed column map. */
2328 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A,Vec v,PetscInt idx[]) 2329 { 2330 Mat_MPIAIJ *mat = (Mat_MPIAIJ*) A->data; 2331 PetscInt m = A->rmap->n,n = A->cmap->n; 2332 PetscInt cstart = A->cmap->rstart,cend = A->cmap->rend; 2333 PetscInt *cmap = mat->garray; 2334 PetscInt *diagIdx, *offdiagIdx; 2335 Vec diagV, offdiagV; 2336 PetscScalar *a, *diagA, *offdiagA; 2337 const PetscScalar *ba,*bav; 2338 PetscInt r,j,col,ncols,*bi,*bj; 2339 Mat B = mat->B; 2340 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 2341 2342 PetscFunctionBegin; 2343 /* When a process holds entire A and other processes have no entry */ 2344 if (A->cmap->N == n) { 2345 PetscCall(VecGetArrayWrite(v,&diagA)); 2346 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF,1,m,diagA,&diagV)); 2347 PetscCall(MatGetRowMin(mat->A,diagV,idx)); 2348 PetscCall(VecDestroy(&diagV)); 2349 PetscCall(VecRestoreArrayWrite(v,&diagA)); 2350 PetscFunctionReturn(0); 2351 } else if (n == 0) { 2352 if (m) { 2353 PetscCall(VecGetArrayWrite(v,&a)); 2354 for (r = 0; r < m; r++) {a[r] = PETSC_MAX_REAL; if (idx) idx[r] = -1;} 2355 PetscCall(VecRestoreArrayWrite(v,&a)); 2356 } 2357 PetscFunctionReturn(0); 2358 } 2359 2360 PetscCall(PetscCalloc2(m,&diagIdx,m,&offdiagIdx)); 2361 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV)); 2362 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV)); 2363 
PetscCall(MatGetRowMin(mat->A, diagV, diagIdx)); 2364 2365 /* Get offdiagIdx[] for implicit 0.0 */ 2366 PetscCall(MatSeqAIJGetArrayRead(B,&bav)); 2367 ba = bav; 2368 bi = b->i; 2369 bj = b->j; 2370 PetscCall(VecGetArrayWrite(offdiagV, &offdiagA)); 2371 for (r = 0; r < m; r++) { 2372 ncols = bi[r+1] - bi[r]; 2373 if (ncols == A->cmap->N - n) { /* Brow is dense */ 2374 offdiagA[r] = *ba; offdiagIdx[r] = cmap[0]; 2375 } else { /* Brow is sparse: an implicit 0.0 exists, so the row minimum is at most 0.0 */ 2376 offdiagA[r] = 0.0; 2377 2378 /* Find first hole in the cmap */ 2379 for (j=0; j<ncols; j++) { 2380 col = cmap[bj[j]]; /* global column number = cmap[B column number] */ 2381 if (col > j && j < cstart) { 2382 offdiagIdx[r] = j; /* global column number of first implicit 0.0 */ 2383 break; 2384 } else if (col > j + n && j >= cstart) { 2385 offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */ 2386 break; 2387 } 2388 } 2389 if (j == ncols && ncols < A->cmap->N - n) { 2390 /* a hole is outside compressed Bcols */ 2391 if (ncols == 0) { 2392 if (cstart) { 2393 offdiagIdx[r] = 0; 2394 } else offdiagIdx[r] = cend; 2395 } else { /* ncols > 0 */ 2396 offdiagIdx[r] = cmap[ncols-1] + 1; 2397 if (offdiagIdx[r] == cstart) offdiagIdx[r] += n; 2398 } 2399 } 2400 } 2401 2402 for (j=0; j<ncols; j++) { 2403 if (PetscRealPart(offdiagA[r]) > PetscRealPart(*ba)) {offdiagA[r] = *ba; offdiagIdx[r] = cmap[*bj];} 2404 ba++; bj++; 2405 } 2406 } 2407 2408 PetscCall(VecGetArrayWrite(v, &a)); 2409 PetscCall(VecGetArrayRead(diagV, (const PetscScalar**)&diagA)); 2410 for (r = 0; r < m; ++r) { 2411 if (PetscRealPart(diagA[r]) < PetscRealPart(offdiagA[r])) { 2412 a[r] = diagA[r]; 2413 if (idx) idx[r] = cstart + diagIdx[r]; 2414 } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) { 2415 a[r] = diagA[r]; 2416 if (idx) { 2417 if (cstart + diagIdx[r] <= offdiagIdx[r]) { 2418 idx[r] = cstart + diagIdx[r]; 2419 } else idx[r] = offdiagIdx[r]; 2420 } 2421 } else { 2422 a[r] = offdiagA[r]; 
2423 if (idx) idx[r] = offdiagIdx[r]; 2424 } 2425 } 2426 PetscCall(MatSeqAIJRestoreArrayRead(B,&bav)); 2427 PetscCall(VecRestoreArrayWrite(v, &a)); 2428 PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar**)&diagA)); 2429 PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA)); 2430 PetscCall(VecDestroy(&diagV)); 2431 PetscCall(VecDestroy(&offdiagV)); 2432 PetscCall(PetscFree2(diagIdx, offdiagIdx)); 2433 PetscFunctionReturn(0); 2434 } 2435 
/* MatGetRowMax_MPIAIJ: per-row (signed, real-part) maximum; mirrors
   MatGetRowMin_MPIAIJ above with the comparisons reversed and PETSC_MIN_REAL
   as the empty-row sentinel. Implicit zeros of the compressed off-diagonal
   block are accounted for via the first hole in the column map. */
2436 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A,Vec v,PetscInt idx[]) 2437 { 2438 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 2439 PetscInt m = A->rmap->n,n = A->cmap->n; 2440 PetscInt cstart = A->cmap->rstart,cend = A->cmap->rend; 2441 PetscInt *cmap = mat->garray; 2442 PetscInt *diagIdx, *offdiagIdx; 2443 Vec diagV, offdiagV; 2444 PetscScalar *a, *diagA, *offdiagA; 2445 const PetscScalar *ba,*bav; 2446 PetscInt r,j,col,ncols,*bi,*bj; 2447 Mat B = mat->B; 2448 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 2449 2450 PetscFunctionBegin; 2451 /* When a process holds entire A and other processes have no entry */ 2452 if (A->cmap->N == n) { 2453 PetscCall(VecGetArrayWrite(v,&diagA)); 2454 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF,1,m,diagA,&diagV)); 2455 PetscCall(MatGetRowMax(mat->A,diagV,idx)); 2456 PetscCall(VecDestroy(&diagV)); 2457 PetscCall(VecRestoreArrayWrite(v,&diagA)); 2458 PetscFunctionReturn(0); 2459 } else if (n == 0) { 2460 if (m) { 2461 PetscCall(VecGetArrayWrite(v,&a)); 2462 for (r = 0; r < m; r++) {a[r] = PETSC_MIN_REAL; if (idx) idx[r] = -1;} 2463 PetscCall(VecRestoreArrayWrite(v,&a)); 2464 } 2465 PetscFunctionReturn(0); 2466 } 2467 2468 PetscCall(PetscMalloc2(m,&diagIdx,m,&offdiagIdx)); 2469 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV)); 2470 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV)); 2471 PetscCall(MatGetRowMax(mat->A, diagV, diagIdx)); 2472 2473 /* Get offdiagIdx[] for implicit 0.0 */ 2474 PetscCall(MatSeqAIJGetArrayRead(B,&bav)); 2475 ba = bav; 2476 bi = b->i; 2477 bj = b->j; 2478 
PetscCall(VecGetArrayWrite(offdiagV, &offdiagA)); 2479 for (r = 0; r < m; r++) { 2480 ncols = bi[r+1] - bi[r]; 2481 if (ncols == A->cmap->N - n) { /* Brow is dense */ 2482 offdiagA[r] = *ba; offdiagIdx[r] = cmap[0]; 2483 } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */ 2484 offdiagA[r] = 0.0; 2485 2486 /* Find first hole in the cmap */ 2487 for (j=0; j<ncols; j++) { 2488 col = cmap[bj[j]]; /* global column number = cmap[B column number] */ 2489 if (col > j && j < cstart) { 2490 offdiagIdx[r] = j; /* global column number of first implicit 0.0 */ 2491 break; 2492 } else if (col > j + n && j >= cstart) { 2493 offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */ 2494 break; 2495 } 2496 } 2497 if (j == ncols && ncols < A->cmap->N - n) { 2498 /* a hole is outside compressed Bcols */ 2499 if (ncols == 0) { 2500 if (cstart) { 2501 offdiagIdx[r] = 0; 2502 } else offdiagIdx[r] = cend; 2503 } else { /* ncols > 0 */ 2504 offdiagIdx[r] = cmap[ncols-1] + 1; 2505 if (offdiagIdx[r] == cstart) offdiagIdx[r] += n; 2506 } 2507 } 2508 } 2509 2510 for (j=0; j<ncols; j++) { 2511 if (PetscRealPart(offdiagA[r]) < PetscRealPart(*ba)) {offdiagA[r] = *ba; offdiagIdx[r] = cmap[*bj];} 2512 ba++; bj++; 2513 } 2514 } 2515 2516 PetscCall(VecGetArrayWrite(v, &a)); 2517 PetscCall(VecGetArrayRead(diagV,(const PetscScalar**)&diagA)); 2518 for (r = 0; r < m; ++r) { 2519 if (PetscRealPart(diagA[r]) > PetscRealPart(offdiagA[r])) { 2520 a[r] = diagA[r]; 2521 if (idx) idx[r] = cstart + diagIdx[r]; 2522 } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) { 2523 a[r] = diagA[r]; 2524 if (idx) { 2525 if (cstart + diagIdx[r] <= offdiagIdx[r]) { 2526 idx[r] = cstart + diagIdx[r]; 2527 } else idx[r] = offdiagIdx[r]; 2528 } 2529 } else { 2530 a[r] = offdiagA[r]; 2531 if (idx) idx[r] = offdiagIdx[r]; 2532 } 2533 } 2534 PetscCall(MatSeqAIJRestoreArrayRead(B,&bav)); 2535 PetscCall(VecRestoreArrayWrite(v, &a)); 2536 PetscCall(VecRestoreArrayRead(diagV, (const 
PetscScalar**)&diagA)); 2537 PetscCall(VecRestoreArrayWrite(offdiagV,&offdiagA)); 2538 PetscCall(VecDestroy(&diagV)); 2539 PetscCall(VecDestroy(&offdiagV)); 2540 PetscCall(PetscFree2(diagIdx, offdiagIdx)); 2541 PetscFunctionReturn(0); 2542 } 2543 
/* MatGetSeqNonzeroStructure_MPIAIJ: gathers the global nonzero structure (no
   values) into a sequential matrix on every rank. */
2544 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat) 2545 { 2546 Mat *dummy; 2547 2548 PetscFunctionBegin; 2549 PetscCall(MatCreateSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy)); 2550 *newmat = *dummy; 2551 PetscCall(PetscFree(dummy)); 2552 PetscFunctionReturn(0); 2553 } 2554 
/* MatInvertBlockDiagonal_MPIAIJ: block diagonal lives entirely in the local
   diagonal block; propagate its factorization-error status to the wrapper. */
2555 PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values) 2556 { 2557 Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 2558 2559 PetscFunctionBegin; 2560 PetscCall(MatInvertBlockDiagonal(a->A,values)); 2561 A->factorerrortype = a->A->factorerrortype; 2562 PetscFunctionReturn(0); 2563 } 2564 
/* MatSetRandom_MPIAIJ: fill both blocks with random values; for an unassembled
   (preallocated) matrix the off-diagonal block must skip the local column range
   so entries land in the off-process part, then the matrix is assembled. */
2565 static PetscErrorCode MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx) 2566 { 2567 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)x->data; 2568 2569 PetscFunctionBegin; 2570 PetscCheck(x->assembled || x->preallocated,PetscObjectComm((PetscObject)x), PETSC_ERR_ARG_WRONGSTATE, "MatSetRandom on an unassembled and unpreallocated MATMPIAIJ is not allowed"); 2571 PetscCall(MatSetRandom(aij->A,rctx)); 2572 if (x->assembled) { 2573 PetscCall(MatSetRandom(aij->B,rctx)); 2574 } else { 2575 PetscCall(MatSetRandomSkipColumnRange_SeqAIJ_Private(aij->B,x->cmap->rstart,x->cmap->rend,rctx)); 2576 } 2577 PetscCall(MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY)); 2578 PetscCall(MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY)); 2579 PetscFunctionReturn(0); 2580 } 2581 
/* Selects the scalable vs. basic IncreaseOverlap implementation via the ops table. */
2582 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A,PetscBool sc) 2583 { 2584 PetscFunctionBegin; 2585 if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable; 2586 else A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ; 2587 PetscFunctionReturn(0); 2588 } 2589 2590 /*@ 2591 MatMPIAIJGetNumberNonzeros - gets the number of nonzeros in the matrix on this MPI 
   rank

   Not collective

   Input Parameter:
.  A - the matrix

   Output Parameter:
.  nz - the number of nonzeros

   Level: advanced

@*/
PetscErrorCode MatMPIAIJGetNumberNonzeros(Mat A,PetscCount *nz)
{
  Mat_MPIAIJ *maij = (Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ *aaij = (Mat_SeqAIJ*)maij->A->data, *baij = (Mat_SeqAIJ*)maij->B->data;

  PetscFunctionBegin;
  /* nonzeros of the local diagonal block plus nonzeros of the local off-diagonal block */
  *nz = aaij->i[A->rmap->n] + baij->i[A->rmap->n];
  PetscFunctionReturn(0);
}

/*@
   MatMPIAIJSetUseScalableIncreaseOverlap - Determine if the matrix uses a scalable algorithm to compute the overlap

   Collective on Mat

   Input Parameters:
+  A - the matrix
-  sc - PETSC_TRUE indicates use the scalable algorithm (default is not to use the scalable algorithm)

   Level: advanced

@*/
PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A,PetscBool sc)
{
  PetscFunctionBegin;
  PetscTryMethod(A,"MatMPIAIJSetUseScalableIncreaseOverlap_C",(Mat,PetscBool),(A,sc));
  PetscFunctionReturn(0);
}

/* Handle the MPIAIJ-specific options database keys */
PetscErrorCode MatSetFromOptions_MPIAIJ(PetscOptionItems *PetscOptionsObject,Mat A)
{
  PetscBool sc = PETSC_FALSE,flg;

  PetscFunctionBegin;
  PetscOptionsHeadBegin(PetscOptionsObject,"MPIAIJ options");
  /* report the currently selected algorithm as the option's default */
  if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE;
  PetscCall(PetscOptionsBool("-mat_increase_overlap_scalable","Use a scalable algorithm to compute the overlap","MatIncreaseOverlap",sc,&sc,&flg));
  if (flg) PetscCall(MatMPIAIJSetUseScalableIncreaseOverlap(A,sc));
  PetscOptionsHeadEnd();
  PetscFunctionReturn(0);
}

/* Y = Y + a*I; ensures a diagonal preallocation (one nonzero per row) exists before shifting */
PetscErrorCode MatShift_MPIAIJ(Mat Y,PetscScalar a)
{
  Mat_MPIAIJ *maij = (Mat_MPIAIJ*)Y->data;
  Mat_SeqAIJ *aij = (Mat_SeqAIJ*)maij->A->data;

  PetscFunctionBegin;
  if (!Y->preallocated) {
    PetscCall(MatMPIAIJSetPreallocation(Y,1,NULL,0,NULL));
  } else if (!aij->nz) { /* It does not matter if diagonals of Y only partially lie in maij->A. We just need an estimated preallocation. */
    PetscInt nonew = aij->nonew;
    PetscCall(MatSeqAIJSetPreallocation(maij->A,1,NULL));
    /* restore the no-new-nonzero policy that preallocation resets */
    aij->nonew = nonew;
  }
  PetscCall(MatShift_Basic(Y,a));
  PetscFunctionReturn(0);
}

/* Check (on the local diagonal block only) whether a diagonal entry is missing;
   *d, if requested, is converted from a local to a global row number */
PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A,PetscBool *missing,PetscInt *d)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  PetscCheck(A->rmap->n == A->cmap->n,PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices");
  PetscCall(MatMissingDiagonal(a->A,missing,d));
  if (d) {
    PetscInt rstart;
    PetscCall(MatGetOwnershipRange(A,&rstart,NULL));
    *d += rstart;
  }
  PetscFunctionReturn(0);
}

/* Invert the variable-size block diagonal; only the local diagonal block participates */
PetscErrorCode MatInvertVariableBlockDiagonal_MPIAIJ(Mat A,PetscInt nblocks,const PetscInt *bsizes,PetscScalar *diag)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  PetscCall(MatInvertVariableBlockDiagonal(a->A,nblocks,bsizes,diag));
  PetscFunctionReturn(0);
}

/* -------------------------------------------------------------------*/
/* Function table for MATMPIAIJ; the numeric comments index the MatOps slots */
static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
                                       MatGetRow_MPIAIJ,
                                       MatRestoreRow_MPIAIJ,
                                       MatMult_MPIAIJ,
                                /* 4*/ MatMultAdd_MPIAIJ,
                                       MatMultTranspose_MPIAIJ,
                                       MatMultTransposeAdd_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                /*10*/ NULL,
                                       NULL,
                                       NULL,
                                       MatSOR_MPIAIJ,
                                       MatTranspose_MPIAIJ,
                                /*15*/ MatGetInfo_MPIAIJ,
                                       MatEqual_MPIAIJ,
                                       MatGetDiagonal_MPIAIJ,
                                       MatDiagonalScale_MPIAIJ,
                                       MatNorm_MPIAIJ,
                                /*20*/ MatAssemblyBegin_MPIAIJ,
                                       MatAssemblyEnd_MPIAIJ,
                                       MatSetOption_MPIAIJ,
                                       MatZeroEntries_MPIAIJ,
                                /*24*/ MatZeroRows_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                /*29*/ MatSetUp_MPIAIJ,
                                       NULL,
                                       NULL,
                                       MatGetDiagonalBlock_MPIAIJ,
                                       NULL,
                                /*34*/ MatDuplicate_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                /*39*/ MatAXPY_MPIAIJ,
                                       MatCreateSubMatrices_MPIAIJ,
                                       MatIncreaseOverlap_MPIAIJ,
                                       MatGetValues_MPIAIJ,
                                       MatCopy_MPIAIJ,
                                /*44*/ MatGetRowMax_MPIAIJ,
                                       MatScale_MPIAIJ,
                                       MatShift_MPIAIJ,
                                       MatDiagonalSet_MPIAIJ,
                                       MatZeroRowsColumns_MPIAIJ,
                                /*49*/ MatSetRandom_MPIAIJ,
                                       MatGetRowIJ_MPIAIJ,
                                       MatRestoreRowIJ_MPIAIJ,
                                       NULL,
                                       NULL,
                                /*54*/ MatFDColoringCreate_MPIXAIJ,
                                       NULL,
                                       MatSetUnfactored_MPIAIJ,
                                       MatPermute_MPIAIJ,
                                       NULL,
                                /*59*/ MatCreateSubMatrix_MPIAIJ,
                                       MatDestroy_MPIAIJ,
                                       MatView_MPIAIJ,
                                       NULL,
                                       NULL,
                                /*64*/ NULL,
                                       MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                /*69*/ MatGetRowMaxAbs_MPIAIJ,
                                       MatGetRowMinAbs_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                /*75*/ MatFDColoringApply_AIJ,
                                       MatSetFromOptions_MPIAIJ,
                                       NULL,
                                       NULL,
                                       MatFindZeroDiagonals_MPIAIJ,
                                /*80*/ NULL,
                                       NULL,
                                       NULL,
                                /*83*/ MatLoad_MPIAIJ,
                                       MatIsSymmetric_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                /*89*/ NULL,
                                       NULL,
                                       MatMatMultNumeric_MPIAIJ_MPIAIJ,
                                       NULL,
                                       NULL,
                                /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatBindToCPU_MPIAIJ,
                                /*99*/ MatProductSetFromOptions_MPIAIJ,
                                       NULL,
                                       NULL,
                                       MatConjugate_MPIAIJ,
                                       NULL,
                                /*104*/MatSetValuesRow_MPIAIJ,
                                       MatRealPart_MPIAIJ,
                                       MatImaginaryPart_MPIAIJ,
                                       NULL,
                                       NULL,
                                /*109*/NULL,
                                       NULL,
                                       MatGetRowMin_MPIAIJ,
                                       NULL,
                                       MatMissingDiagonal_MPIAIJ,
                                /*114*/MatGetSeqNonzeroStructure_MPIAIJ,
                                       NULL,
                                       MatGetGhosts_MPIAIJ,
                                       NULL,
                                       NULL,
                                /*119*/MatMultDiagonalBlock_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatGetMultiProcBlock_MPIAIJ,
                                /*124*/MatFindNonzeroRows_MPIAIJ,
                                       MatGetColumnReductions_MPIAIJ,
                                       MatInvertBlockDiagonal_MPIAIJ,
                                       MatInvertVariableBlockDiagonal_MPIAIJ,
                                       MatCreateSubMatricesMPI_MPIAIJ,
                                /*129*/NULL,
                                       NULL,
                                       NULL,
                                       MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
                                       NULL,
                                /*134*/NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                /*139*/MatSetBlockSizes_MPIAIJ,
                                       NULL,
                                       NULL,
                                       MatFDColoringSetUp_MPIXAIJ,
                                       MatFindOffBlockDiagonalEntries_MPIAIJ,
                                       MatCreateMPIMatConcatenateSeqMat_MPIAIJ,
                                /*145*/NULL,
                                       NULL,
                                       NULL,
                                       MatCreateGraph_Simple_AIJ,
                                       MatFilter_AIJ,
                                /*150*/NULL
};

/* ----------------------------------------------------------------------------------------*/

/* Stash a copy of the current numerical values of both local blocks (see MatStoreValues()) */
PetscErrorCode MatStoreValues_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;

  PetscFunctionBegin;
  PetscCall(MatStoreValues(aij->A));
  PetscCall(MatStoreValues(aij->B));
  PetscFunctionReturn(0);
}

/* Restore the numerical values previously saved with MatStoreValues_MPIAIJ() */
PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;

  PetscFunctionBegin;
  PetscCall(MatRetrieveValues(aij->A));
  PetscCall(MatRetrieveValues(aij->B));
  PetscFunctionReturn(0);
}

/* Preallocate the sequential "diagonal" (b->A) and "off-diagonal" (b->B) blocks of an
   MPIAIJ matrix; any previously built column map, ghost list, local vector, and scatter
   are discarded since they depend on the (about to change) nonzero pattern */
PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
{
  Mat_MPIAIJ  *b;
  PetscMPIInt size;

  PetscFunctionBegin;
  PetscCall(PetscLayoutSetUp(B->rmap));
  PetscCall(PetscLayoutSetUp(B->cmap));
  b = (Mat_MPIAIJ*)B->data;

#if defined(PETSC_USE_CTABLE)
  PetscCall(PetscTableDestroy(&b->colmap));
#else
  PetscCall(PetscFree(b->colmap));
#endif
  PetscCall(PetscFree(b->garray));
  PetscCall(VecDestroy(&b->lvec));
  PetscCall(VecScatterDestroy(&b->Mvctx));

  /* Because the B will have been resized we simply destroy it and create a new one each time */
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B),&size));
  PetscCall(MatDestroy(&b->B));
  PetscCall(MatCreate(PETSC_COMM_SELF,&b->B));
  /* In the uniprocessor case the off-diagonal block has zero columns */
  PetscCall(MatSetSizes(b->B,B->rmap->n,size > 1 ? B->cmap->N : 0,B->rmap->n,size > 1 ? B->cmap->N : 0));
  PetscCall(MatSetBlockSizesFromMats(b->B,B,B));
  PetscCall(MatSetType(b->B,MATSEQAIJ));
  PetscCall(PetscLogObjectParent((PetscObject)B,(PetscObject)b->B));

  if (!B->preallocated) {
    /* first preallocation: the diagonal block does not exist yet */
    PetscCall(MatCreate(PETSC_COMM_SELF,&b->A));
    PetscCall(MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n));
    PetscCall(MatSetBlockSizesFromMats(b->A,B,B));
    PetscCall(MatSetType(b->A,MATSEQAIJ));
    PetscCall(PetscLogObjectParent((PetscObject)B,(PetscObject)b->A));
  }

  PetscCall(MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz));
  PetscCall(MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz));
  B->preallocated = PETSC_TRUE;
  B->was_assembled = PETSC_FALSE;
  B->assembled = PETSC_FALSE;
  PetscFunctionReturn(0);
}

/* Reset the preallocation of both local blocks; the column map, ghost list, local
   vector, and scatter are discarded and will be rebuilt at the next assembly */
PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B)
{
  Mat_MPIAIJ *b;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(B,MAT_CLASSID,1);
  PetscCall(PetscLayoutSetUp(B->rmap));
  PetscCall(PetscLayoutSetUp(B->cmap));
  b = (Mat_MPIAIJ*)B->data;

#if defined(PETSC_USE_CTABLE)
  PetscCall(PetscTableDestroy(&b->colmap));
#else
  PetscCall(PetscFree(b->colmap));
#endif
  PetscCall(PetscFree(b->garray));
  PetscCall(VecDestroy(&b->lvec));
  PetscCall(VecScatterDestroy(&b->Mvctx));

  PetscCall(MatResetPreallocation(b->A));
  PetscCall(MatResetPreallocation(b->B));
  B->preallocated = PETSC_TRUE;
  B->was_assembled = PETSC_FALSE;
  B->assembled = PETSC_FALSE;
  PetscFunctionReturn(0);
}

/* Duplicate an MPIAIJ matrix, optionally copying its values (see MatDuplicate());
   copies the layout, column map, ghost list, local vector and scatter, then
   duplicates both local blocks */
PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat        mat;
  Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data;

  PetscFunctionBegin;
  *newmat = NULL;
  PetscCall(MatCreate(PetscObjectComm((PetscObject)matin),&mat));
  PetscCall(MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N));
  PetscCall(MatSetBlockSizesFromMats(mat,matin,matin));
  PetscCall(MatSetType(mat,((PetscObject)matin)->type_name));
  a = (Mat_MPIAIJ*)mat->data;

  mat->factortype = matin->factortype;
  mat->assembled = matin->assembled;
  mat->insertmode = NOT_SET_VALUES;
  mat->preallocated = matin->preallocated;

  a->size = oldmat->size;
  a->rank = oldmat->rank;
  a->donotstash = oldmat->donotstash;
  a->roworiented = oldmat->roworiented;
  /* per-matrix MatGetRow() scratch state is not copied */
  a->rowindices = NULL;
  a->rowvalues = NULL;
  a->getrowactive = PETSC_FALSE;

  PetscCall(PetscLayoutReference(matin->rmap,&mat->rmap));
  PetscCall(PetscLayoutReference(matin->cmap,&mat->cmap));

  if (oldmat->colmap) {
#if defined(PETSC_USE_CTABLE)
    PetscCall(PetscTableCreateCopy(oldmat->colmap,&a->colmap));
#else
    PetscCall(PetscMalloc1(mat->cmap->N,&a->colmap));
    PetscCall(PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt)));
    PetscCall(PetscArraycpy(a->colmap,oldmat->colmap,mat->cmap->N));
#endif
  } else a->colmap = NULL;
  if (oldmat->garray) {
    PetscInt len;
    len = oldmat->B->cmap->n;
    PetscCall(PetscMalloc1(len+1,&a->garray));
    PetscCall(PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt)));
    if (len) PetscCall(PetscArraycpy(a->garray,oldmat->garray,len));
  } else a->garray = NULL;

  /* It may happen MatDuplicate is called with a non-assembled matrix
     In fact, MatDuplicate only requires the matrix to be preallocated
     This may happen inside a DMCreateMatrix_Shell */
  if (oldmat->lvec) {
    PetscCall(VecDuplicate(oldmat->lvec,&a->lvec));
    PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec));
  }
  if (oldmat->Mvctx) {
    PetscCall(VecScatterCopy(oldmat->Mvctx,&a->Mvctx));
    PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx));
  }
  PetscCall(MatDuplicate(oldmat->A,cpvalues,&a->A));
  PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A));
  PetscCall(MatDuplicate(oldmat->B,cpvalues,&a->B));
  PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B));
  PetscCall(PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist));
  *newmat = mat;
  PetscFunctionReturn(0);
}

/* Load an MPIAIJ matrix from a binary or HDF5 viewer (see MatLoad()); other viewer
   types are rejected with an error */
PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
{
  PetscBool isbinary, ishdf5;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(newMat,MAT_CLASSID,1);
  PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2);
  /* force binary viewer to load .info file if it has not yet done so */
  PetscCall(PetscViewerSetUp(viewer));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERHDF5, &ishdf5));
  if (isbinary) {
    PetscCall(MatLoad_MPIAIJ_Binary(newMat,viewer));
  } else if (ishdf5) {
#if defined(PETSC_HAVE_HDF5)
    PetscCall(MatLoad_AIJ_HDF5(newMat,viewer));
#else
    SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"HDF5 not supported in this build.\nPlease reconfigure using --download-hdf5");
#endif
  } else {
    SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newMat)->type_name);
  }
  PetscFunctionReturn(0);
}

/* Read an MPIAIJ matrix from a PETSc binary viewer: header, per-row nonzero counts,
   then column indices and values, distributed according to the matrix's row layout */
PetscErrorCode MatLoad_MPIAIJ_Binary(Mat mat, PetscViewer viewer)
{
  PetscInt    header[4],M,N,m,nz,rows,cols,sum,i;
  PetscInt    *rowidxs,*colidxs;
  PetscScalar *matvals;

  PetscFunctionBegin;
  PetscCall(PetscViewerSetUp(viewer));

  /* read in matrix header */
  PetscCall(PetscViewerBinaryRead(viewer,header,4,NULL,PETSC_INT));
  PetscCheck(header[0] == MAT_FILE_CLASSID,PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Not a matrix object in file");
  M = header[1]; N = header[2]; nz = header[3];
  PetscCheck(M >= 0,PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix row size (%" PetscInt_FMT ") in file is negative",M);
  PetscCheck(N >= 0,PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix column size (%" PetscInt_FMT ") in file is negative",N);
  /* nz < 0 marks the dense/special on-disk format, which this reader does not handle */
  PetscCheck(nz >= 0,PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk, cannot load as MPIAIJ");

  /* set block sizes from the viewer's .info file */
  PetscCall(MatLoad_Binary_BlockSizes(mat,viewer));
  /* set global sizes if not set already */
  if (mat->rmap->N < 0) mat->rmap->N = M;
  if (mat->cmap->N < 0) mat->cmap->N = N;
  PetscCall(PetscLayoutSetUp(mat->rmap));
  PetscCall(PetscLayoutSetUp(mat->cmap));

  /* check if the matrix sizes are correct */
  PetscCall(MatGetSize(mat,&rows,&cols));
  PetscCheck(M == rows && N == cols,PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%" PetscInt_FMT ", %" PetscInt_FMT ") than the input matrix (%" PetscInt_FMT ", %" PetscInt_FMT ")",M,N,rows,cols);

  /* read in row lengths and build row indices */
  PetscCall(MatGetLocalSize(mat,&m,NULL));
  PetscCall(PetscMalloc1(m+1,&rowidxs));
  PetscCall(PetscViewerBinaryReadAll(viewer,rowidxs+1,m,PETSC_DECIDE,M,PETSC_INT));
  /* prefix-sum the per-row counts to obtain local CSR row offsets */
  rowidxs[0] = 0; for (i=0; i<m; i++) rowidxs[i+1] += rowidxs[i];
  PetscCall(MPIU_Allreduce(&rowidxs[m],&sum,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)viewer)));
  PetscCheck(sum == nz,PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Inconsistent matrix data in file: nonzeros = %" PetscInt_FMT ", sum-row-lengths = %" PetscInt_FMT,nz,sum);
  /* read in column
     indices and matrix values */
  PetscCall(PetscMalloc2(rowidxs[m],&colidxs,rowidxs[m],&matvals));
  PetscCall(PetscViewerBinaryReadAll(viewer,colidxs,rowidxs[m],PETSC_DETERMINE,PETSC_DETERMINE,PETSC_INT));
  PetscCall(PetscViewerBinaryReadAll(viewer,matvals,rowidxs[m],PETSC_DETERMINE,PETSC_DETERMINE,PETSC_SCALAR));
  /* store matrix indices and values */
  PetscCall(MatMPIAIJSetPreallocationCSR(mat,rowidxs,colidxs,matvals));
  PetscCall(PetscFree(rowidxs));
  PetscCall(PetscFree2(colidxs,matvals));
  PetscFunctionReturn(0);
}

/* Not scalable because of ISAllGather() unless getting all columns. */
/* Build a sequential IS that holds (on every process) all indices of the parallel iscol;
   the "every process asks exactly for its own column range" case is detected and the
   gather is skipped there */
PetscErrorCode ISGetSeqIS_Private(Mat mat,IS iscol,IS *isseq)
{
  IS          iscol_local;
  PetscBool   isstride;
  PetscMPIInt lisstride=0,gisstride;

  PetscFunctionBegin;
  /* check if we are grabbing all columns*/
  PetscCall(PetscObjectTypeCompare((PetscObject)iscol,ISSTRIDE,&isstride));

  if (isstride) {
    PetscInt start,len,mstart,mlen;
    PetscCall(ISStrideGetInfo(iscol,&start,NULL));
    PetscCall(ISGetLocalSize(iscol,&len));
    PetscCall(MatGetOwnershipRangeColumn(mat,&mstart,&mlen));
    if (mstart == start && mlen-mstart == len) lisstride = 1;
  }

  /* MPI_MIN: all processes must agree before the optimization is applied */
  PetscCall(MPIU_Allreduce(&lisstride,&gisstride,1,MPI_INT,MPI_MIN,PetscObjectComm((PetscObject)mat)));
  if (gisstride) {
    PetscInt N;
    PetscCall(MatGetSize(mat,NULL,&N));
    PetscCall(ISCreateStride(PETSC_COMM_SELF,N,0,1,&iscol_local));
    PetscCall(ISSetIdentity(iscol_local));
    PetscCall(PetscInfo(mat,"Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n"));
  } else {
    PetscInt cbs;
    PetscCall(ISGetBlockSize(iscol,&cbs));
    PetscCall(ISAllGather(iscol,&iscol_local));
    PetscCall(ISSetBlockSize(iscol_local,cbs));
  }

  *isseq = iscol_local;
  PetscFunctionReturn(0);
}

/*
 Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and global size of iscol_local
 (see MatCreateSubMatrix_MPIAIJ_nonscalable)

 Input Parameters:
   mat - matrix
   isrow - parallel row index set; its local indices are a subset of local columns of mat,
           i.e., mat->rstart <= isrow[i] < mat->rend
   iscol - parallel column index set; its local indices are a subset of local columns of mat,
           i.e., mat->cstart <= iscol[i] < mat->cend
 Output Parameter:
   isrow_d,iscol_d - sequential row and column index sets for retrieving mat->A
   iscol_o - sequential column index set for retrieving mat->B
   garray - column map; garray[i] indicates global location of iscol_o[i] in iscol
*/
PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat,IS isrow,IS iscol,IS *isrow_d,IS *iscol_d,IS *iscol_o,const PetscInt *garray[])
{
  Vec            x,cmap;
  const PetscInt *is_idx;
  PetscScalar    *xarray,*cmaparray;
  PetscInt       ncols,isstart,*idx,m,rstart,*cmap1,count;
  Mat_MPIAIJ     *a=(Mat_MPIAIJ*)mat->data;
  Mat            B=a->B;
  Vec            lvec=a->lvec,lcmap;
  PetscInt       i,cstart,cend,Bn=B->cmap->N;
  MPI_Comm       comm;
  VecScatter     Mvctx=a->Mvctx;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)mat,&comm));
  PetscCall(ISGetLocalSize(iscol,&ncols));

  /* (1) iscol is a sub-column vector of mat, pad it with '-1.' to form a full vector x */
  PetscCall(MatCreateVecs(mat,&x,NULL));
  PetscCall(VecSet(x,-1.0));
  PetscCall(VecDuplicate(x,&cmap));
  PetscCall(VecSet(cmap,-1.0));

  /* Get start indices */
  PetscCallMPI(MPI_Scan(&ncols,&isstart,1,MPIU_INT,MPI_SUM,comm));
  isstart -= ncols; /* exclusive prefix sum: global offset of this process's iscol entries */
  PetscCall(MatGetOwnershipRangeColumn(mat,&cstart,&cend));

  PetscCall(ISGetIndices(iscol,&is_idx));
  PetscCall(VecGetArray(x,&xarray));
  PetscCall(VecGetArray(cmap,&cmaparray));
  PetscCall(PetscMalloc1(ncols,&idx));
  for (i=0; i<ncols; i++) {
    xarray[is_idx[i]-cstart] = (PetscScalar)is_idx[i];
    cmaparray[is_idx[i]-cstart] = i + isstart;      /* global index of iscol[i] */
    idx[i] = is_idx[i]-cstart;                      /* local index of iscol[i] */
  }
  PetscCall(VecRestoreArray(x,&xarray));
  PetscCall(VecRestoreArray(cmap,&cmaparray));
  PetscCall(ISRestoreIndices(iscol,&is_idx));

  /* Get iscol_d */
  PetscCall(ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,iscol_d));
  PetscCall(ISGetBlockSize(iscol,&i));
  PetscCall(ISSetBlockSize(*iscol_d,i));

  /* Get isrow_d */
  PetscCall(ISGetLocalSize(isrow,&m));
  rstart = mat->rmap->rstart;
  PetscCall(PetscMalloc1(m,&idx));
  PetscCall(ISGetIndices(isrow,&is_idx));
  for (i=0; i<m; i++) idx[i] = is_idx[i]-rstart;
  PetscCall(ISRestoreIndices(isrow,&is_idx));

  PetscCall(ISCreateGeneral(PETSC_COMM_SELF,m,idx,PETSC_OWN_POINTER,isrow_d));
  PetscCall(ISGetBlockSize(isrow,&i));
  PetscCall(ISSetBlockSize(*isrow_d,i));

  /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */
  PetscCall(VecScatterBegin(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD));
  PetscCall(VecScatterEnd(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD));

  PetscCall(VecDuplicate(lvec,&lcmap));

  PetscCall(VecScatterBegin(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD));
  PetscCall(VecScatterEnd(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD));

  /* (3) create sequential iscol_o (a subset of iscol) and isgarray */
  /* off-process column indices */
  count = 0;
  PetscCall(PetscMalloc1(Bn,&idx));
  PetscCall(PetscMalloc1(Bn,&cmap1));

  PetscCall(VecGetArray(lvec,&xarray));
  PetscCall(VecGetArray(lcmap,&cmaparray));
  for (i=0; i<Bn; i++) {
    /* xarray[i] > -1 marks a ghost column that was selected by some process's iscol */
    if (PetscRealPart(xarray[i]) > -1.0) {
      idx[count] = i;                                       /* local column index in off-diagonal part B */
      cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]); /* column index in submat */
      count++;
    }
  }
  PetscCall(VecRestoreArray(lvec,&xarray));
  PetscCall(VecRestoreArray(lcmap,&cmaparray));

  PetscCall(ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_COPY_VALUES,iscol_o));
  /* cannot ensure iscol_o has same blocksize as iscol! */

  PetscCall(PetscFree(idx));
  *garray = cmap1; /* caller takes ownership and must PetscFree() it */

  PetscCall(VecDestroy(&x));
  PetscCall(VecDestroy(&cmap));
  PetscCall(VecDestroy(&lcmap));
  PetscFunctionReturn(0);
}

/* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */
PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *submat)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)mat->data,*asub;
  Mat        M = NULL;
  MPI_Comm   comm;
  IS         iscol_d,isrow_d,iscol_o;
  Mat        Asub = NULL,Bsub = NULL;
  PetscInt   n;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)mat,&comm));

  if (call == MAT_REUSE_MATRIX) {
    /* Retrieve isrow_d, iscol_d and iscol_o from submat */
    PetscCall(PetscObjectQuery((PetscObject)*submat,"isrow_d",(PetscObject*)&isrow_d));
    PetscCheck(isrow_d,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"isrow_d passed in was not used before, cannot reuse");

    PetscCall(PetscObjectQuery((PetscObject)*submat,"iscol_d",(PetscObject*)&iscol_d));
    PetscCheck(iscol_d,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_d passed in was not used before, cannot reuse");

    PetscCall(PetscObjectQuery((PetscObject)*submat,"iscol_o",(PetscObject*)&iscol_o));
    PetscCheck(iscol_o,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_o passed in was not used before, cannot reuse");

    /* Update diagonal and off-diagonal portions of submat */
    asub = (Mat_MPIAIJ*)(*submat)->data;
    PetscCall(MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->A));
    PetscCall(ISGetLocalSize(iscol_o,&n));
    if (n) {
      PetscCall(MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->B));
    }
    PetscCall(MatAssemblyBegin(*submat,MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(*submat,MAT_FINAL_ASSEMBLY));

  } else { /* call == MAT_INITIAL_MATRIX) */
    const PetscInt *garray;
    PetscInt       BsubN;

    /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) */
    PetscCall(ISGetSeqIS_SameColDist_Private(mat,isrow,iscol,&isrow_d,&iscol_d,&iscol_o,&garray));

    /* Create local submatrices Asub and Bsub */
    PetscCall(MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Asub));
    PetscCall(MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Bsub));

    /* Create submatrix M */
    PetscCall(MatCreateMPIAIJWithSeqAIJ(comm,Asub,Bsub,garray,&M));

    /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */
    asub = (Mat_MPIAIJ*)M->data;

    PetscCall(ISGetLocalSize(iscol_o,&BsubN));
    n = asub->B->cmap->N;
    if (BsubN > n) {
      /* This case can be tested using ~petsc/src/tao/bound/tutorials/runplate2_3 */
      const PetscInt *idx;
      PetscInt       i,j,*idx_new,*subgarray = asub->garray;
      PetscCall(PetscInfo(M,"submatrix Bn %" PetscInt_FMT " != BsubN %" PetscInt_FMT ", update iscol_o\n",n,BsubN));

      /* merge-match subgarray against garray to keep only the retained columns of iscol_o */
      PetscCall(PetscMalloc1(n,&idx_new));
      j = 0;
      PetscCall(ISGetIndices(iscol_o,&idx));
      for (i=0; i<n; i++) {
        if (j >= BsubN) break;
        while (subgarray[i] > garray[j]) j++;

        if (subgarray[i] == garray[j]) {
          idx_new[i] = idx[j++];
        } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"subgarray[%" PetscInt_FMT "]=%" PetscInt_FMT " cannot < garray[%" PetscInt_FMT "]=%" PetscInt_FMT,i,subgarray[i],j,garray[j]);
      }
      PetscCall(ISRestoreIndices(iscol_o,&idx));

      PetscCall(ISDestroy(&iscol_o));
      PetscCall(ISCreateGeneral(PETSC_COMM_SELF,n,idx_new,PETSC_OWN_POINTER,&iscol_o));

    } else if (BsubN < n) {
      SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Columns of Bsub (%" PetscInt_FMT ") cannot be smaller than B's (%" PetscInt_FMT ")",BsubN,asub->B->cmap->N);
    }

    PetscCall(PetscFree(garray));
    *submat = M;

    /* Save isrow_d, iscol_d and iscol_o used in processor for next request */
    PetscCall(PetscObjectCompose((PetscObject)M,"isrow_d",(PetscObject)isrow_d));
    PetscCall(ISDestroy(&isrow_d));

    PetscCall(PetscObjectCompose((PetscObject)M,"iscol_d",(PetscObject)iscol_d));
    PetscCall(ISDestroy(&iscol_d));

    PetscCall(PetscObjectCompose((PetscObject)M,"iscol_o",(PetscObject)iscol_o));
    PetscCall(ISDestroy(&iscol_o));
  }
  PetscFunctionReturn(0);
}

/* Dispatcher for MatCreateSubMatrix() on MPIAIJ: chooses the cheapest algorithm
   depending on whether isrow/iscol share the matrix's row/column distribution,
   falling back to the non-scalable all-gather variant in the general case */
PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
{
  IS        iscol_local=NULL,isrow_d;
  PetscInt  csize;
  PetscInt  n,i,j,start,end;
  PetscBool sameRowDist=PETSC_FALSE,sameDist[2],tsameDist[2];
  MPI_Comm  comm;

  PetscFunctionBegin;
  /* If isrow has same processor distribution as mat,
     call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */
  if (call == MAT_REUSE_MATRIX) {
    /* the composed objects record which algorithm built the matrix being reused */
    PetscCall(PetscObjectQuery((PetscObject)*newmat,"isrow_d",(PetscObject*)&isrow_d));
    if (isrow_d) {
      sameRowDist = PETSC_TRUE;
      tsameDist[1] = PETSC_TRUE; /* sameColDist */
    } else {
      PetscCall(PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_local));
      if (iscol_local) {
        sameRowDist = PETSC_TRUE;
        tsameDist[1] = PETSC_FALSE; /* !sameColDist */
      }
    }
  } else {
    /* Check if isrow has same processor distribution as mat */
    sameDist[0] = PETSC_FALSE;
    PetscCall(ISGetLocalSize(isrow,&n));
    if (!n) {
      sameDist[0] = PETSC_TRUE;
    } else {
      PetscCall(ISGetMinMax(isrow,&i,&j));
      PetscCall(MatGetOwnershipRange(mat,&start,&end));
      if (i >= start && j < end) {
        sameDist[0] = PETSC_TRUE;
      }
    }

    /* Check if iscol has same processor distribution as mat */
    sameDist[1] = PETSC_FALSE;
    PetscCall(ISGetLocalSize(iscol,&n));
    if (!n) {
      sameDist[1] = PETSC_TRUE;
    } else {
      PetscCall(ISGetMinMax(iscol,&i,&j));
      PetscCall(MatGetOwnershipRangeColumn(mat,&start,&end));
      if (i >= start && j < end) sameDist[1] = PETSC_TRUE;
    }

    /* all processes must agree on the choice of algorithm */
    PetscCall(PetscObjectGetComm((PetscObject)mat,&comm));
    PetscCall(MPIU_Allreduce(&sameDist,&tsameDist,2,MPIU_BOOL,MPI_LAND,comm));
    sameRowDist = tsameDist[0];
  }

  if (sameRowDist) {
    if (tsameDist[1]) { /* sameRowDist & sameColDist */
      /* isrow and iscol have same processor distribution as mat */
      PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat,isrow,iscol,call,newmat));
      PetscFunctionReturn(0);
    } else { /* sameRowDist */
      /* isrow has same processor distribution as mat */
      if (call == MAT_INITIAL_MATRIX) {
        PetscBool sorted;
        PetscCall(ISGetSeqIS_Private(mat,iscol,&iscol_local));
        PetscCall(ISGetLocalSize(iscol_local,&n)); /* local size of iscol_local = global columns of newmat */
        PetscCall(ISGetSize(iscol,&i));
        PetscCheck(n == i,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"n %" PetscInt_FMT " != size of iscol %" PetscInt_FMT,n,i);

        PetscCall(ISSorted(iscol_local,&sorted));
        if (sorted) {
          /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local be sorted; it can have duplicate indices */
          PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,iscol_local,MAT_INITIAL_MATRIX,newmat));
          PetscFunctionReturn(0);
        }
      } else { /* call == MAT_REUSE_MATRIX */
        IS iscol_sub;
        PetscCall(PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub));
        if (iscol_sub) {
          PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,NULL,call,newmat));
          PetscFunctionReturn(0);
        }
      }
    }
  }

  /* General case: iscol -> iscol_local which has global size of iscol */
  if (call == MAT_REUSE_MATRIX) {
    PetscCall(PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local));
    PetscCheck(iscol_local,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
  } else {
    if (!iscol_local) {
      PetscCall(ISGetSeqIS_Private(mat,iscol,&iscol_local));
    }
  }

  PetscCall(ISGetLocalSize(iscol,&csize));
  PetscCall(MatCreateSubMatrix_MPIAIJ_nonscalable(mat,isrow,iscol_local,csize,call,newmat));

  if (call == MAT_INITIAL_MATRIX) {
    /* keep iscol_local attached to the submatrix so MAT_REUSE_MATRIX can find it */
    PetscCall(PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local));
    PetscCall(ISDestroy(&iscol_local));
  }
  PetscFunctionReturn(0);
}

/*@C
     MatCreateMPIAIJWithSeqAIJ - creates a MPIAIJ matrix using SeqAIJ matrices that contain the "diagonal"
     and "off-diagonal" part of the matrix in CSR format.

   Collective

   Input Parameters:
+  comm - MPI communicator
.  A - "diagonal" portion of matrix
.  B - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine
-  garray - global index of B columns

   Output Parameter:
.   mat - the matrix, with input A as its local diagonal matrix
   Level: advanced

   Notes:
       See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix.
       A becomes part of output mat, B is destroyed by this routine. The user cannot use A and B anymore.
.seealso: `MatCreateMPIAIJWithSplitArrays()`
@*/
PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm,Mat A,Mat B,const PetscInt garray[],Mat *mat)
{
  Mat_MPIAIJ        *maij;
  Mat_SeqAIJ        *b=(Mat_SeqAIJ*)B->data,*bnew;
  PetscInt          *oi=b->i,*oj=b->j,i,nz,col;
  const PetscScalar *oa;
  Mat               Bnew;
  PetscInt          m,n,N;
  MatType           mpi_mat_type;

  PetscFunctionBegin;
  PetscCall(MatCreate(comm,mat));
  PetscCall(MatGetSize(A,&m,&n));
  PetscCheck(m == B->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Am %" PetscInt_FMT " != Bm %" PetscInt_FMT,m,B->rmap->N);
  PetscCheck(A->rmap->bs == B->rmap->bs,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A row bs %" PetscInt_FMT " != B row bs %" PetscInt_FMT,A->rmap->bs,B->rmap->bs);
  /* remove check below; When B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be same as A */
  /* PetscCheck(A->cmap->bs == B->cmap->bs,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %" PetscInt_FMT " != B column bs %" PetscInt_FMT,A->cmap->bs,B->cmap->bs); */

  /* Get global columns of mat: sum of the local "diagonal" column counts */
  PetscCall(MPIU_Allreduce(&n,&N,1,MPIU_INT,MPI_SUM,comm));

  PetscCall(MatSetSizes(*mat,m,n,PETSC_DECIDE,N));
  /* Determine the type of MPI matrix that should be created from the type of matrix A, which holds the "diagonal" portion. */
  PetscCall(MatGetMPIMatType_Private(A,&mpi_mat_type));
  PetscCall(MatSetType(*mat,mpi_mat_type));

  PetscCall(MatSetBlockSizes(*mat,A->rmap->bs,A->cmap->bs));
  maij = (Mat_MPIAIJ*)(*mat)->data;

  (*mat)->preallocated = PETSC_TRUE;

  PetscCall(PetscLayoutSetUp((*mat)->rmap));
  PetscCall(PetscLayoutSetUp((*mat)->cmap));

  /* Set A as diagonal portion of *mat; ownership transfers, caller must not use A afterwards */
  maij->A = A;

  /* translate B's compact local column indices to global indices via garray (in place) */
  nz = oi[m];
  for (i=0; i<nz; i++) {
    col   = oj[i];
    oj[i] = garray[col];
  }

  /* Set Bnew as off-diagonal portion of *mat; Bnew shares B's i/j/a arrays */
  PetscCall(MatSeqAIJGetArrayRead(B,&oa));
  PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,N,oi,oj,(PetscScalar*)oa,&Bnew));
  PetscCall(MatSeqAIJRestoreArrayRead(B,&oa));
  bnew        = (Mat_SeqAIJ*)Bnew->data;
  bnew->maxnz = b->maxnz; /* allocated nonzeros of B */
  maij->B     = Bnew;

  PetscCheck(B->rmap->N == Bnew->rmap->N,PETSC_COMM_SELF,PETSC_ERR_PLIB,"BN %" PetscInt_FMT " != BnewN %" PetscInt_FMT,B->rmap->N,Bnew->rmap->N);

  b->singlemalloc = PETSC_FALSE; /* B arrays are shared by Bnew */
  b->free_a       = PETSC_FALSE;
  b->free_ij      = PETSC_FALSE;
  PetscCall(MatDestroy(&B));

  bnew->singlemalloc = PETSC_TRUE; /* arrays will be freed by MatDestroy(&Bnew) */
  bnew->free_a       = PETSC_TRUE;
  bnew->free_ij      = PETSC_TRUE;

  /* condense columns of maij->B */
  PetscCall(MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE));
  PetscCall(MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY));
  PetscCall(MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE));
  PetscCall(MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE));
  PetscFunctionReturn(0);
}

extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool,Mat*);

PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat,IS
isrow,IS iscol,IS iscol_local,MatReuse call,Mat *newmat)
{
  /* Extracts mat[isrow,iscol] when isrow has the same row distribution as mat.
     iscol_local must be sorted (may contain duplicates); it may be NULL on MAT_REUSE_MATRIX,
     where the composed "SubIScol"/"Subcmap"/"SubMatrix" objects on *newmat are used instead. */
  PetscInt       i,m,n,rstart,row,rend,nz,j,bs,cbs;
  PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
  Mat_MPIAIJ     *a=(Mat_MPIAIJ*)mat->data;
  Mat            M,Msub,B=a->B;
  MatScalar      *aa;
  Mat_SeqAIJ     *aij;
  PetscInt       *garray = a->garray,*colsub,Ncols;
  PetscInt       count,Bn=B->cmap->N,cstart=mat->cmap->rstart,cend=mat->cmap->rend;
  IS             iscol_sub,iscmap;
  const PetscInt *is_idx,*cmap;
  PetscBool      allcolumns=PETSC_FALSE;
  MPI_Comm       comm;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)mat,&comm));
  if (call == MAT_REUSE_MATRIX) {
    /* recover the objects saved by the MAT_INITIAL_MATRIX call */
    PetscCall(PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub));
    PetscCheck(iscol_sub,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"SubIScol passed in was not used before, cannot reuse");
    PetscCall(ISGetLocalSize(iscol_sub,&count));

    PetscCall(PetscObjectQuery((PetscObject)*newmat,"Subcmap",(PetscObject*)&iscmap));
    PetscCheck(iscmap,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Subcmap passed in was not used before, cannot reuse");

    PetscCall(PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Msub));
    PetscCheck(Msub,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");

    PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_REUSE_MATRIX,PETSC_FALSE,&Msub));

  } else { /* call == MAT_INITIAL_MATRIX) */
    PetscBool flg;

    PetscCall(ISGetLocalSize(iscol,&n));
    PetscCall(ISGetSize(iscol,&Ncols));

    /* (1) iscol -> nonscalable iscol_local */
    /* Check for special case: each processor gets entire matrix columns */
    PetscCall(ISIdentity(iscol_local,&flg));
    if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE;
    PetscCall(MPIU_Allreduce(MPI_IN_PLACE,&allcolumns,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)mat)));

    if (allcolumns) {
      iscol_sub = iscol_local;
      PetscCall(PetscObjectReference((PetscObject)iscol_local));
      PetscCall(ISCreateStride(PETSC_COMM_SELF,n,0,1,&iscmap));

    } else {
      /* (2) iscol_local -> iscol_sub and iscmap. Implementation below requires iscol_local be sorted, it can have duplicate indices */
      PetscInt *idx,*cmap1,k;
      PetscCall(PetscMalloc1(Ncols,&idx));
      PetscCall(PetscMalloc1(Ncols,&cmap1));
      PetscCall(ISGetIndices(iscol_local,&is_idx));
      count = 0;
      k     = 0;
      for (i=0; i<Ncols; i++) {
        j = is_idx[i];
        if (j >= cstart && j < cend) {
          /* diagonal part of mat */
          idx[count]     = j;
          cmap1[count++] = i; /* column index in submat */
        } else if (Bn) {
          /* off-diagonal part of mat: merge against sorted garray, advancing k monotonically */
          if (j == garray[k]) {
            idx[count]     = j;
            cmap1[count++] = i; /* column index in submat */
          } else if (j > garray[k]) {
            while (j > garray[k] && k < Bn-1) k++;
            if (j == garray[k]) {
              idx[count]     = j;
              cmap1[count++] = i; /* column index in submat */
            }
          }
        }
      }
      PetscCall(ISRestoreIndices(iscol_local,&is_idx));

      PetscCall(ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_OWN_POINTER,&iscol_sub));
      PetscCall(ISGetBlockSize(iscol,&cbs));
      PetscCall(ISSetBlockSize(iscol_sub,cbs));

      PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local),count,cmap1,PETSC_OWN_POINTER,&iscmap));
    }

    /* (3) Create sequential Msub */
    PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_INITIAL_MATRIX,allcolumns,&Msub));
  }

  PetscCall(ISGetLocalSize(iscol_sub,&count));
  aij = (Mat_SeqAIJ*)(Msub)->data;
  ii  = aij->i;
  PetscCall(ISGetIndices(iscmap,&cmap));

  /*
      m - number of local rows
      Ncols - number of columns (same on all processors)
      rstart - first row in new global matrix generated
  */
  PetscCall(MatGetSize(Msub,&m,NULL));

  if (call == MAT_INITIAL_MATRIX) {
    /* (4) Create parallel newmat */
    PetscMPIInt rank,size;
    PetscInt    csize;

    PetscCallMPI(MPI_Comm_size(comm,&size));
    PetscCallMPI(MPI_Comm_rank(comm,&rank));

    /*
        Determine the number of non-zeros in the diagonal and off-diagonal
        portions of the matrix in order to do correct preallocation
    */

    /* first get start and end of "diagonal" columns */
    PetscCall(ISGetLocalSize(iscol,&csize));
    if (csize == PETSC_DECIDE) {
      PetscCall(ISGetSize(isrow,&mglobal));
      if (mglobal == Ncols) { /* square matrix */
        nlocal = m;
      } else {
        nlocal = Ncols/size + ((Ncols % size) > rank);
      }
    } else {
      nlocal = csize;
    }
    PetscCallMPI(MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm));
    rstart = rend - nlocal;
    PetscCheck(rank != size - 1 || rend == Ncols,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT,rend,Ncols);

    /* next, compute all the lengths */
    jj = aij->j;
    PetscCall(PetscMalloc1(2*m+1,&dlens));
    olens = dlens + m; /* dlens and olens share one allocation */
    for (i=0; i<m; i++) {
      jend = ii[i+1] - ii[i];
      olen = 0;
      dlen = 0;
      for (j=0; j<jend; j++) {
        if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++;
        else dlen++;
        jj++;
      }
      olens[i] = olen;
      dlens[i] = dlen;
    }

    PetscCall(ISGetBlockSize(isrow,&bs));
    PetscCall(ISGetBlockSize(iscol,&cbs));

    PetscCall(MatCreate(comm,&M));
    PetscCall(MatSetSizes(M,m,nlocal,PETSC_DECIDE,Ncols));
    PetscCall(MatSetBlockSizes(M,bs,cbs));
    PetscCall(MatSetType(M,((PetscObject)mat)->type_name));
    PetscCall(MatMPIAIJSetPreallocation(M,0,dlens,0,olens));
    PetscCall(PetscFree(dlens));

  } else { /* call == MAT_REUSE_MATRIX */
    M = *newmat;
    PetscCall(MatGetLocalSize(M,&i,NULL));
    PetscCheck(i == m,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
    PetscCall(MatZeroEntries(M));
    /*
         The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
       rather than the slower MatSetValues().
    */
    M->was_assembled = PETSC_TRUE;
    M->assembled     = PETSC_FALSE;
  }

  /* (5) Set values of Msub to *newmat */
  PetscCall(PetscMalloc1(count,&colsub));
  PetscCall(MatGetOwnershipRange(M,&rstart,NULL));

  jj = aij->j;
  PetscCall(MatSeqAIJGetArrayRead(Msub,(const PetscScalar**)&aa));
  for (i=0; i<m; i++) {
    row = rstart + i;
    nz  = ii[i+1] - ii[i];
    /* map Msub's local column indices to the submatrix's global columns */
    for (j=0; j<nz; j++) colsub[j] = cmap[jj[j]];
    PetscCall(MatSetValues_MPIAIJ(M,1,&row,nz,colsub,aa,INSERT_VALUES));
    jj += nz; aa += nz;
  }
  PetscCall(MatSeqAIJRestoreArrayRead(Msub,(const PetscScalar**)&aa));
  PetscCall(ISRestoreIndices(iscmap,&cmap));

  PetscCall(MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY));

  PetscCall(PetscFree(colsub));

  /* save Msub, iscol_sub and iscmap used in processor for next request */
  if (call == MAT_INITIAL_MATRIX) {
    *newmat = M;
    PetscCall(PetscObjectCompose((PetscObject)(*newmat),"SubMatrix",(PetscObject)Msub));
    PetscCall(MatDestroy(&Msub));

    PetscCall(PetscObjectCompose((PetscObject)(*newmat),"SubIScol",(PetscObject)iscol_sub));
    PetscCall(ISDestroy(&iscol_sub));

    PetscCall(PetscObjectCompose((PetscObject)(*newmat),"Subcmap",(PetscObject)iscmap));
    PetscCall(ISDestroy(&iscmap));

    if (iscol_local) {
      PetscCall(PetscObjectCompose((PetscObject)(*newmat),"ISAllGather",(PetscObject)iscol_local));
      PetscCall(ISDestroy(&iscol_local));
    }
  }
  PetscFunctionReturn(0);
}

/*
    Not great since it makes two copies of the submatrix, first an SeqAIJ
  in local and then by concatenating the local matrices the end result.
  Writing it directly would be much like MatCreateSubMatrices_MPIAIJ()

  Note: This requires a sequential iscol with all indices.
*/
PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
{
  PetscMPIInt rank,size;
  PetscInt    i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs;
  PetscInt    *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
  Mat         M,Mreuse;
  MatScalar   *aa,*vwork;
  MPI_Comm    comm;
  Mat_SeqAIJ  *aij;
  PetscBool   colflag,allcolumns=PETSC_FALSE;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)mat,&comm));
  PetscCallMPI(MPI_Comm_rank(comm,&rank));
  PetscCallMPI(MPI_Comm_size(comm,&size));

  /* Check for special case: each processor gets entire matrix columns */
  PetscCall(ISIdentity(iscol,&colflag));
  PetscCall(ISGetLocalSize(iscol,&n));
  if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE;
  PetscCall(MPIU_Allreduce(MPI_IN_PLACE,&allcolumns,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)mat)));

  if (call == MAT_REUSE_MATRIX) {
    /* reuse the sequential submatrix saved on *newmat by the MAT_INITIAL_MATRIX call */
    PetscCall(PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse));
    PetscCheck(Mreuse,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
    PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,allcolumns,&Mreuse));
  } else {
    PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,allcolumns,&Mreuse));
  }

  /*
      m - number of local rows
      n - number of columns (same on all processors)
      rstart - first row in new global matrix generated
  */
  PetscCall(MatGetSize(Mreuse,&m,&n));
  PetscCall(MatGetBlockSizes(Mreuse,&bs,&cbs));
  if (call == MAT_INITIAL_MATRIX) {
    aij = (Mat_SeqAIJ*)(Mreuse)->data;
    ii  = aij->i;
    jj  = aij->j;

    /*
        Determine the number of non-zeros in the diagonal and off-diagonal
        portions of the matrix in order to do correct preallocation
    */

    /* first get start and end of "diagonal" columns */
    if (csize == PETSC_DECIDE) {
      PetscCall(ISGetSize(isrow,&mglobal));
      if (mglobal == n) { /* square matrix */
        nlocal = m;
      } else {
        nlocal = n/size + ((n % size) > rank);
      }
    } else {
      nlocal = csize;
    }
    PetscCallMPI(MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm));
    rstart = rend - nlocal;
    PetscCheck(rank != size - 1 || rend == n,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT,rend,n);

    /* next, compute all the lengths */
    PetscCall(PetscMalloc1(2*m+1,&dlens));
    olens = dlens + m; /* dlens and olens share one allocation */
    for (i=0; i<m; i++) {
      jend = ii[i+1] - ii[i];
      olen = 0;
      dlen = 0;
      for (j=0; j<jend; j++) {
        if (*jj < rstart || *jj >= rend) olen++;
        else dlen++;
        jj++;
      }
      olens[i] = olen;
      dlens[i] = dlen;
    }
    PetscCall(MatCreate(comm,&M));
    PetscCall(MatSetSizes(M,m,nlocal,PETSC_DECIDE,n));
    PetscCall(MatSetBlockSizes(M,bs,cbs));
    PetscCall(MatSetType(M,((PetscObject)mat)->type_name));
    PetscCall(MatMPIAIJSetPreallocation(M,0,dlens,0,olens));
    PetscCall(PetscFree(dlens));
  } else {
    PetscInt ml,nl;

    M = *newmat;
    PetscCall(MatGetLocalSize(M,&ml,&nl));
    PetscCheck(ml == m,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
    PetscCall(MatZeroEntries(M));
    /*
         The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
       rather than the slower MatSetValues().
    */
    M->was_assembled = PETSC_TRUE;
    M->assembled     = PETSC_FALSE;
  }
  PetscCall(MatGetOwnershipRange(M,&rstart,&rend));
  aij = (Mat_SeqAIJ*)(Mreuse)->data;
  ii  = aij->i;
  jj  = aij->j;

  /* trigger copy to CPU if needed */
  PetscCall(MatSeqAIJGetArrayRead(Mreuse,(const PetscScalar**)&aa));
  for (i=0; i<m; i++) {
    row = rstart + i;
    nz  = ii[i+1] - ii[i];
    cwork = jj; jj += nz;
    vwork = aa; aa += nz;
    PetscCall(MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES));
  }
  PetscCall(MatSeqAIJRestoreArrayRead(Mreuse,(const PetscScalar**)&aa));

  PetscCall(MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY));
  *newmat = M;

  /* save submatrix used in processor for next request */
  if (call == MAT_INITIAL_MATRIX) {
    PetscCall(PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse));
    PetscCall(MatDestroy(&Mreuse));
  }
  PetscFunctionReturn(0);
}

/*
   MatMPIAIJSetPreallocationCSR_MPIAIJ - implementation of MatMPIAIJSetPreallocationCSR() for MATMPIAIJ.

   Counts diagonal/off-diagonal nonzeros per row from the local CSR (Ii,J), preallocates,
   inserts the values, assembles, and records in Aij->ld the number of entries in each row
   whose column lies below the diagonal block (column < cstart); MatUpdateMPIAIJWithArrays()
   relies on that layout information.
*/
PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
{
  PetscInt       m,cstart, cend,j,nnz,i,d,*ld;
  PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
  const PetscInt *JJ;
  PetscBool      nooffprocentries;
  Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*)B->data;

  PetscFunctionBegin;
  PetscCheck(Ii[0] == 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %" PetscInt_FMT,Ii[0]);

  PetscCall(PetscLayoutSetUp(B->rmap));
  PetscCall(PetscLayoutSetUp(B->cmap));
  m      = B->rmap->n;
  cstart = B->cmap->rstart;
  cend   = B->cmap->rend;
  rstart = B->rmap->rstart;

  PetscCall(PetscCalloc2(m,&d_nnz,m,&o_nnz));

  if (PetscDefined(USE_DEBUG)) {
    /* validate the CSR input only in debug builds */
    for (i=0; i<m; i++) {
      nnz = Ii[i+1]- Ii[i];
      JJ  = J + Ii[i];
      PetscCheck(nnz >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %" PetscInt_FMT " has a negative %" PetscInt_FMT " number of columns",i,nnz);
      PetscCheck(!nnz || !(JJ[0] < 0),PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %" PetscInt_FMT " starts with negative column index %" PetscInt_FMT,i,JJ[0]);
      PetscCheck(!nnz || !(JJ[nnz-1] >= B->cmap->N),PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %" PetscInt_FMT " ends with too large a column index %" PetscInt_FMT " (max allowed %" PetscInt_FMT ")",i,JJ[nnz-1],B->cmap->N);
    }
  }

  /* count diagonal-block vs off-diagonal-block entries per row for preallocation */
  for (i=0; i<m; i++) {
    nnz     = Ii[i+1]- Ii[i];
    JJ      = J + Ii[i];
    nnz_max = PetscMax(nnz_max,nnz);
    d       = 0;
    for (j=0; j<nnz; j++) {
      if (cstart <= JJ[j] && JJ[j] < cend) d++;
    }
    d_nnz[i] = d;
    o_nnz[i] = nnz - d;
  }
  PetscCall(MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz));
  PetscCall(PetscFree2(d_nnz,o_nnz));

  for (i=0; i<m; i++) {
    ii = i + rstart;
    PetscCall(MatSetValues_MPIAIJ(B,1,&ii,Ii[i+1] - Ii[i],J+Ii[i], v ? v + Ii[i] : NULL,INSERT_VALUES));
  }
  nooffprocentries    = B->nooffprocentries;
  B->nooffprocentries = PETSC_TRUE; /* all entries are local here; skip the off-process communication */
  PetscCall(MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY));
  B->nooffprocentries = nooffprocentries;

  /* count number of entries below block diagonal */
  PetscCall(PetscFree(Aij->ld));
  PetscCall(PetscCalloc1(m,&ld));
  Aij->ld = ld;
  for (i=0; i<m; i++) {
    nnz = Ii[i+1] - Ii[i];
    j   = 0;
    while (j < nnz && J[j] < cstart) {j++;}
    ld[i] = j;
    J    += nnz;
  }

  PetscCall(MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE));
  PetscFunctionReturn(0);
}

/*@
   MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
   (the default parallel PETSc format).

   Collective

   Input Parameters:
+  B - the matrix
.  i - the indices into j for the start of each local row (starts with zero)
.
j - the column indices for each local row (starts with zero)
-  v - optional values in the matrix

   Level: developer

   Notes:
       The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
     thus you CANNOT change the matrix entries by changing the values of v[] after you have
     called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.

       The i and j indices are 0 based, and i indices are indices corresponding to the local j array.

       The format which is used for the sparse matrix input, is equivalent to a
    row-major ordering, i.e. for the following matrix, the input data expected is
    as shown

$        1 0 0
$        2 0 3     P0
$       -------
$        4 5 6     P1
$
$     Process0 [P0]: rows_owned=[0,1]
$        i =  {0,1,3}  [size = nrow+1  = 2+1]
$        j =  {0,0,2}  [size = 3]
$        v =  {1,2,3}  [size = 3]
$
$     Process1 [P1]: rows_owned=[2]
$        i =  {0,3}    [size = nrow+1  = 1+1]
$        j =  {0,1,2}  [size = 3]
$        v =  {4,5,6}  [size = 3]

.seealso: `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatCreateAIJ()`, `MATMPIAIJ`,
          `MatCreateSeqAIJWithArrays()`, `MatCreateMPIAIJWithSplitArrays()`
@*/
PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
{
  PetscFunctionBegin;
  /* dispatches to the type-specific implementation (e.g. MatMPIAIJSetPreallocationCSR_MPIAIJ); a no-op if the type does not provide one */
  PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));
  PetscFunctionReturn(0);
}

/*@C
   MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
   (the default parallel PETSc format).  For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective

   Input Parameters:
+  B - the matrix
.  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
           (same value is used for all local rows)
.  d_nnz - array containing the number of nonzeros in the various rows of the
           DIAGONAL portion of the local submatrix (possibly different for each row)
           or NULL (PETSC_NULL_INTEGER in Fortran), if d_nz is used to specify the nonzero structure.
           The size of this array is equal to the number of local rows, i.e 'm'.
           For matrices that will be factored, you must leave room for (and set)
           the diagonal entry even if it is zero.
.  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
           submatrix (same value is used for all local rows).
-  o_nnz - array containing the number of nonzeros in the various rows of the
           OFF-DIAGONAL portion of the local submatrix (possibly different for
           each row) or NULL (PETSC_NULL_INTEGER in Fortran), if o_nz is used to specify the nonzero
           structure. The size of this array is equal to the number
           of local rows, i.e 'm'.

   If the *_nnz parameter is given then the *_nz parameter is ignored

   The AIJ format (also called the Yale sparse matrix format or
   compressed row storage (CSR)), is fully compatible with standard Fortran 77
   storage.  The stored row and column indices begin with zero.
   See Users-Manual: ch_mat for details.

   The parallel matrix is partitioned such that the first m0 rows belong to
   process 0, the next m1 rows belong to process 1, the next m2 rows belong
   to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.

   The DIAGONAL portion of the local submatrix of a processor can be defined
   as the submatrix which is obtained by extraction the part corresponding to
   the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
   first row that belongs to the processor, r2 is the last row belonging to
   the this processor, and c1-c2 is range of indices of the local part of a
   vector suitable for applying the matrix to.  This is an mxn matrix.  In the
   common case of a square matrix, the row and column ranges are the same and
   the DIAGONAL part is also square. The remaining portion of the local
   submatrix (mxN) constitute the OFF-DIAGONAL portion.

   If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.

   You can call MatGetInfo() to get information on how effective the preallocation was;
   for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
   You can also run with the option -info and look for messages with the string
   malloc in them to see if additional memory allocation was needed.

   Example usage:

   Consider the following 8x8 matrix with 34 non-zero values, that is
   assembled across 3 processors. Let's assume that proc0 owns 3 rows,
   proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
   as follows:

.vb
            1  2  0  |  0  3  0  |  0  4
    Proc0   0  5  6  |  7  0  0  |  8  0
            9  0 10  | 11  0  0  | 12  0
    -------------------------------------
           13  0 14  | 15 16 17  |  0  0
    Proc1   0 18  0  | 19 20 21  |  0  0
            0  0  0  | 22 23  0  | 24  0
    -------------------------------------
    Proc2  25 26 27  |  0  0 28  | 29  0
           30  0  0  | 31 32 33  |  0 34
.ve

   This can be represented as a collection of submatrices as:

.vb
      A B C
      D E F
      G H I
.ve

   Where the submatrices A,B,C are owned by proc0, D,E,F are
   owned by proc1, G,H,I are owned by proc2.

   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'M','N' parameters are 8,8, and have the same values on all procs.

   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
   Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
   part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.

   When d_nz, o_nz parameters are specified, d_nz storage elements are
   allocated for every row of the local diagonal submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submat.
   One way to choose d_nz and o_nz is to use the max nonzeros per local
   row for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
   In this case, the values of d_nz,o_nz are:
.vb
     proc0 : dnz = 2, o_nz = 2
     proc1 : dnz = 3, o_nz = 2
     proc2 : dnz = 1, o_nz = 4
.ve
   We are allocating m*(d_nz+o_nz) storage locations for every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc2. i.e we are using 12+15+10=37 storage locations to store
   34 values.

   When d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
   In the above case the values for d_nnz,o_nnz are:
.vb
     proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2: d_nnz = [1,1] and o_nnz = [4,4]
.ve
   Here the space allocated is sum of all the above values i.e 34, and
   hence pre-allocation is perfect.

   Level: intermediate

.seealso: `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatCreateAIJ()`, `MatMPIAIJSetPreallocationCSR()`,
          `MATMPIAIJ`, `MatGetInfo()`, `PetscSplitOwnership()`
@*/
PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(B,MAT_CLASSID,1);
  PetscValidType(B,1);
  /* dispatches to the type-specific implementation; a no-op if the type does not provide one */
  PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));
  PetscFunctionReturn(0);
}

/*@
   MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard
   CSR format for the local rows.

   Collective

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (Cannot be PETSC_DECIDE)
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
       calculated if N is given) For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.  i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
.  j - column indices
-  a - optional matrix values

   Output Parameter:
.  mat - the matrix

   Level: intermediate

   Notes:
       The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
     thus you CANNOT change the matrix entries by changing the values of a[] after you have
     called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.

       The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
       The format which is used for the sparse matrix input, is equivalent to a
    row-major ordering, i.e. for the following matrix, the input data expected is
    as shown

       Once you have created the matrix you can update it with new numerical values using MatUpdateMPIAIJWithArrays

$        1 0 0
$        2 0 3     P0
$       -------
$        4 5 6     P1
$
$     Process0 [P0]: rows_owned=[0,1]
$        i =  {0,1,3}  [size = nrow+1  = 2+1]
$        j =  {0,0,2}  [size = 3]
$        v =  {1,2,3}  [size = 3]
$
$     Process1 [P1]: rows_owned=[2]
$        i =  {0,3}    [size = nrow+1  = 1+1]
$        j =  {0,1,2}  [size = 3]
$        v =  {4,5,6}  [size = 3]

.seealso: `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
          `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArrays()`
@*/
PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
{
  PetscFunctionBegin;
  PetscCheck(!i || !i[0],PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
  PetscCheck(m >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
  PetscCall(MatCreate(comm,mat));
  PetscCall(MatSetSizes(*mat,m,n,M,N));
  /* PetscCall(MatSetBlockSizes(M,bs,cbs)); */
  PetscCall(MatSetType(*mat,MATMPIAIJ));
  /* preallocation from the CSR arrays also copies the values (if a is non-NULL) and assembles */
  PetscCall(MatMPIAIJSetPreallocationCSR(*mat,i,j,a));
  PetscFunctionReturn(0);
}

/*@
   MatUpdateMPIAIJWithArrays - updates a MPI AIJ matrix using arrays that contain in standard
   CSR format for the local rows.
 Only the numerical values are updated the other arrays must be identical to what was passed from MatCreateMPIAIJWithArrays()

   Deprecated: Use `MatUpdateMPIAIJWithArray()`

   Collective

   Input Parameters:
+  mat - the matrix
.  m - number of local rows (Cannot be PETSC_DECIDE)
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
       calculated if N is given) For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.  Ii - row indices; that is Ii[0] = 0, Ii[row] = Ii[row-1] + number of elements in that row of the matrix
.  J - column indices
-  v - matrix values

   Level: intermediate

.seealso: `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
          `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArrays()`, `MatUpdateMPIAIJWithArray()`
@*/
PetscErrorCode MatUpdateMPIAIJWithArrays(Mat mat,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
{
  PetscInt       nnz,i;
  PetscBool      nooffprocentries;
  Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *Ad  = (Mat_SeqAIJ*)Aij->A->data;
  PetscScalar    *ad,*ao;
  PetscInt       ldi,Iii,md;
  const PetscInt *Adi = Ad->i;
  PetscInt       *ld  = Aij->ld; /* per-row count of entries below the diagonal block, set by MatMPIAIJSetPreallocationCSR_MPIAIJ() */

  PetscFunctionBegin;
  PetscCheck(Ii[0] == 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
  PetscCheck(m >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
  PetscCheck(m == mat->rmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Local number of rows cannot change from call to MatUpdateMPIAIJWithArrays()");
  PetscCheck(n == mat->cmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Local number of columns cannot change from call to MatUpdateMPIAIJWithArrays()");

  PetscCall(MatSeqAIJGetArrayWrite(Aij->A,&ad));
  PetscCall(MatSeqAIJGetArrayWrite(Aij->B,&ao));

  for (i=0; i<m; i++) {
    /* v holds each row as [below-diagonal-block | diagonal block | above-diagonal-block]:
       the first ld[i] values and the trailing nnz-ld[i]-md values go to the off-diagonal
       matrix B, the middle md values go to the diagonal matrix A */
    nnz = Ii[i+1]- Ii[i];
    Iii = Ii[i];
    ldi = ld[i];
    md  = Adi[i+1]-Adi[i];
    PetscCall(PetscArraycpy(ao,v + Iii,ldi));
    PetscCall(PetscArraycpy(ad,v + Iii + ldi,md));
    PetscCall(PetscArraycpy(ao + ldi,v + Iii + ldi + md,nnz - ldi - md));
    ad += md;
    ao += nnz - md;
  }
  nooffprocentries      = mat->nooffprocentries;
  mat->nooffprocentries = PETSC_TRUE;
  PetscCall(MatSeqAIJRestoreArrayWrite(Aij->A,&ad));
  PetscCall(MatSeqAIJRestoreArrayWrite(Aij->B,&ao));
  PetscCall(PetscObjectStateIncrease((PetscObject)Aij->A));
  PetscCall(PetscObjectStateIncrease((PetscObject)Aij->B));
  PetscCall(PetscObjectStateIncrease((PetscObject)mat));
  PetscCall(MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY));
  mat->nooffprocentries = nooffprocentries;
  PetscFunctionReturn(0);
}

/*@
   MatUpdateMPIAIJWithArray - updates an MPI AIJ matrix using an array that contains the nonzero values

   Collective

   Input Parameters:
+  mat - the matrix
-  v - matrix values, stored by row

   Level: intermediate

   Notes:
   The matrix must have been obtained with `MatCreateMPIAIJWithArrays()` or `MatMPIAIJSetPreallocationCSR()`

.seealso: `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
          `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArrays()`
@*/
PetscErrorCode MatUpdateMPIAIJWithArray(Mat mat,const PetscScalar v[])
{
  PetscInt       nnz,i,m;
  PetscBool      nooffprocentries;
  Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *Ad  = (Mat_SeqAIJ*)Aij->A->data;
  Mat_SeqAIJ     *Ao  = (Mat_SeqAIJ*)Aij->B->data;
  PetscScalar    *ad,*ao;
  const PetscInt *Adi = Ad->i,*Adj = Ao->i;
  PetscInt       ldi,Iii,md;
  PetscInt       *ld  = Aij->ld; /* per-row count of entries below the diagonal block */

  PetscFunctionBegin;
  m = mat->rmap->n;

  PetscCall(MatSeqAIJGetArrayWrite(Aij->A,&ad));
  PetscCall(MatSeqAIJGetArrayWrite(Aij->B,&ao));
  Iii = 0;
  for (i=0; i<m; i++) {
    /* same row layout as MatUpdateMPIAIJWithArrays(), but the row start Iii is
       accumulated from the stored nonzero pattern instead of a caller-supplied Ii */
    nnz = Adi[i+1]-Adi[i] + Adj[i+1]-Adj[i];
    ldi = ld[i];
    md  = Adi[i+1]-Adi[i];
    PetscCall(PetscArraycpy(ao,v + Iii,ldi));
    PetscCall(PetscArraycpy(ad,v + Iii + ldi,md));
    PetscCall(PetscArraycpy(ao + ldi,v + Iii + ldi + md,nnz - ldi - md));
    ad  += md;
    ao  += nnz - md;
    Iii += nnz;
  }
  nooffprocentries      = mat->nooffprocentries;
  mat->nooffprocentries = PETSC_TRUE;
  PetscCall(MatSeqAIJRestoreArrayWrite(Aij->A,&ad));
  PetscCall(MatSeqAIJRestoreArrayWrite(Aij->B,&ao));
  PetscCall(PetscObjectStateIncrease((PetscObject)Aij->A));
  PetscCall(PetscObjectStateIncrease((PetscObject)Aij->B));
  PetscCall(PetscObjectStateIncrease((PetscObject)mat));
  PetscCall(MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY));
  mat->nooffprocentries = nooffprocentries;
  PetscFunctionReturn(0);
}

/*@C
   MatCreateAIJ - Creates a sparse parallel matrix in AIJ format
   (the default parallel PETSc format).  For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective

   Input Parameters:
+  comm - MPI communicator
.
m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 4322 This value should be the same as the local size used in creating the 4323 y vector for the matrix-vector product y = Ax. 4324 . n - This value should be the same as the local size used in creating the 4325 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4326 calculated if N is given) For square matrices n is almost always m. 4327 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4328 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4329 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 4330 (same value is used for all local rows) 4331 . d_nnz - array containing the number of nonzeros in the various rows of the 4332 DIAGONAL portion of the local submatrix (possibly different for each row) 4333 or NULL, if d_nz is used to specify the nonzero structure. 4334 The size of this array is equal to the number of local rows, i.e 'm'. 4335 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 4336 submatrix (same value is used for all local rows). 4337 - o_nnz - array containing the number of nonzeros in the various rows of the 4338 OFF-DIAGONAL portion of the local submatrix (possibly different for 4339 each row) or NULL, if o_nz is used to specify the nonzero 4340 structure. The size of this array is equal to the number 4341 of local rows, i.e 'm'. 4342 4343 Output Parameter: 4344 . A - the matrix 4345 4346 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 4347 MatXXXXSetPreallocation() paradigm instead of this routine directly. 
   [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]

   Notes:
   If the *_nnz parameter is given then the *_nz parameter is ignored

   m,n,M,N parameters specify the size of the matrix, and its partitioning across
   processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
   storage requirements for this matrix.

   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
   processor then it must be used on all processors that share the object for
   that argument.

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   The parallel matrix is partitioned across processors such that the
   first m0 rows belong to process 0, the next m1 rows belong to
   process 1, the next m2 rows belong to process 2 etc., where
   m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores
   values corresponding to [m x N] submatrix.

   The columns are logically partitioned with the n0 columns belonging
   to 0th partition, the next n1 columns belonging to the next
   partition etc., where n0,n1,n2... are the input parameter 'n'.

   The DIAGONAL portion of the local submatrix on any given processor
   is the submatrix corresponding to the rows and columns m,n
   corresponding to the given processor. i.e diagonal matrix on
   process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1]
   etc. The remaining portion of the local submatrix [m x (N-n)]
   constitutes the OFF-DIAGONAL portion. The example below better
   illustrates this concept.

   For a square global matrix we define each processor's diagonal portion
   to be its local rows and the corresponding columns (a square submatrix);
   each processor's off-diagonal portion encompasses the remainder of the
   local matrix (a rectangular submatrix).

   If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
4388 4389 When calling this routine with a single process communicator, a matrix of 4390 type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this 4391 type of communicator, use the construction mechanism 4392 .vb 4393 MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...); 4394 .ve 4395 4396 $ MatCreate(...,&A); 4397 $ MatSetType(A,MATMPIAIJ); 4398 $ MatSetSizes(A, m,n,M,N); 4399 $ MatMPIAIJSetPreallocation(A,...); 4400 4401 By default, this format uses inodes (identical nodes) when possible. 4402 We search for consecutive rows with the same nonzero structure, thereby 4403 reusing matrix information to achieve increased efficiency. 4404 4405 Options Database Keys: 4406 + -mat_no_inode - Do not use inodes 4407 . -mat_inode_limit <limit> - Sets inode limit (max limit=5) 4408 - -matmult_vecscatter_view <viewer> - View the vecscatter (i.e., communication pattern) used in MatMult() of sparse parallel matrices. 4409 See viewer types in manual of MatView(). Of them, ascii_matlab, draw or binary cause the vecscatter be viewed as a matrix. 4410 Entry (i,j) is the size of message (in bytes) rank i sends to rank j in one MatMult() call. 4411 4412 Example usage: 4413 4414 Consider the following 8x8 matrix with 34 non-zero values, that is 4415 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 4416 proc1 owns 3 rows, proc2 owns 2 rows. 
   This division can be shown
   as follows

.vb
            1  2  0 | 0  3  0 | 0  4
    Proc0   0  5  6 | 7  0  0 | 8  0
            9  0 10 | 11 0  0 | 12 0
    -------------------------------------
            13 0 14 | 15 16 17 | 0  0
    Proc1   0 18  0 | 19 20 21 | 0  0
            0  0  0 | 22 23  0 | 24 0
    -------------------------------------
    Proc2   25 26 27 | 0  0 28 | 29 0
            30  0  0 | 31 32 33 | 0 34
.ve

   This can be represented as a collection of submatrices as

.vb
      A B C
      D E F
      G H I
.ve

   Where the submatrices A,B,C are owned by proc0, D,E,F are
   owned by proc1, G,H,I are owned by proc2.

   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'M','N' parameters are 8,8, and have the same values on all procs.

   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
   Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
   part as SeqAIJ matrices. e.g. proc1 will store [E] as a SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.

   When d_nz, o_nz parameters are specified, d_nz storage elements are
   allocated for every row of the local diagonal submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submat.
   One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
   local row for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
   In this case, the values of d_nz,o_nz are
.vb
     proc0 : d_nz = 2, o_nz = 2
     proc1 : d_nz = 3, o_nz = 2
     proc2 : d_nz = 1, o_nz = 4
.ve
   We are allocating m*(d_nz+o_nz) storage locations for every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc2.
i.e we are using 12+15+10=37 storage locations to store 4468 34 values. 4469 4470 When d_nnz, o_nnz parameters are specified, the storage is specified 4471 for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 4472 In the above case the values for d_nnz,o_nnz are 4473 .vb 4474 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 4475 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 4476 proc2: d_nnz = [1,1] and o_nnz = [4,4] 4477 .ve 4478 Here the space allocated is sum of all the above values i.e 34, and 4479 hence pre-allocation is perfect. 4480 4481 Level: intermediate 4482 4483 .seealso: `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 4484 `MATMPIAIJ`, `MatCreateMPIAIJWithArrays()` 4485 @*/ 4486 PetscErrorCode MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 4487 { 4488 PetscMPIInt size; 4489 4490 PetscFunctionBegin; 4491 PetscCall(MatCreate(comm,A)); 4492 PetscCall(MatSetSizes(*A,m,n,M,N)); 4493 PetscCallMPI(MPI_Comm_size(comm,&size)); 4494 if (size > 1) { 4495 PetscCall(MatSetType(*A,MATMPIAIJ)); 4496 PetscCall(MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz)); 4497 } else { 4498 PetscCall(MatSetType(*A,MATSEQAIJ)); 4499 PetscCall(MatSeqAIJSetPreallocation(*A,d_nz,d_nnz)); 4500 } 4501 PetscFunctionReturn(0); 4502 } 4503 4504 /*@C 4505 MatMPIAIJGetSeqAIJ - Returns the local piece of this distributed matrix 4506 4507 Not collective 4508 4509 Input Parameter: 4510 . A - The MPIAIJ matrix 4511 4512 Output Parameters: 4513 + Ad - The local diagonal block as a SeqAIJ matrix 4514 . Ao - The local off-diagonal block as a SeqAIJ matrix 4515 - colmap - An array mapping local column numbers of Ao to global column numbers of the parallel matrix 4516 4517 Note: The rows in Ad and Ao are in [0, Nr), where Nr is the number of local rows on this process. 
   The columns
   in Ad are in [0, Nc) where Nc is the number of local columns. The columns in Ao are in [0, Nco), where Nco is
   the number of nonzero columns in the local off-diagonal piece of the matrix A. The array colmap maps these
   local column numbers to global column numbers in the original matrix.

   Level: intermediate

.seealso: `MatMPIAIJGetLocalMat()`, `MatMPIAIJGetLocalMatCondensed()`, `MatCreateAIJ()`, `MATMPIAIJ`, `MATSEQAIJ`
@*/
PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
  PetscBool  flg;

  PetscFunctionBegin;
  /* prefix match so MPIAIJ subclasses (e.g. MATMPIAIJCUSPARSE) are accepted too */
  PetscCall(PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&flg));
  PetscCheck(flg,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MATMPIAIJ matrix as input");
  /* all outputs are optional; borrowed references, the caller must not destroy them */
  if (Ad) *Ad = a->A;
  if (Ao) *Ao = a->B;
  if (colmap) *colmap = a->garray;
  PetscFunctionReturn(0);
}

/* Concatenates the rows of the sequential matrices inmat held by each rank of comm into one
   parallel matrix *outmat; MAT_INITIAL_MATRIX also builds the preallocation (symbolic phase). */
PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
{
  PetscInt    m,N,i,rstart,nnz,Ii;
  PetscInt    *indx;
  PetscScalar *values;
  MatType     rootType;

  PetscFunctionBegin;
  PetscCall(MatGetSize(inmat,&m,&N));
  if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
    PetscInt *dnz,*onz,sum,bs,cbs;

    if (n == PETSC_DECIDE) {
      PetscCall(PetscSplitOwnership(comm,&n,&N));
    }
    /* Check sum(n) = N */
    PetscCall(MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm));
    PetscCheck(sum == N,PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %" PetscInt_FMT " != global columns %" PetscInt_FMT,sum,N);

    /* first global row owned by this rank = sum of m on all lower ranks */
    PetscCallMPI(MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm));
    rstart -= m;

    MatPreallocateBegin(comm,m,n,dnz,onz);
    for (i=0; i<m; i++) {
      PetscCall(MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL));
      PetscCall(MatPreallocateSet(i+rstart,nnz,indx,dnz,onz));
      PetscCall(MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL));
    }

    PetscCall(MatCreate(comm,outmat));
    PetscCall(MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE));
    PetscCall(MatGetBlockSizes(inmat,&bs,&cbs));
    PetscCall(MatSetBlockSizes(*outmat,bs,cbs));
    PetscCall(MatGetRootType_Private(inmat,&rootType));
    PetscCall(MatSetType(*outmat,rootType));
    /* both preallocations are set; only the one matching the actual type takes effect */
    PetscCall(MatSeqAIJSetPreallocation(*outmat,0,dnz));
    PetscCall(MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz));
    MatPreallocateEnd(dnz,onz);
    PetscCall(MatSetOption(*outmat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE));
  }

  /* numeric phase: copy the local rows into the global row block owned by this rank */
  PetscCall(MatGetOwnershipRange(*outmat,&rstart,NULL));
  for (i=0; i<m; i++) {
    PetscCall(MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values));
    Ii = i + rstart;
    PetscCall(MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES));
    PetscCall(MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values));
  }
  PetscCall(MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY));
  PetscFunctionReturn(0);
}

/* Writes each rank's local rows of A (as a sequential matrix with the full global column
   space) to its own binary file "<outfile>.<rank>". */
PetscErrorCode MatFileSplit(Mat A,char *outfile)
{
  PetscMPIInt       rank;
  PetscInt          m,N,i,rstart,nnz;
  size_t            len;
  const PetscInt    *indx;
  PetscViewer       out;
  char              *name;
  Mat               B;
  const PetscScalar *values;

  PetscFunctionBegin;
  PetscCall(MatGetLocalSize(A,&m,NULL));
  PetscCall(MatGetSize(A,NULL,&N));
  /* Should this be the type of the diagonal block of A? */
  PetscCall(MatCreate(PETSC_COMM_SELF,&B));
  PetscCall(MatSetSizes(B,m,N,m,N));
  PetscCall(MatSetBlockSizesFromMats(B,A,A));
  PetscCall(MatSetType(B,MATSEQAIJ));
  PetscCall(MatSeqAIJSetPreallocation(B,0,NULL));
  PetscCall(MatGetOwnershipRange(A,&rstart,NULL));
  for (i=0; i<m; i++) {
    PetscCall(MatGetRow(A,i+rstart,&nnz,&indx,&values));
    PetscCall(MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES));
    PetscCall(MatRestoreRow(A,i+rstart,&nnz,&indx,&values));
  }
  PetscCall(MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY));

  PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank));
  PetscCall(PetscStrlen(outfile,&len));
  /* len + '.' + rank digits + NUL; NOTE(review): len+6 leaves room for at most a
     4-digit rank — verify for very large communicators */
  PetscCall(PetscMalloc1(len+6,&name));
  PetscCall(PetscSNPrintf(name,len+6,"%s.%d",outfile,rank));
  PetscCall(PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out));
  PetscCall(PetscFree(name));
  PetscCall(MatView(B,out));
  PetscCall(PetscViewerDestroy(&out));
  PetscCall(MatDestroy(&B));
  PetscFunctionReturn(0);
}

/* Container destructor for the Mat_Merge_SeqsToMPI support structure attached by
   MatCreateMPIAIJSumSeqAIJSymbolic(); frees every buffer it owns. */
static PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(void *data)
{
  Mat_Merge_SeqsToMPI *merge = (Mat_Merge_SeqsToMPI *)data;

  PetscFunctionBegin;
  if (!merge) PetscFunctionReturn(0);
  PetscCall(PetscFree(merge->id_r));
  PetscCall(PetscFree(merge->len_s));
  PetscCall(PetscFree(merge->len_r));
  PetscCall(PetscFree(merge->bi));
  PetscCall(PetscFree(merge->bj));
  /* buf_ri/buf_rj were allocated as one contiguous slab anchored at [0] */
  PetscCall(PetscFree(merge->buf_ri[0]));
  PetscCall(PetscFree(merge->buf_ri));
  PetscCall(PetscFree(merge->buf_rj[0]));
  PetscCall(PetscFree(merge->buf_rj));
  PetscCall(PetscFree(merge->coi));
  PetscCall(PetscFree(merge->coj));
  PetscCall(PetscFree(merge->owners_co));
  PetscCall(PetscLayoutDestroy(&merge->rowmap));
  PetscCall(PetscFree(merge));
  PetscFunctionReturn(0);
}

#include <../src/mat/utils/freespace.h>
#include <petscbt.h>

/* Numeric phase of MatCreateMPIAIJSumSeqAIJ(): each rank ships the values of the seqmat
   rows owned by other ranks, then sums local and received values into mpimat row by row.
   mpimat must come from MatCreateMPIAIJSumSeqAIJSymbolic() (checked via the attached container). */
PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat)
{
  MPI_Comm            comm;
  Mat_SeqAIJ          *a =(Mat_SeqAIJ*)seqmat->data;
  PetscMPIInt         size,rank,taga,*len_s;
  PetscInt            N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj;
  PetscInt            proc,m;
  PetscInt            **buf_ri,**buf_rj;
  PetscInt            k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
  PetscInt            nrows,**buf_ri_k,**nextrow,**nextai;
  MPI_Request         *s_waits,*r_waits;
  MPI_Status          *status;
  const MatScalar     *aa,*a_a;
  MatScalar           **abuf_r,*ba_i;
  Mat_Merge_SeqsToMPI *merge;
  PetscContainer      container;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)mpimat,&comm));
  PetscCall(PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0));

  PetscCallMPI(MPI_Comm_size(comm,&size));
  PetscCallMPI(MPI_Comm_rank(comm,&rank));

  PetscCall(PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container));
  PetscCheck(container,PetscObjectComm((PetscObject)mpimat),PETSC_ERR_PLIB,"Mat not created from MatCreateMPIAIJSumSeqAIJSymbolic");
  PetscCall(PetscContainerGetPointer(container,(void**)&merge));
  PetscCall(MatSeqAIJGetArrayRead(seqmat,&a_a));
  aa = a_a;

  bi     = merge->bi;
  bj     = merge->bj;
  buf_ri = merge->buf_ri;
  buf_rj = merge->buf_rj;

  PetscCall(PetscMalloc1(size,&status));
  owners = merge->rowmap->range;
  len_s  = merge->len_s;

  /* send and recv matrix values */
  /*-----------------------------*/
  PetscCall(PetscObjectGetNewTag((PetscObject)mpimat,&taga));
  PetscCall(PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits));

  PetscCall(PetscMalloc1(merge->nsend+1,&s_waits));
  for (proc=0,k=0; proc<size; proc++) {
    if (!len_s[proc]) continue;
    /* rows destined for proc are contiguous in seqmat; send their values in one message */
    i = owners[proc];
    PetscCallMPI(MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k));
    k++;
  }

  if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv,r_waits,status));
  if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend,s_waits,status));
  PetscCall(PetscFree(status));

  PetscCall(PetscFree(s_waits));
  PetscCall(PetscFree(r_waits));

  /* insert mat values of mpimat */
  /*----------------------------*/
  PetscCall(PetscMalloc1(N,&ba_i));
  PetscCall(PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai));

  for (k=0; k<merge->nrecv; k++) {
    buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
    nrows       = *(buf_ri_k[k]);
    nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th recved i-structure */
    nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure */
  }

  /* set values of ba */
  m = merge->rowmap->n;
  for (i=0; i<m; i++) {
    arow = owners[rank] + i; /* global row index */
    bj_i = bj+bi[i];         /* col indices of the i-th row of mpimat */
    bnzi = bi[i+1] - bi[i];
    PetscCall(PetscArrayzero(ba_i,bnzi));

    /* add local non-zero vals of this proc's seqmat into ba */
    anzi   = ai[arow+1] - ai[arow];
    aj     = a->j + ai[arow];
    aa     = a_a + ai[arow];
    nextaj = 0;
    /* merge the sorted source row into the (superset) merged row pattern */
    for (j=0; nextaj<anzi; j++) {
      if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
        ba_i[j] += aa[nextaj++];
      }
    }

    /* add received vals into ba */
    for (k=0; k<merge->nrecv; k++) { /* k-th received message */
      /* i-th row */
      if (i == *nextrow[k]) {
        anzi   = *(nextai[k]+1) - *nextai[k];
        aj     = buf_rj[k] + *(nextai[k]);
        aa     = abuf_r[k] + *(nextai[k]);
        nextaj = 0;
        for (j=0; nextaj<anzi; j++) {
          if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
            ba_i[j] += aa[nextaj++];
          }
        }
        nextrow[k]++; nextai[k]++;
      }
    }
    PetscCall(MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES));
  }
  PetscCall(MatSeqAIJRestoreArrayRead(seqmat,&a_a));
  PetscCall(MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY));

  PetscCall(PetscFree(abuf_r[0]));
  PetscCall(PetscFree(abuf_r));
  PetscCall(PetscFree(ba_i));
  PetscCall(PetscFree3(buf_ri_k,nextrow,nextai));
  PetscCall(PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0));
  PetscFunctionReturn(0);
}

/* Symbolic phase of MatCreateMPIAIJSumSeqAIJ(): exchanges the i/j structure of the
   per-rank sequential matrices, merges the sparsity patterns of the locally owned rows,
   and creates an (unassembled) MPIAIJ matrix with the Mat_Merge_SeqsToMPI support
   structure attached for the numeric phase. */
PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
{
  Mat                 B_mpi;
  Mat_SeqAIJ          *a=(Mat_SeqAIJ*)seqmat->data;
  PetscMPIInt         size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
  PetscInt            **buf_rj,**buf_ri,**buf_ri_k;
  PetscInt            M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
  PetscInt            len,proc,*dnz,*onz,bs,cbs;
  PetscInt            k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi;
  PetscInt            nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
  MPI_Request         *si_waits,*sj_waits,*ri_waits,*rj_waits;
  MPI_Status          *status;
  PetscFreeSpaceList  free_space=NULL,current_space=NULL;
  PetscBT             lnkbt;
  Mat_Merge_SeqsToMPI *merge;
  PetscContainer      container;

  PetscFunctionBegin;
  PetscCall(PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0));

  /* make sure it is a PETSc comm */
  PetscCall(PetscCommDuplicate(comm,&comm,NULL));
  PetscCallMPI(MPI_Comm_size(comm,&size));
  PetscCallMPI(MPI_Comm_rank(comm,&rank));

  PetscCall(PetscNew(&merge));
  PetscCall(PetscMalloc1(size,&status));

  /* determine row ownership */
  /*---------------------------------------------------------*/
  PetscCall(PetscLayoutCreate(comm,&merge->rowmap));
  PetscCall(PetscLayoutSetLocalSize(merge->rowmap,m));
  PetscCall(PetscLayoutSetSize(merge->rowmap,M));
  PetscCall(PetscLayoutSetBlockSize(merge->rowmap,1));
  PetscCall(PetscLayoutSetUp(merge->rowmap));
  PetscCall(PetscMalloc1(size,&len_si));
  PetscCall(PetscMalloc1(size,&merge->len_s));

  m      = merge->rowmap->n;
  owners = merge->rowmap->range;

  /* determine the number of messages to send, their lengths */
  /*---------------------------------------------------------*/
  len_s = merge->len_s;

  len          = 0; /* length of buf_si[] */
  merge->nsend = 0;
  for (proc=0; proc<size; proc++) {
    len_si[proc] = 0;
    if (proc == rank) {
      len_s[proc] = 0;
    } else {
      len_si[proc] = owners[proc+1] - owners[proc] + 1;
      len_s[proc]  = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
    }
    if (len_s[proc]) {
      merge->nsend++;
      nrows = 0;
      /* only rows with at least one nonzero are transmitted in the i-structure */
      for (i=owners[proc]; i<owners[proc+1]; i++) {
        if (ai[i+1] > ai[i]) nrows++;
      }
      len_si[proc] = 2*(nrows+1);
      len         += len_si[proc];
    }
  }

  /* determine the number and length of messages to receive for ij-structure */
  /*-------------------------------------------------------------------------*/
  PetscCall(PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv));
  PetscCall(PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri));

  /* post the Irecv of j-structure */
  /*-------------------------------*/
  PetscCall(PetscCommGetNewTag(comm,&tagj));
  PetscCall(PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits));

  /* post the Isend of j-structure */
  /*--------------------------------*/
  PetscCall(PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits));

  for (proc=0, k=0; proc<size; proc++) {
    if (!len_s[proc]) continue;
    i = owners[proc];
    PetscCallMPI(MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k));
    k++;
  }

  /* receives and sends of j-structure are complete */
  /*------------------------------------------------*/
  if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv,rj_waits,status));
  if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend,sj_waits,status));

  /* send and recv i-structure */
  /*---------------------------*/
  PetscCall(PetscCommGetNewTag(comm,&tagi));
  PetscCall(PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits));

  PetscCall(PetscMalloc1(len+1,&buf_s));
  buf_si = buf_s; /* points to the beginning of k-th msg to be sent */
  for (proc=0,k=0; proc<size; proc++) {
    if (!len_s[proc]) continue;
    /* form outgoing message for i-structure:
         buf_si[0]:                 nrows to be sent
               [1:nrows]:           row index (global)
               [nrows+1:2*nrows+1]: i-structure index
    */
    /*-------------------------------------------*/
    nrows       = len_si[proc]/2 - 1;
    buf_si_i    = buf_si + nrows+1;
    buf_si[0]   = nrows;
    buf_si_i[0] = 0;
    nrows       = 0;
    for (i=owners[proc]; i<owners[proc+1]; i++) {
      anzi = ai[i+1] - ai[i];
      if (anzi) {
        buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
        buf_si[nrows+1]   = i-owners[proc]; /* local row index */
        nrows++;
      }
    }
    PetscCallMPI(MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k));
    k++;
    buf_si += len_si[proc];
  }

  if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv,ri_waits,status));
  if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend,si_waits,status));

  PetscCall(PetscInfo(seqmat,"nsend: %d, nrecv: %d\n",merge->nsend,merge->nrecv));
  for (i=0; i<merge->nrecv; i++) {
    PetscCall(PetscInfo(seqmat,"recv len_ri=%d, len_rj=%d from [%d]\n",len_ri[i],merge->len_r[i],merge->id_r[i]));
  }

  PetscCall(PetscFree(len_si));
  PetscCall(PetscFree(len_ri));
  PetscCall(PetscFree(rj_waits));
  PetscCall(PetscFree2(si_waits,sj_waits));
  PetscCall(PetscFree(ri_waits));
  PetscCall(PetscFree(buf_s));
  PetscCall(PetscFree(status));

  /* compute a local seq matrix in each processor */
  /*----------------------------------------------*/
  /* allocate bi array and free space for accumulating nonzero column info */
  PetscCall(PetscMalloc1(m+1,&bi));
  bi[0] = 0;

  /* create and initialize a linked list */
  nlnk = N+1;
  PetscCall(PetscLLCreate(N,N,nlnk,lnk,lnkbt));

  /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
  len = ai[owners[rank+1]] - ai[owners[rank]];
  PetscCall(PetscFreeSpaceGet(PetscIntMultTruncate(2,len)+1,&free_space));

  current_space = free_space;

  /* determine symbolic info for each local row */
  PetscCall(PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai));

  for (k=0; k<merge->nrecv; k++) {
    buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
    nrows       = *buf_ri_k[k];
    nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th recved i-structure */
    nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure */
  }

  MatPreallocateBegin(comm,m,n,dnz,onz);
  len = 0;
  for (i=0; i<m; i++) {
    bnzi = 0;
    /* add local non-zero cols of this proc's seqmat into lnk */
    arow = owners[rank] + i;
    anzi = ai[arow+1] - ai[arow];
    aj   = a->j + ai[arow];
    PetscCall(PetscLLAddSorted(anzi,aj,N,&nlnk,lnk,lnkbt));
    bnzi += nlnk;
    /* add received col data into lnk */
    for (k=0; k<merge->nrecv; k++) { /* k-th received message */
      if (i == *nextrow[k]) { /* i-th row */
        anzi = *(nextai[k]+1) - *nextai[k];
        aj   = buf_rj[k] + *nextai[k];
        PetscCall(PetscLLAddSorted(anzi,aj,N,&nlnk,lnk,lnkbt));
        bnzi += nlnk;
        nextrow[k]++; nextai[k]++;
      }
    }
    if (len < bnzi) len = bnzi; /* =max(bnzi) */

    /* if free space is not available, make more free space */
    if (current_space->local_remaining<bnzi) {
      PetscCall(PetscFreeSpaceGet(PetscIntSumTruncate(bnzi,current_space->total_array_size),&current_space));
    }
    /* copy data into free space, then initialize lnk */
    PetscCall(PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt));
    PetscCall(MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz));

    current_space->array           += bnzi;
    current_space->local_used      += bnzi;
    current_space->local_remaining -= bnzi;

    bi[i+1] = bi[i] + bnzi;
  }

  PetscCall(PetscFree3(buf_ri_k,nextrow,nextai));

  PetscCall(PetscMalloc1(bi[m]+1,&bj));
  PetscCall(PetscFreeSpaceContiguous(&free_space,bj));
  PetscCall(PetscLLDestroy(lnk,lnkbt));

  /* create symbolic parallel matrix B_mpi */
  /*---------------------------------------*/
  PetscCall(MatGetBlockSizes(seqmat,&bs,&cbs));
  PetscCall(MatCreate(comm,&B_mpi));
  if (n==PETSC_DECIDE) {
    PetscCall(MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N));
  } else {
    PetscCall(MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE));
  }
  PetscCall(MatSetBlockSizes(B_mpi,bs,cbs));
  PetscCall(MatSetType(B_mpi,MATMPIAIJ));
  PetscCall(MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz));
  MatPreallocateEnd(dnz,onz);
  PetscCall(MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE));

  /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
  B_mpi->assembled = PETSC_FALSE;
  merge->bi        = bi;
  merge->bj        = bj;
  merge->buf_ri    = buf_ri;
  merge->buf_rj    = buf_rj;
  merge->coi       = NULL;
  merge->coj       = NULL;
  merge->owners_co = NULL;

  PetscCall(PetscCommDestroy(&comm));

  /* attach the supporting struct to B_mpi for reuse */
  PetscCall(PetscContainerCreate(PETSC_COMM_SELF,&container));
  PetscCall(PetscContainerSetPointer(container,merge));
  PetscCall(PetscContainerSetUserDestroy(container,MatDestroy_MPIAIJ_SeqsToMPI));
  PetscCall(PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container));
  PetscCall(PetscContainerDestroy(&container));
  *mpimat = B_mpi;

  PetscCall(PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0));
  PetscFunctionReturn(0);
}

/*@C
      MatCreateMPIAIJSumSeqAIJ - Creates a MATMPIAIJ matrix by adding sequential
                 matrices from each processor

    Collective

   Input Parameters:
+    comm - the communicators the parallel matrix will live on
.    seqmat - the input sequential matrices
.    m - number of local rows (or PETSC_DECIDE)
.    n - number of local columns (or PETSC_DECIDE)
-    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX

   Output Parameter:
.    mpimat - the parallel matrix generated

    Level: advanced

   Notes:
     The dimensions of the sequential matrix in each processor MUST be the same.
     The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be
     destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
5052 @*/ 5053 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat) 5054 { 5055 PetscMPIInt size; 5056 5057 PetscFunctionBegin; 5058 PetscCallMPI(MPI_Comm_size(comm,&size)); 5059 if (size == 1) { 5060 PetscCall(PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0)); 5061 if (scall == MAT_INITIAL_MATRIX) { 5062 PetscCall(MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat)); 5063 } else { 5064 PetscCall(MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN)); 5065 } 5066 PetscCall(PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0)); 5067 PetscFunctionReturn(0); 5068 } 5069 PetscCall(PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0)); 5070 if (scall == MAT_INITIAL_MATRIX) { 5071 PetscCall(MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat)); 5072 } 5073 PetscCall(MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat)); 5074 PetscCall(PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0)); 5075 PetscFunctionReturn(0); 5076 } 5077 5078 /*@ 5079 MatAIJGetLocalMat - Creates a SeqAIJ from a MATAIJ matrix by taking all its local rows and putting them into a sequential matrix with 5080 mlocal rows and n columns. Where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained 5081 with MatGetSize() 5082 5083 Not Collective 5084 5085 Input Parameters: 5086 + A - the matrix 5087 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5088 5089 Output Parameter: 5090 . A_loc - the local sequential matrix generated 5091 5092 Level: developer 5093 5094 Notes: 5095 In other words combines the two parts of a parallel MPIAIJ matrix on each process to a single matrix. 
5096 5097 Destroy the matrix with MatDestroy() 5098 5099 .seealso: MatMPIAIJGetLocalMat() 5100 5101 @*/ 5102 PetscErrorCode MatAIJGetLocalMat(Mat A,Mat *A_loc) 5103 { 5104 PetscBool mpi; 5105 5106 PetscFunctionBegin; 5107 PetscCall(PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&mpi)); 5108 if (mpi) { 5109 PetscCall(MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,A_loc)); 5110 } else { 5111 *A_loc = A; 5112 PetscCall(PetscObjectReference((PetscObject)*A_loc)); 5113 } 5114 PetscFunctionReturn(0); 5115 } 5116 5117 /*@ 5118 MatMPIAIJGetLocalMat - Creates a SeqAIJ from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with 5119 mlocal rows and n columns. Where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained 5120 with MatGetSize() 5121 5122 Not Collective 5123 5124 Input Parameters: 5125 + A - the matrix 5126 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5127 5128 Output Parameter: 5129 . A_loc - the local sequential matrix generated 5130 5131 Level: developer 5132 5133 Notes: 5134 In other words combines the two parts of a parallel MPIAIJ matrix on each process to a single matrix. 5135 5136 When the communicator associated with A has size 1 and MAT_INITIAL_MATRIX is requested, the matrix returned is the diagonal part of A. 5137 If MAT_REUSE_MATRIX is requested with comm size 1, MatCopy(Adiag,*A_loc,SAME_NONZERO_PATTERN) is called. 5138 This means that one can preallocate the proper sequential matrix first and then call this routine with MAT_REUSE_MATRIX to safely 5139 modify the values of the returned A_loc. 
5140 5141 .seealso: `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMatCondensed()`, `MatMPIAIJGetLocalMatMerge()` 5142 @*/ 5143 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc) 5144 { 5145 Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data; 5146 Mat_SeqAIJ *mat,*a,*b; 5147 PetscInt *ai,*aj,*bi,*bj,*cmap=mpimat->garray; 5148 const PetscScalar *aa,*ba,*aav,*bav; 5149 PetscScalar *ca,*cam; 5150 PetscMPIInt size; 5151 PetscInt am=A->rmap->n,i,j,k,cstart=A->cmap->rstart; 5152 PetscInt *ci,*cj,col,ncols_d,ncols_o,jo; 5153 PetscBool match; 5154 5155 PetscFunctionBegin; 5156 PetscCall(PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&match)); 5157 PetscCheck(match,PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input"); 5158 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A),&size)); 5159 if (size == 1) { 5160 if (scall == MAT_INITIAL_MATRIX) { 5161 PetscCall(PetscObjectReference((PetscObject)mpimat->A)); 5162 *A_loc = mpimat->A; 5163 } else if (scall == MAT_REUSE_MATRIX) { 5164 PetscCall(MatCopy(mpimat->A,*A_loc,SAME_NONZERO_PATTERN)); 5165 } 5166 PetscFunctionReturn(0); 5167 } 5168 5169 PetscCall(PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0)); 5170 a = (Mat_SeqAIJ*)(mpimat->A)->data; 5171 b = (Mat_SeqAIJ*)(mpimat->B)->data; 5172 ai = a->i; aj = a->j; bi = b->i; bj = b->j; 5173 PetscCall(MatSeqAIJGetArrayRead(mpimat->A,&aav)); 5174 PetscCall(MatSeqAIJGetArrayRead(mpimat->B,&bav)); 5175 aa = aav; 5176 ba = bav; 5177 if (scall == MAT_INITIAL_MATRIX) { 5178 PetscCall(PetscMalloc1(1+am,&ci)); 5179 ci[0] = 0; 5180 for (i=0; i<am; i++) { 5181 ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]); 5182 } 5183 PetscCall(PetscMalloc1(1+ci[am],&cj)); 5184 PetscCall(PetscMalloc1(1+ci[am],&ca)); 5185 k = 0; 5186 for (i=0; i<am; i++) { 5187 ncols_o = bi[i+1] - bi[i]; 5188 ncols_d = ai[i+1] - ai[i]; 5189 /* off-diagonal portion of A */ 5190 for (jo=0; jo<ncols_o; jo++) { 5191 col = cmap[*bj]; 5192 if (col >= cstart) break; 5193 cj[k] = 
col; bj++; 5194 ca[k++] = *ba++; 5195 } 5196 /* diagonal portion of A */ 5197 for (j=0; j<ncols_d; j++) { 5198 cj[k] = cstart + *aj++; 5199 ca[k++] = *aa++; 5200 } 5201 /* off-diagonal portion of A */ 5202 for (j=jo; j<ncols_o; j++) { 5203 cj[k] = cmap[*bj++]; 5204 ca[k++] = *ba++; 5205 } 5206 } 5207 /* put together the new matrix */ 5208 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc)); 5209 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5210 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5211 mat = (Mat_SeqAIJ*)(*A_loc)->data; 5212 mat->free_a = PETSC_TRUE; 5213 mat->free_ij = PETSC_TRUE; 5214 mat->nonew = 0; 5215 } else if (scall == MAT_REUSE_MATRIX) { 5216 mat =(Mat_SeqAIJ*)(*A_loc)->data; 5217 ci = mat->i; 5218 cj = mat->j; 5219 PetscCall(MatSeqAIJGetArrayWrite(*A_loc,&cam)); 5220 for (i=0; i<am; i++) { 5221 /* off-diagonal portion of A */ 5222 ncols_o = bi[i+1] - bi[i]; 5223 for (jo=0; jo<ncols_o; jo++) { 5224 col = cmap[*bj]; 5225 if (col >= cstart) break; 5226 *cam++ = *ba++; bj++; 5227 } 5228 /* diagonal portion of A */ 5229 ncols_d = ai[i+1] - ai[i]; 5230 for (j=0; j<ncols_d; j++) *cam++ = *aa++; 5231 /* off-diagonal portion of A */ 5232 for (j=jo; j<ncols_o; j++) { 5233 *cam++ = *ba++; bj++; 5234 } 5235 } 5236 PetscCall(MatSeqAIJRestoreArrayWrite(*A_loc,&cam)); 5237 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 5238 PetscCall(MatSeqAIJRestoreArrayRead(mpimat->A,&aav)); 5239 PetscCall(MatSeqAIJRestoreArrayRead(mpimat->B,&bav)); 5240 PetscCall(PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0)); 5241 PetscFunctionReturn(0); 5242 } 5243 5244 /*@ 5245 MatMPIAIJGetLocalMatMerge - Creates a SeqAIJ from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with 5246 mlocal rows and n columns. 
Where n is the sum of the number of columns of the diagonal and offdiagonal part 5247 5248 Not Collective 5249 5250 Input Parameters: 5251 + A - the matrix 5252 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5253 5254 Output Parameters: 5255 + glob - sequential IS with global indices associated with the columns of the local sequential matrix generated (can be NULL) 5256 - A_loc - the local sequential matrix generated 5257 5258 Level: developer 5259 5260 Notes: 5261 This is different from MatMPIAIJGetLocalMat() since the first columns in the returning matrix are those associated with the diagonal part, then those associated with the offdiagonal part (in its local ordering) 5262 5263 .seealso: `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMat()`, `MatMPIAIJGetLocalMatCondensed()` 5264 5265 @*/ 5266 PetscErrorCode MatMPIAIJGetLocalMatMerge(Mat A,MatReuse scall,IS *glob,Mat *A_loc) 5267 { 5268 Mat Ao,Ad; 5269 const PetscInt *cmap; 5270 PetscMPIInt size; 5271 PetscErrorCode (*f)(Mat,MatReuse,IS*,Mat*); 5272 5273 PetscFunctionBegin; 5274 PetscCall(MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&cmap)); 5275 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A),&size)); 5276 if (size == 1) { 5277 if (scall == MAT_INITIAL_MATRIX) { 5278 PetscCall(PetscObjectReference((PetscObject)Ad)); 5279 *A_loc = Ad; 5280 } else if (scall == MAT_REUSE_MATRIX) { 5281 PetscCall(MatCopy(Ad,*A_loc,SAME_NONZERO_PATTERN)); 5282 } 5283 if (glob) PetscCall(ISCreateStride(PetscObjectComm((PetscObject)Ad),Ad->cmap->n,Ad->cmap->rstart,1,glob)); 5284 PetscFunctionReturn(0); 5285 } 5286 PetscCall(PetscObjectQueryFunction((PetscObject)A,"MatMPIAIJGetLocalMatMerge_C",&f)); 5287 PetscCall(PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0)); 5288 if (f) { 5289 PetscCall((*f)(A,scall,glob,A_loc)); 5290 } else { 5291 Mat_SeqAIJ *a = (Mat_SeqAIJ*)Ad->data; 5292 Mat_SeqAIJ *b = (Mat_SeqAIJ*)Ao->data; 5293 Mat_SeqAIJ *c; 5294 PetscInt *ai = a->i, *aj = a->j; 5295 PetscInt *bi = b->i, *bj = b->j; 5296 PetscInt *ci,*cj; 
5297 const PetscScalar *aa,*ba; 5298 PetscScalar *ca; 5299 PetscInt i,j,am,dn,on; 5300 5301 PetscCall(MatGetLocalSize(Ad,&am,&dn)); 5302 PetscCall(MatGetLocalSize(Ao,NULL,&on)); 5303 PetscCall(MatSeqAIJGetArrayRead(Ad,&aa)); 5304 PetscCall(MatSeqAIJGetArrayRead(Ao,&ba)); 5305 if (scall == MAT_INITIAL_MATRIX) { 5306 PetscInt k; 5307 PetscCall(PetscMalloc1(1+am,&ci)); 5308 PetscCall(PetscMalloc1(ai[am]+bi[am],&cj)); 5309 PetscCall(PetscMalloc1(ai[am]+bi[am],&ca)); 5310 ci[0] = 0; 5311 for (i=0,k=0; i<am; i++) { 5312 const PetscInt ncols_o = bi[i+1] - bi[i]; 5313 const PetscInt ncols_d = ai[i+1] - ai[i]; 5314 ci[i+1] = ci[i] + ncols_o + ncols_d; 5315 /* diagonal portion of A */ 5316 for (j=0; j<ncols_d; j++,k++) { 5317 cj[k] = *aj++; 5318 ca[k] = *aa++; 5319 } 5320 /* off-diagonal portion of A */ 5321 for (j=0; j<ncols_o; j++,k++) { 5322 cj[k] = dn + *bj++; 5323 ca[k] = *ba++; 5324 } 5325 } 5326 /* put together the new matrix */ 5327 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,dn+on,ci,cj,ca,A_loc)); 5328 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5329 /* Since these are PETSc arrays, change flags to free them as necessary. 
*/ 5330 c = (Mat_SeqAIJ*)(*A_loc)->data; 5331 c->free_a = PETSC_TRUE; 5332 c->free_ij = PETSC_TRUE; 5333 c->nonew = 0; 5334 PetscCall(MatSetType(*A_loc,((PetscObject)Ad)->type_name)); 5335 } else if (scall == MAT_REUSE_MATRIX) { 5336 PetscCall(MatSeqAIJGetArrayWrite(*A_loc,&ca)); 5337 for (i=0; i<am; i++) { 5338 const PetscInt ncols_d = ai[i+1] - ai[i]; 5339 const PetscInt ncols_o = bi[i+1] - bi[i]; 5340 /* diagonal portion of A */ 5341 for (j=0; j<ncols_d; j++) *ca++ = *aa++; 5342 /* off-diagonal portion of A */ 5343 for (j=0; j<ncols_o; j++) *ca++ = *ba++; 5344 } 5345 PetscCall(MatSeqAIJRestoreArrayWrite(*A_loc,&ca)); 5346 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 5347 PetscCall(MatSeqAIJRestoreArrayRead(Ad,&aa)); 5348 PetscCall(MatSeqAIJRestoreArrayRead(Ao,&aa)); 5349 if (glob) { 5350 PetscInt cst, *gidx; 5351 5352 PetscCall(MatGetOwnershipRangeColumn(A,&cst,NULL)); 5353 PetscCall(PetscMalloc1(dn+on,&gidx)); 5354 for (i=0; i<dn; i++) gidx[i] = cst + i; 5355 for (i=0; i<on; i++) gidx[i+dn] = cmap[i]; 5356 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)Ad),dn+on,gidx,PETSC_OWN_POINTER,glob)); 5357 } 5358 } 5359 PetscCall(PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0)); 5360 PetscFunctionReturn(0); 5361 } 5362 5363 /*@C 5364 MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MATMPIAIJ matrix by taking all its local rows and NON-ZERO columns 5365 5366 Not Collective 5367 5368 Input Parameters: 5369 + A - the matrix 5370 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5371 - row, col - index sets of rows and columns to extract (or NULL) 5372 5373 Output Parameter: 5374 . 
A_loc - the local sequential matrix generated 5375 5376 Level: developer 5377 5378 .seealso: `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMat()` 5379 5380 @*/ 5381 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc) 5382 { 5383 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5384 PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx; 5385 IS isrowa,iscola; 5386 Mat *aloc; 5387 PetscBool match; 5388 5389 PetscFunctionBegin; 5390 PetscCall(PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match)); 5391 PetscCheck(match,PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input"); 5392 PetscCall(PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0)); 5393 if (!row) { 5394 start = A->rmap->rstart; end = A->rmap->rend; 5395 PetscCall(ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa)); 5396 } else { 5397 isrowa = *row; 5398 } 5399 if (!col) { 5400 start = A->cmap->rstart; 5401 cmap = a->garray; 5402 nzA = a->A->cmap->n; 5403 nzB = a->B->cmap->n; 5404 PetscCall(PetscMalloc1(nzA+nzB, &idx)); 5405 ncols = 0; 5406 for (i=0; i<nzB; i++) { 5407 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5408 else break; 5409 } 5410 imark = i; 5411 for (i=0; i<nzA; i++) idx[ncols++] = start + i; 5412 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; 5413 PetscCall(ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola)); 5414 } else { 5415 iscola = *col; 5416 } 5417 if (scall != MAT_INITIAL_MATRIX) { 5418 PetscCall(PetscMalloc1(1,&aloc)); 5419 aloc[0] = *A_loc; 5420 } 5421 PetscCall(MatCreateSubMatrices(A,1,&isrowa,&iscola,scall,&aloc)); 5422 if (!col) { /* attach global id of condensed columns */ 5423 PetscCall(PetscObjectCompose((PetscObject)aloc[0],"_petsc_GetLocalMatCondensed_iscol",(PetscObject)iscola)); 5424 } 5425 *A_loc = aloc[0]; 5426 PetscCall(PetscFree(aloc)); 5427 if (!row) { 5428 PetscCall(ISDestroy(&isrowa)); 5429 } 5430 if (!col) { 5431 PetscCall(ISDestroy(&iscola)); 5432 } 5433 
PetscCall(PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0)); 5434 PetscFunctionReturn(0); 5435 } 5436 5437 /* 5438 * Create a sequential AIJ matrix based on row indices. a whole column is extracted once a row is matched. 5439 * Row could be local or remote.The routine is designed to be scalable in memory so that nothing is based 5440 * on a global size. 5441 * */ 5442 PetscErrorCode MatCreateSeqSubMatrixWithRows_Private(Mat P,IS rows,Mat *P_oth) 5443 { 5444 Mat_MPIAIJ *p=(Mat_MPIAIJ*)P->data; 5445 Mat_SeqAIJ *pd=(Mat_SeqAIJ*)(p->A)->data,*po=(Mat_SeqAIJ*)(p->B)->data,*p_oth; 5446 PetscInt plocalsize,nrows,*ilocal,*oilocal,i,lidx,*nrcols,*nlcols,ncol; 5447 PetscMPIInt owner; 5448 PetscSFNode *iremote,*oiremote; 5449 const PetscInt *lrowindices; 5450 PetscSF sf,osf; 5451 PetscInt pcstart,*roffsets,*loffsets,*pnnz,j; 5452 PetscInt ontotalcols,dntotalcols,ntotalcols,nout; 5453 MPI_Comm comm; 5454 ISLocalToGlobalMapping mapping; 5455 const PetscScalar *pd_a,*po_a; 5456 5457 PetscFunctionBegin; 5458 PetscCall(PetscObjectGetComm((PetscObject)P,&comm)); 5459 /* plocalsize is the number of roots 5460 * nrows is the number of leaves 5461 * */ 5462 PetscCall(MatGetLocalSize(P,&plocalsize,NULL)); 5463 PetscCall(ISGetLocalSize(rows,&nrows)); 5464 PetscCall(PetscCalloc1(nrows,&iremote)); 5465 PetscCall(ISGetIndices(rows,&lrowindices)); 5466 for (i=0;i<nrows;i++) { 5467 /* Find a remote index and an owner for a row 5468 * The row could be local or remote 5469 * */ 5470 owner = 0; 5471 lidx = 0; 5472 PetscCall(PetscLayoutFindOwnerIndex(P->rmap,lrowindices[i],&owner,&lidx)); 5473 iremote[i].index = lidx; 5474 iremote[i].rank = owner; 5475 } 5476 /* Create SF to communicate how many nonzero columns for each row */ 5477 PetscCall(PetscSFCreate(comm,&sf)); 5478 /* SF will figure out the number of nonzero colunms for each row, and their 5479 * offsets 5480 * */ 5481 PetscCall(PetscSFSetGraph(sf,plocalsize,nrows,NULL,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER)); 5482 
PetscCall(PetscSFSetFromOptions(sf)); 5483 PetscCall(PetscSFSetUp(sf)); 5484 5485 PetscCall(PetscCalloc1(2*(plocalsize+1),&roffsets)); 5486 PetscCall(PetscCalloc1(2*plocalsize,&nrcols)); 5487 PetscCall(PetscCalloc1(nrows,&pnnz)); 5488 roffsets[0] = 0; 5489 roffsets[1] = 0; 5490 for (i=0;i<plocalsize;i++) { 5491 /* diag */ 5492 nrcols[i*2+0] = pd->i[i+1] - pd->i[i]; 5493 /* off diag */ 5494 nrcols[i*2+1] = po->i[i+1] - po->i[i]; 5495 /* compute offsets so that we relative location for each row */ 5496 roffsets[(i+1)*2+0] = roffsets[i*2+0] + nrcols[i*2+0]; 5497 roffsets[(i+1)*2+1] = roffsets[i*2+1] + nrcols[i*2+1]; 5498 } 5499 PetscCall(PetscCalloc1(2*nrows,&nlcols)); 5500 PetscCall(PetscCalloc1(2*nrows,&loffsets)); 5501 /* 'r' means root, and 'l' means leaf */ 5502 PetscCall(PetscSFBcastBegin(sf,MPIU_2INT,nrcols,nlcols,MPI_REPLACE)); 5503 PetscCall(PetscSFBcastBegin(sf,MPIU_2INT,roffsets,loffsets,MPI_REPLACE)); 5504 PetscCall(PetscSFBcastEnd(sf,MPIU_2INT,nrcols,nlcols,MPI_REPLACE)); 5505 PetscCall(PetscSFBcastEnd(sf,MPIU_2INT,roffsets,loffsets,MPI_REPLACE)); 5506 PetscCall(PetscSFDestroy(&sf)); 5507 PetscCall(PetscFree(roffsets)); 5508 PetscCall(PetscFree(nrcols)); 5509 dntotalcols = 0; 5510 ontotalcols = 0; 5511 ncol = 0; 5512 for (i=0;i<nrows;i++) { 5513 pnnz[i] = nlcols[i*2+0] + nlcols[i*2+1]; 5514 ncol = PetscMax(pnnz[i],ncol); 5515 /* diag */ 5516 dntotalcols += nlcols[i*2+0]; 5517 /* off diag */ 5518 ontotalcols += nlcols[i*2+1]; 5519 } 5520 /* We do not need to figure the right number of columns 5521 * since all the calculations will be done by going through the raw data 5522 * */ 5523 PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF,nrows,ncol,0,pnnz,P_oth)); 5524 PetscCall(MatSetUp(*P_oth)); 5525 PetscCall(PetscFree(pnnz)); 5526 p_oth = (Mat_SeqAIJ*) (*P_oth)->data; 5527 /* diag */ 5528 PetscCall(PetscCalloc1(dntotalcols,&iremote)); 5529 /* off diag */ 5530 PetscCall(PetscCalloc1(ontotalcols,&oiremote)); 5531 /* diag */ 5532 
PetscCall(PetscCalloc1(dntotalcols,&ilocal)); 5533 /* off diag */ 5534 PetscCall(PetscCalloc1(ontotalcols,&oilocal)); 5535 dntotalcols = 0; 5536 ontotalcols = 0; 5537 ntotalcols = 0; 5538 for (i=0;i<nrows;i++) { 5539 owner = 0; 5540 PetscCall(PetscLayoutFindOwnerIndex(P->rmap,lrowindices[i],&owner,NULL)); 5541 /* Set iremote for diag matrix */ 5542 for (j=0;j<nlcols[i*2+0];j++) { 5543 iremote[dntotalcols].index = loffsets[i*2+0] + j; 5544 iremote[dntotalcols].rank = owner; 5545 /* P_oth is seqAIJ so that ilocal need to point to the first part of memory */ 5546 ilocal[dntotalcols++] = ntotalcols++; 5547 } 5548 /* off diag */ 5549 for (j=0;j<nlcols[i*2+1];j++) { 5550 oiremote[ontotalcols].index = loffsets[i*2+1] + j; 5551 oiremote[ontotalcols].rank = owner; 5552 oilocal[ontotalcols++] = ntotalcols++; 5553 } 5554 } 5555 PetscCall(ISRestoreIndices(rows,&lrowindices)); 5556 PetscCall(PetscFree(loffsets)); 5557 PetscCall(PetscFree(nlcols)); 5558 PetscCall(PetscSFCreate(comm,&sf)); 5559 /* P serves as roots and P_oth is leaves 5560 * Diag matrix 5561 * */ 5562 PetscCall(PetscSFSetGraph(sf,pd->i[plocalsize],dntotalcols,ilocal,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER)); 5563 PetscCall(PetscSFSetFromOptions(sf)); 5564 PetscCall(PetscSFSetUp(sf)); 5565 5566 PetscCall(PetscSFCreate(comm,&osf)); 5567 /* Off diag */ 5568 PetscCall(PetscSFSetGraph(osf,po->i[plocalsize],ontotalcols,oilocal,PETSC_OWN_POINTER,oiremote,PETSC_OWN_POINTER)); 5569 PetscCall(PetscSFSetFromOptions(osf)); 5570 PetscCall(PetscSFSetUp(osf)); 5571 PetscCall(MatSeqAIJGetArrayRead(p->A,&pd_a)); 5572 PetscCall(MatSeqAIJGetArrayRead(p->B,&po_a)); 5573 /* We operate on the matrix internal data for saving memory */ 5574 PetscCall(PetscSFBcastBegin(sf,MPIU_SCALAR,pd_a,p_oth->a,MPI_REPLACE)); 5575 PetscCall(PetscSFBcastBegin(osf,MPIU_SCALAR,po_a,p_oth->a,MPI_REPLACE)); 5576 PetscCall(MatGetOwnershipRangeColumn(P,&pcstart,NULL)); 5577 /* Convert to global indices for diag matrix */ 5578 for 
(i=0;i<pd->i[plocalsize];i++) pd->j[i] += pcstart; 5579 PetscCall(PetscSFBcastBegin(sf,MPIU_INT,pd->j,p_oth->j,MPI_REPLACE)); 5580 /* We want P_oth store global indices */ 5581 PetscCall(ISLocalToGlobalMappingCreate(comm,1,p->B->cmap->n,p->garray,PETSC_COPY_VALUES,&mapping)); 5582 /* Use memory scalable approach */ 5583 PetscCall(ISLocalToGlobalMappingSetType(mapping,ISLOCALTOGLOBALMAPPINGHASH)); 5584 PetscCall(ISLocalToGlobalMappingApply(mapping,po->i[plocalsize],po->j,po->j)); 5585 PetscCall(PetscSFBcastBegin(osf,MPIU_INT,po->j,p_oth->j,MPI_REPLACE)); 5586 PetscCall(PetscSFBcastEnd(sf,MPIU_INT,pd->j,p_oth->j,MPI_REPLACE)); 5587 /* Convert back to local indices */ 5588 for (i=0;i<pd->i[plocalsize];i++) pd->j[i] -= pcstart; 5589 PetscCall(PetscSFBcastEnd(osf,MPIU_INT,po->j,p_oth->j,MPI_REPLACE)); 5590 nout = 0; 5591 PetscCall(ISGlobalToLocalMappingApply(mapping,IS_GTOLM_DROP,po->i[plocalsize],po->j,&nout,po->j)); 5592 PetscCheck(nout == po->i[plocalsize],comm,PETSC_ERR_ARG_INCOMP,"n %" PetscInt_FMT " does not equal to nout %" PetscInt_FMT " ",po->i[plocalsize],nout); 5593 PetscCall(ISLocalToGlobalMappingDestroy(&mapping)); 5594 /* Exchange values */ 5595 PetscCall(PetscSFBcastEnd(sf,MPIU_SCALAR,pd_a,p_oth->a,MPI_REPLACE)); 5596 PetscCall(PetscSFBcastEnd(osf,MPIU_SCALAR,po_a,p_oth->a,MPI_REPLACE)); 5597 PetscCall(MatSeqAIJRestoreArrayRead(p->A,&pd_a)); 5598 PetscCall(MatSeqAIJRestoreArrayRead(p->B,&po_a)); 5599 /* Stop PETSc from shrinking memory */ 5600 for (i=0;i<nrows;i++) p_oth->ilen[i] = p_oth->imax[i]; 5601 PetscCall(MatAssemblyBegin(*P_oth,MAT_FINAL_ASSEMBLY)); 5602 PetscCall(MatAssemblyEnd(*P_oth,MAT_FINAL_ASSEMBLY)); 5603 /* Attach PetscSF objects to P_oth so that we can reuse it later */ 5604 PetscCall(PetscObjectCompose((PetscObject)*P_oth,"diagsf",(PetscObject)sf)); 5605 PetscCall(PetscObjectCompose((PetscObject)*P_oth,"offdiagsf",(PetscObject)osf)); 5606 PetscCall(PetscSFDestroy(&sf)); 5607 PetscCall(PetscSFDestroy(&osf)); 5608 PetscFunctionReturn(0); 
5609 } 5610 5611 /* 5612 * Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 5613 * This supports MPIAIJ and MAIJ 5614 * */ 5615 PetscErrorCode MatGetBrowsOfAcols_MPIXAIJ(Mat A,Mat P,PetscInt dof,MatReuse reuse,Mat *P_oth) 5616 { 5617 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data,*p=(Mat_MPIAIJ*)P->data; 5618 Mat_SeqAIJ *p_oth; 5619 IS rows,map; 5620 PetscHMapI hamp; 5621 PetscInt i,htsize,*rowindices,off,*mapping,key,count; 5622 MPI_Comm comm; 5623 PetscSF sf,osf; 5624 PetscBool has; 5625 5626 PetscFunctionBegin; 5627 PetscCall(PetscObjectGetComm((PetscObject)A,&comm)); 5628 PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAocols,A,P,0,0)); 5629 /* If it is the first time, create an index set of off-diag nonzero columns of A, 5630 * and then create a submatrix (that often is an overlapping matrix) 5631 * */ 5632 if (reuse == MAT_INITIAL_MATRIX) { 5633 /* Use a hash table to figure out unique keys */ 5634 PetscCall(PetscHMapICreate(&hamp)); 5635 PetscCall(PetscHMapIResize(hamp,a->B->cmap->n)); 5636 PetscCall(PetscCalloc1(a->B->cmap->n,&mapping)); 5637 count = 0; 5638 /* Assume that a->g is sorted, otherwise the following does not make sense */ 5639 for (i=0;i<a->B->cmap->n;i++) { 5640 key = a->garray[i]/dof; 5641 PetscCall(PetscHMapIHas(hamp,key,&has)); 5642 if (!has) { 5643 mapping[i] = count; 5644 PetscCall(PetscHMapISet(hamp,key,count++)); 5645 } else { 5646 /* Current 'i' has the same value the previous step */ 5647 mapping[i] = count-1; 5648 } 5649 } 5650 PetscCall(ISCreateGeneral(comm,a->B->cmap->n,mapping,PETSC_OWN_POINTER,&map)); 5651 PetscCall(PetscHMapIGetSize(hamp,&htsize)); 5652 PetscCheck(htsize==count,comm,PETSC_ERR_ARG_INCOMP," Size of hash map %" PetscInt_FMT " is inconsistent with count %" PetscInt_FMT " ",htsize,count); 5653 PetscCall(PetscCalloc1(htsize,&rowindices)); 5654 off = 0; 5655 PetscCall(PetscHMapIGetKeys(hamp,&off,rowindices)); 5656 PetscCall(PetscHMapIDestroy(&hamp)); 5657 PetscCall(PetscSortInt(htsize,rowindices)); 
5658 PetscCall(ISCreateGeneral(comm,htsize,rowindices,PETSC_OWN_POINTER,&rows)); 5659 /* In case, the matrix was already created but users want to recreate the matrix */ 5660 PetscCall(MatDestroy(P_oth)); 5661 PetscCall(MatCreateSeqSubMatrixWithRows_Private(P,rows,P_oth)); 5662 PetscCall(PetscObjectCompose((PetscObject)*P_oth,"aoffdiagtopothmapping",(PetscObject)map)); 5663 PetscCall(ISDestroy(&map)); 5664 PetscCall(ISDestroy(&rows)); 5665 } else if (reuse == MAT_REUSE_MATRIX) { 5666 /* If matrix was already created, we simply update values using SF objects 5667 * that as attached to the matrix ealier. 5668 */ 5669 const PetscScalar *pd_a,*po_a; 5670 5671 PetscCall(PetscObjectQuery((PetscObject)*P_oth,"diagsf",(PetscObject*)&sf)); 5672 PetscCall(PetscObjectQuery((PetscObject)*P_oth,"offdiagsf",(PetscObject*)&osf)); 5673 PetscCheck(sf && osf,comm,PETSC_ERR_ARG_NULL,"Matrix is not initialized yet"); 5674 p_oth = (Mat_SeqAIJ*) (*P_oth)->data; 5675 /* Update values in place */ 5676 PetscCall(MatSeqAIJGetArrayRead(p->A,&pd_a)); 5677 PetscCall(MatSeqAIJGetArrayRead(p->B,&po_a)); 5678 PetscCall(PetscSFBcastBegin(sf,MPIU_SCALAR,pd_a,p_oth->a,MPI_REPLACE)); 5679 PetscCall(PetscSFBcastBegin(osf,MPIU_SCALAR,po_a,p_oth->a,MPI_REPLACE)); 5680 PetscCall(PetscSFBcastEnd(sf,MPIU_SCALAR,pd_a,p_oth->a,MPI_REPLACE)); 5681 PetscCall(PetscSFBcastEnd(osf,MPIU_SCALAR,po_a,p_oth->a,MPI_REPLACE)); 5682 PetscCall(MatSeqAIJRestoreArrayRead(p->A,&pd_a)); 5683 PetscCall(MatSeqAIJRestoreArrayRead(p->B,&po_a)); 5684 } else SETERRQ(comm,PETSC_ERR_ARG_UNKNOWN_TYPE,"Unknown reuse type"); 5685 PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAocols,A,P,0,0)); 5686 PetscFunctionReturn(0); 5687 } 5688 5689 /*@C 5690 MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 5691 5692 Collective on Mat 5693 5694 Input Parameters: 5695 + A - the first matrix in mpiaij format 5696 . 
B - the second matrix in mpiaij format 5697 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5698 5699 Output Parameters: 5700 + rowb - On input index sets of rows of B to extract (or NULL), modified on output 5701 . colb - On input index sets of columns of B to extract (or NULL), modified on output 5702 - B_seq - the sequential matrix generated 5703 5704 Level: developer 5705 5706 @*/ 5707 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq) 5708 { 5709 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5710 PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark; 5711 IS isrowb,iscolb; 5712 Mat *bseq=NULL; 5713 5714 PetscFunctionBegin; 5715 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 5716 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5717 } 5718 PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0)); 5719 5720 if (scall == MAT_INITIAL_MATRIX) { 5721 start = A->cmap->rstart; 5722 cmap = a->garray; 5723 nzA = a->A->cmap->n; 5724 nzB = a->B->cmap->n; 5725 PetscCall(PetscMalloc1(nzA+nzB, &idx)); 5726 ncols = 0; 5727 for (i=0; i<nzB; i++) { /* row < local row index */ 5728 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5729 else break; 5730 } 5731 imark = i; 5732 for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */ 5733 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 5734 PetscCall(ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb)); 5735 PetscCall(ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb)); 5736 } else { 5737 PetscCheck(rowb && colb,PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 5738 isrowb = *rowb; iscolb = *colb; 5739 PetscCall(PetscMalloc1(1,&bseq)); 5740 bseq[0] = *B_seq; 5741 } 5742 
PetscCall(MatCreateSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq)); 5743 *B_seq = bseq[0]; 5744 PetscCall(PetscFree(bseq)); 5745 if (!rowb) { 5746 PetscCall(ISDestroy(&isrowb)); 5747 } else { 5748 *rowb = isrowb; 5749 } 5750 if (!colb) { 5751 PetscCall(ISDestroy(&iscolb)); 5752 } else { 5753 *colb = iscolb; 5754 } 5755 PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0)); 5756 PetscFunctionReturn(0); 5757 } 5758 5759 /* 5760 MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns 5761 of the OFF-DIAGONAL portion of local A 5762 5763 Collective on Mat 5764 5765 Input Parameters: 5766 + A,B - the matrices in mpiaij format 5767 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5768 5769 Output Parameter: 5770 + startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL) 5771 . startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL) 5772 . bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL) 5773 - B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N 5774 5775 Developer Notes: This directly accesses information inside the VecScatter associated with the matrix-vector product 5776 for this matrix. This is not desirable.. 
5777 5778 Level: developer 5779 5780 */ 5781 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth) 5782 { 5783 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5784 Mat_SeqAIJ *b_oth; 5785 VecScatter ctx; 5786 MPI_Comm comm; 5787 const PetscMPIInt *rprocs,*sprocs; 5788 const PetscInt *srow,*rstarts,*sstarts; 5789 PetscInt *rowlen,*bufj,*bufJ,ncols = 0,aBn=a->B->cmap->n,row,*b_othi,*b_othj,*rvalues=NULL,*svalues=NULL,*cols,sbs,rbs; 5790 PetscInt i,j,k=0,l,ll,nrecvs,nsends,nrows,*rstartsj = NULL,*sstartsj,len; 5791 PetscScalar *b_otha,*bufa,*bufA,*vals = NULL; 5792 MPI_Request *reqs = NULL,*rwaits = NULL,*swaits = NULL; 5793 PetscMPIInt size,tag,rank,nreqs; 5794 5795 PetscFunctionBegin; 5796 PetscCall(PetscObjectGetComm((PetscObject)A,&comm)); 5797 PetscCallMPI(MPI_Comm_size(comm,&size)); 5798 5799 if (PetscUnlikely(A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend)) { 5800 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5801 } 5802 PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0)); 5803 PetscCallMPI(MPI_Comm_rank(comm,&rank)); 5804 5805 if (size == 1) { 5806 startsj_s = NULL; 5807 bufa_ptr = NULL; 5808 *B_oth = NULL; 5809 PetscFunctionReturn(0); 5810 } 5811 5812 ctx = a->Mvctx; 5813 tag = ((PetscObject)ctx)->tag; 5814 5815 PetscCall(VecScatterGetRemote_Private(ctx,PETSC_TRUE/*send*/,&nsends,&sstarts,&srow,&sprocs,&sbs)); 5816 /* rprocs[] must be ordered so that indices received from them are ordered in rvalues[], which is key to algorithms used in this subroutine */ 5817 PetscCall(VecScatterGetRemoteOrdered_Private(ctx,PETSC_FALSE/*recv*/,&nrecvs,&rstarts,NULL/*indices not needed*/,&rprocs,&rbs)); 5818 PetscCall(PetscMPIIntCast(nsends+nrecvs,&nreqs)); 5819 
PetscCall(PetscMalloc1(nreqs,&reqs)); 5820 rwaits = reqs; 5821 swaits = reqs + nrecvs; 5822 5823 if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 5824 if (scall == MAT_INITIAL_MATRIX) { 5825 /* i-array */ 5826 /*---------*/ 5827 /* post receives */ 5828 if (nrecvs) PetscCall(PetscMalloc1(rbs*(rstarts[nrecvs] - rstarts[0]),&rvalues)); /* rstarts can be NULL when nrecvs=0 */ 5829 for (i=0; i<nrecvs; i++) { 5830 rowlen = rvalues + rstarts[i]*rbs; 5831 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */ 5832 PetscCallMPI(MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i)); 5833 } 5834 5835 /* pack the outgoing message */ 5836 PetscCall(PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj)); 5837 5838 sstartsj[0] = 0; 5839 rstartsj[0] = 0; 5840 len = 0; /* total length of j or a array to be sent */ 5841 if (nsends) { 5842 k = sstarts[0]; /* ATTENTION: sstarts[0] and rstarts[0] are not necessarily zero */ 5843 PetscCall(PetscMalloc1(sbs*(sstarts[nsends]-sstarts[0]),&svalues)); 5844 } 5845 for (i=0; i<nsends; i++) { 5846 rowlen = svalues + (sstarts[i]-sstarts[0])*sbs; 5847 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5848 for (j=0; j<nrows; j++) { 5849 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 5850 for (l=0; l<sbs; l++) { 5851 PetscCall(MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL)); /* rowlength */ 5852 5853 rowlen[j*sbs+l] = ncols; 5854 5855 len += ncols; 5856 PetscCall(MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL)); 5857 } 5858 k++; 5859 } 5860 PetscCallMPI(MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i)); 5861 5862 sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */ 5863 } 5864 /* recvs and sends of i-array are completed */ 5865 if (nreqs) PetscCallMPI(MPI_Waitall(nreqs,reqs,MPI_STATUSES_IGNORE)); 5866 PetscCall(PetscFree(svalues)); 5867 5868 /* allocate buffers for sending j and a arrays */ 5869 PetscCall(PetscMalloc1(len+1,&bufj)); 5870 
PetscCall(PetscMalloc1(len+1,&bufa)); 5871 5872 /* create i-array of B_oth */ 5873 PetscCall(PetscMalloc1(aBn+2,&b_othi)); 5874 5875 b_othi[0] = 0; 5876 len = 0; /* total length of j or a array to be received */ 5877 k = 0; 5878 for (i=0; i<nrecvs; i++) { 5879 rowlen = rvalues + (rstarts[i]-rstarts[0])*rbs; 5880 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of rows to be received */ 5881 for (j=0; j<nrows; j++) { 5882 b_othi[k+1] = b_othi[k] + rowlen[j]; 5883 PetscCall(PetscIntSumError(rowlen[j],len,&len)); 5884 k++; 5885 } 5886 rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 5887 } 5888 PetscCall(PetscFree(rvalues)); 5889 5890 /* allocate space for j and a arrays of B_oth */ 5891 PetscCall(PetscMalloc1(b_othi[aBn]+1,&b_othj)); 5892 PetscCall(PetscMalloc1(b_othi[aBn]+1,&b_otha)); 5893 5894 /* j-array */ 5895 /*---------*/ 5896 /* post receives of j-array */ 5897 for (i=0; i<nrecvs; i++) { 5898 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5899 PetscCallMPI(MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i)); 5900 } 5901 5902 /* pack the outgoing message j-array */ 5903 if (nsends) k = sstarts[0]; 5904 for (i=0; i<nsends; i++) { 5905 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5906 bufJ = bufj+sstartsj[i]; 5907 for (j=0; j<nrows; j++) { 5908 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5909 for (ll=0; ll<sbs; ll++) { 5910 PetscCall(MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL)); 5911 for (l=0; l<ncols; l++) { 5912 *bufJ++ = cols[l]; 5913 } 5914 PetscCall(MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL)); 5915 } 5916 } 5917 PetscCallMPI(MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i)); 5918 } 5919 5920 /* recvs and sends of j-array are completed */ 5921 if (nreqs) PetscCallMPI(MPI_Waitall(nreqs,reqs,MPI_STATUSES_IGNORE)); 5922 } else if (scall == MAT_REUSE_MATRIX) { 5923 sstartsj = *startsj_s; 5924 rstartsj = 
*startsj_r; 5925 bufa = *bufa_ptr; 5926 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5927 PetscCall(MatSeqAIJGetArrayWrite(*B_oth,&b_otha)); 5928 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not possess an object container"); 5929 5930 /* a-array */ 5931 /*---------*/ 5932 /* post receives of a-array */ 5933 for (i=0; i<nrecvs; i++) { 5934 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5935 PetscCallMPI(MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i)); 5936 } 5937 5938 /* pack the outgoing message a-array */ 5939 if (nsends) k = sstarts[0]; 5940 for (i=0; i<nsends; i++) { 5941 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5942 bufA = bufa+sstartsj[i]; 5943 for (j=0; j<nrows; j++) { 5944 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5945 for (ll=0; ll<sbs; ll++) { 5946 PetscCall(MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals)); 5947 for (l=0; l<ncols; l++) { 5948 *bufA++ = vals[l]; 5949 } 5950 PetscCall(MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals)); 5951 } 5952 } 5953 PetscCallMPI(MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i)); 5954 } 5955 /* recvs and sends of a-array are completed */ 5956 if (nreqs) PetscCallMPI(MPI_Waitall(nreqs,reqs,MPI_STATUSES_IGNORE)); 5957 PetscCall(PetscFree(reqs)); 5958 5959 if (scall == MAT_INITIAL_MATRIX) { 5960 /* put together the new matrix */ 5961 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth)); 5962 5963 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5964 /* Since these are PETSc arrays, change flags to free them as necessary. 
*/ 5965 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5966 b_oth->free_a = PETSC_TRUE; 5967 b_oth->free_ij = PETSC_TRUE; 5968 b_oth->nonew = 0; 5969 5970 PetscCall(PetscFree(bufj)); 5971 if (!startsj_s || !bufa_ptr) { 5972 PetscCall(PetscFree2(sstartsj,rstartsj)); 5973 PetscCall(PetscFree(bufa_ptr)); 5974 } else { 5975 *startsj_s = sstartsj; 5976 *startsj_r = rstartsj; 5977 *bufa_ptr = bufa; 5978 } 5979 } else if (scall == MAT_REUSE_MATRIX) { 5980 PetscCall(MatSeqAIJRestoreArrayWrite(*B_oth,&b_otha)); 5981 } 5982 5983 PetscCall(VecScatterRestoreRemote_Private(ctx,PETSC_TRUE,&nsends,&sstarts,&srow,&sprocs,&sbs)); 5984 PetscCall(VecScatterRestoreRemoteOrdered_Private(ctx,PETSC_FALSE,&nrecvs,&rstarts,NULL,&rprocs,&rbs)); 5985 PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0)); 5986 PetscFunctionReturn(0); 5987 } 5988 5989 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*); 5990 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*); 5991 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJSELL(Mat,MatType,MatReuse,Mat*); 5992 #if defined(PETSC_HAVE_MKL_SPARSE) 5993 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat,MatType,MatReuse,Mat*); 5994 #endif 5995 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIBAIJ(Mat,MatType,MatReuse,Mat*); 5996 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*); 5997 #if defined(PETSC_HAVE_ELEMENTAL) 5998 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat,MatType,MatReuse,Mat*); 5999 #endif 6000 #if defined(PETSC_HAVE_SCALAPACK) 6001 PETSC_INTERN PetscErrorCode MatConvert_AIJ_ScaLAPACK(Mat,MatType,MatReuse,Mat*); 6002 #endif 6003 #if defined(PETSC_HAVE_HYPRE) 6004 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*); 6005 #endif 6006 #if defined(PETSC_HAVE_CUDA) 6007 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCUSPARSE(Mat,MatType,MatReuse,Mat*); 6008 #endif 6009 #if defined(PETSC_HAVE_KOKKOS_KERNELS) 
PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJKokkos(Mat,MatType,MatReuse,Mat*);
#endif
PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat,MatType,MatReuse,Mat*);
PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat,MatType,MatReuse,Mat*);
PETSC_INTERN PetscErrorCode MatProductSetFromOptions_IS_XAIJ(Mat);

/*
   Computes (B'*A')' since computing B*A directly is untenable

           n                       p                          p
        [       ]       [       ]         [                 ]
      m [   A   ]  *  n [   B   ]   =   m [       C         ]
        [       ]       [       ]         [                 ]

*/
static PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
{
  Mat At,Bt,Ct; /* Transposes of A, B and of the product Bt*At */

  PetscFunctionBegin;
  /* Form C = (Bt*At)' ; the dense*aij product is computed as an aij*dense product on the transposes */
  PetscCall(MatTranspose(A,MAT_INITIAL_MATRIX,&At));
  PetscCall(MatTranspose(B,MAT_INITIAL_MATRIX,&Bt));
  PetscCall(MatMatMult(Bt,At,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&Ct));
  PetscCall(MatDestroy(&At));
  PetscCall(MatDestroy(&Bt));
  /* Transpose Ct back into the caller-provided C (reuse its layout/storage) */
  PetscCall(MatTransposeSetPrecursor(Ct,C));
  PetscCall(MatTranspose(Ct,MAT_REUSE_MATRIX,&C));
  PetscCall(MatDestroy(&Ct));
  PetscFunctionReturn(0);
}

/* Symbolic phase for C = A*B with A MPIDENSE and B MPIAIJ: sizes/types C and installs the numeric routine */
static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat C)
{
  PetscBool cisdense;

  PetscFunctionBegin;
  PetscCheck(A->cmap->n == B->rmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %" PetscInt_FMT " != B->rmap->n %" PetscInt_FMT,A->cmap->n,B->rmap->n);
  PetscCall(MatSetSizes(C,A->rmap->n,B->cmap->n,A->rmap->N,B->cmap->N));
  PetscCall(MatSetBlockSizesFromMats(C,A,B));
  /* C inherits A's (dense) type unless it is already some dense subtype */
  PetscCall(PetscObjectTypeCompareAny((PetscObject)C,&cisdense,MATMPIDENSE,MATMPIDENSECUDA,""));
  if (!cisdense) {
    PetscCall(MatSetType(C,((PetscObject)A)->type_name));
  }
  PetscCall(MatSetUp(C));

  C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
  PetscFunctionReturn(0);
}

/* ----------------------------------------------------------------*/
static PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ_AB(Mat
C) 6061 { 6062 Mat_Product *product = C->product; 6063 Mat A = product->A,B=product->B; 6064 6065 PetscFunctionBegin; 6066 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) 6067 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 6068 6069 C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIAIJ; 6070 C->ops->productsymbolic = MatProductSymbolic_AB; 6071 PetscFunctionReturn(0); 6072 } 6073 6074 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ(Mat C) 6075 { 6076 Mat_Product *product = C->product; 6077 6078 PetscFunctionBegin; 6079 if (product->type == MATPRODUCT_AB) { 6080 PetscCall(MatProductSetFromOptions_MPIDense_MPIAIJ_AB(C)); 6081 } 6082 PetscFunctionReturn(0); 6083 } 6084 6085 /* Merge two sets of sorted nonzeros and return a CSR for the merged (sequential) matrix 6086 6087 Input Parameters: 6088 6089 j1,rowBegin1,rowEnd1,perm1,jmap1: describe the first set of nonzeros (Set1) 6090 j2,rowBegin2,rowEnd2,perm2,jmap2: describe the second set of nonzeros (Set2) 6091 6092 mat: both sets' nonzeros are on m rows, where m is the number of local rows of the matrix mat 6093 6094 For Set1, j1[] contains column indices of the nonzeros. 6095 For the k-th row (0<=k<m), [rowBegin1[k],rowEnd1[k]) index into j1[] and point to the begin/end nonzero in row k 6096 respectively (note rowEnd1[k] is not necessarily equal to rwoBegin1[k+1]). Indices in this range of j1[] are sorted, 6097 but might have repeats. jmap1[t+1] - jmap1[t] is the number of repeats for the t-th unique nonzero in Set1. 6098 6099 Similar for Set2. 6100 6101 This routine merges the two sets of nonzeros row by row and removes repeats. 6102 6103 Output Parameters: (memory is allocated by the caller) 6104 6105 i[],j[]: the CSR of the merged matrix, which has m rows. 
    imap1[]: the k-th unique nonzero in Set1 (k=0,1,...) corresponds to imap1[k]-th unique nonzero in the merged matrix.
    imap2[]: similar to imap1[], but for Set2.
    Note we order nonzeros row-by-row and from left to right.
*/
static PetscErrorCode MatMergeEntries_Internal(Mat mat,const PetscInt j1[],const PetscInt j2[],const PetscCount rowBegin1[],const PetscCount rowEnd1[],
                                               const PetscCount rowBegin2[],const PetscCount rowEnd2[],const PetscCount jmap1[],const PetscCount jmap2[],
                                               PetscCount imap1[],PetscCount imap2[],PetscInt i[],PetscInt j[])
{
  PetscInt r,m; /* Row index of mat */
  PetscCount t,t1,t2,b1,e1,b2,e2;

  PetscFunctionBegin;
  PetscCall(MatGetLocalSize(mat,&m,NULL));
  t1 = t2 = t = 0; /* Count unique nonzeros in Set1, Set2 and the merged set respectively */
  i[0] = 0;
  for (r=0; r<m; r++) { /* Do row by row merging (classic two-pointer merge; both ranges are sorted) */
    b1 = rowBegin1[r];
    e1 = rowEnd1[r];
    b2 = rowBegin2[r];
    e2 = rowEnd2[r];
    while (b1 < e1 && b2 < e2) {
      if (j1[b1] == j2[b2]) { /* Same column index and hence same nonzero */
        j[t] = j1[b1];
        imap1[t1] = t;
        imap2[t2] = t;
        b1 += jmap1[t1+1] - jmap1[t1]; /* Jump to next unique local nonzero (skip its repeats) */
        b2 += jmap2[t2+1] - jmap2[t2]; /* Jump to next unique remote nonzero (skip its repeats) */
        t1++; t2++; t++;
      } else if (j1[b1] < j2[b2]) { /* Nonzero only in Set1 */
        j[t] = j1[b1];
        imap1[t1] = t;
        b1 += jmap1[t1+1] - jmap1[t1];
        t1++; t++;
      } else { /* Nonzero only in Set2 */
        j[t] = j2[b2];
        imap2[t2] = t;
        b2 += jmap2[t2+1] - jmap2[t2];
        t2++; t++;
      }
    }
    /* Merge the remaining in either j1[] or j2[] */
    while (b1 < e1) {
      j[t] = j1[b1];
      imap1[t1] = t;
      b1 += jmap1[t1+1] - jmap1[t1];
      t1++; t++;
    }
    while (b2 < e2) {
      j[t] = j2[b2];
      imap2[t2] = t;
      b2 += jmap2[t2+1] - jmap2[t2];
      t2++; t++;
    }
    i[r+1] = t; /* CSR row pointer: total unique nonzeros after finishing row r */
  }
  PetscFunctionReturn(0);
}

/* Split nonzeros in a block of local
rows into two subsets: those in the diagonal block and those in the off-diagonal block 6165 6166 Input Parameters: 6167 mat: an MPI matrix that provides row and column layout information for splitting. Let's say its number of local rows is m. 6168 n,i[],j[],perm[]: there are n input entries, belonging to m rows. Row/col indices of the entries are stored in i[] and j[] 6169 respectively, along with a permutation array perm[]. Length of the i[],j[],perm[] arrays is n. 6170 6171 i[] is already sorted, but within a row, j[] is not sorted and might have repeats. 6172 i[] might contain negative indices at the beginning, which means the corresponding entries should be ignored in the splitting. 6173 6174 Output Parameters: 6175 j[],perm[]: the routine needs to sort j[] within each row along with perm[]. 6176 rowBegin[],rowMid[],rowEnd[]: of length m, and the memory is preallocated and zeroed by the caller. 6177 They contain indices pointing to j[]. For 0<=r<m, [rowBegin[r],rowMid[r]) point to begin/end entries of row r of the diagonal block, 6178 and [rowMid[r],rowEnd[r]) point to begin/end entries of row r of the off-diagonal block. 6179 6180 Aperm[],Ajmap[],Atot,Annz: Arrays are allocated by this routine. 6181 Atot: number of entries belonging to the diagonal block. 6182 Annz: number of unique nonzeros belonging to the diagonal block. 6183 Aperm[Atot] stores values from perm[] for entries belonging to the diagonal block. Length of Aperm[] is Atot, though it may also count 6184 repeats (i.e., same 'i,j' pair). 6185 Ajmap[Annz+1] stores the number of repeats of each unique entry belonging to the diagonal block. More precisely, Ajmap[t+1] - Ajmap[t] 6186 is the number of repeats for the t-th unique entry in the diagonal block. Ajmap[0] is always 0. 6187 6188 Atot: number of entries belonging to the diagonal block 6189 Annz: number of unique nonzeros belonging to the diagonal block. 6190 6191 Bperm[], Bjmap[], Btot, Bnnz are similar but for the off-diagonal block. 
6192 6193 Aperm[],Bperm[],Ajmap[] and Bjmap[] are allocated separately by this routine with PetscMalloc1(). 6194 */ 6195 static PetscErrorCode MatSplitEntries_Internal(Mat mat,PetscCount n,const PetscInt i[],PetscInt j[], 6196 PetscCount perm[],PetscCount rowBegin[],PetscCount rowMid[],PetscCount rowEnd[], 6197 PetscCount *Atot_,PetscCount **Aperm_,PetscCount *Annz_,PetscCount **Ajmap_, 6198 PetscCount *Btot_,PetscCount **Bperm_,PetscCount *Bnnz_,PetscCount **Bjmap_) 6199 { 6200 PetscInt cstart,cend,rstart,rend,row,col; 6201 PetscCount Atot=0,Btot=0; /* Total number of nonzeros in the diagonal and off-diagonal blocks */ 6202 PetscCount Annz=0,Bnnz=0; /* Number of unique nonzeros in the diagonal and off-diagonal blocks */ 6203 PetscCount k,m,p,q,r,s,mid; 6204 PetscCount *Aperm,*Bperm,*Ajmap,*Bjmap; 6205 6206 PetscFunctionBegin; 6207 PetscCall(PetscLayoutGetRange(mat->rmap,&rstart,&rend)); 6208 PetscCall(PetscLayoutGetRange(mat->cmap,&cstart,&cend)); 6209 m = rend - rstart; 6210 6211 for (k=0; k<n; k++) {if (i[k]>=0) break;} /* Skip negative rows */ 6212 6213 /* Process [k,n): sort and partition each local row into diag and offdiag portions, 6214 fill rowBegin[], rowMid[], rowEnd[], and count Atot, Btot, Annz, Bnnz. 6215 */ 6216 while (k<n) { 6217 row = i[k]; 6218 /* Entries in [k,s) are in one row. 
Shift diagonal block col indices so that diag is ahead of offdiag after sorting the row */ 6219 for (s=k; s<n; s++) if (i[s] != row) break; 6220 for (p=k; p<s; p++) { 6221 if (j[p] >= cstart && j[p] < cend) j[p] -= PETSC_MAX_INT; /* Shift diag columns to range of [-PETSC_MAX_INT, -1] */ 6222 else PetscAssert((j[p] >= 0) && (j[p] <= mat->cmap->N),PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column index %" PetscInt_FMT " is out of range",j[p]); 6223 } 6224 PetscCall(PetscSortIntWithCountArray(s-k,j+k,perm+k)); 6225 PetscCall(PetscSortedIntUpperBound(j,k,s,-1,&mid)); /* Separate [k,s) into [k,mid) for diag and [mid,s) for offdiag */ 6226 rowBegin[row-rstart] = k; 6227 rowMid[row-rstart] = mid; 6228 rowEnd[row-rstart] = s; 6229 6230 /* Count nonzeros of this diag/offdiag row, which might have repeats */ 6231 Atot += mid - k; 6232 Btot += s - mid; 6233 6234 /* Count unique nonzeros of this diag/offdiag row */ 6235 for (p=k; p<mid;) { 6236 col = j[p]; 6237 do {j[p] += PETSC_MAX_INT; p++;} while (p<mid && j[p] == col); /* Revert the modified diagonal indices */ 6238 Annz++; 6239 } 6240 6241 for (p=mid; p<s;) { 6242 col = j[p]; 6243 do {p++;} while (p<s && j[p] == col); 6244 Bnnz++; 6245 } 6246 k = s; 6247 } 6248 6249 /* Allocation according to Atot, Btot, Annz, Bnnz */ 6250 PetscCall(PetscMalloc1(Atot,&Aperm)); 6251 PetscCall(PetscMalloc1(Btot,&Bperm)); 6252 PetscCall(PetscMalloc1(Annz+1,&Ajmap)); 6253 PetscCall(PetscMalloc1(Bnnz+1,&Bjmap)); 6254 6255 /* Re-scan indices and copy diag/offdiag permutation indices to Aperm, Bperm and also fill Ajmap and Bjmap */ 6256 Ajmap[0] = Bjmap[0] = Atot = Btot = Annz = Bnnz = 0; 6257 for (r=0; r<m; r++) { 6258 k = rowBegin[r]; 6259 mid = rowMid[r]; 6260 s = rowEnd[r]; 6261 PetscCall(PetscArraycpy(Aperm+Atot,perm+k, mid-k)); 6262 PetscCall(PetscArraycpy(Bperm+Btot,perm+mid,s-mid)); 6263 Atot += mid - k; 6264 Btot += s - mid; 6265 6266 /* Scan column indices in this row and find out how many repeats each unique nonzero has */ 6267 for 
(p=k; p<mid;) { 6268 col = j[p]; 6269 q = p; 6270 do {p++;} while (p<mid && j[p] == col); 6271 Ajmap[Annz+1] = Ajmap[Annz] + (p - q); 6272 Annz++; 6273 } 6274 6275 for (p=mid; p<s;) { 6276 col = j[p]; 6277 q = p; 6278 do {p++;} while (p<s && j[p] == col); 6279 Bjmap[Bnnz+1] = Bjmap[Bnnz] + (p - q); 6280 Bnnz++; 6281 } 6282 } 6283 /* Output */ 6284 *Aperm_ = Aperm; 6285 *Annz_ = Annz; 6286 *Atot_ = Atot; 6287 *Ajmap_ = Ajmap; 6288 *Bperm_ = Bperm; 6289 *Bnnz_ = Bnnz; 6290 *Btot_ = Btot; 6291 *Bjmap_ = Bjmap; 6292 PetscFunctionReturn(0); 6293 } 6294 6295 /* Expand the jmap[] array to make a new one in view of nonzeros in the merged matrix 6296 6297 Input Parameters: 6298 nnz1: number of unique nonzeros in a set that was used to produce imap[], jmap[] 6299 nnz: number of unique nonzeros in the merged matrix 6300 imap[nnz1]: i-th nonzero in the set is the imap[i]-th nonzero in the merged matrix 6301 jmap[nnz1+1]: i-th nonzeron in the set has jmap[i+1] - jmap[i] repeats in the set 6302 6303 Output Parameter: (memory is allocated by the caller) 6304 jmap_new[nnz+1]: i-th nonzero in the merged matrix has jmap_new[i+1] - jmap_new[i] repeats in the set 6305 6306 Example: 6307 nnz1 = 4 6308 nnz = 6 6309 imap = [1,3,4,5] 6310 jmap = [0,3,5,6,7] 6311 then, 6312 jmap_new = [0,0,3,3,5,6,7] 6313 */ 6314 static PetscErrorCode ExpandJmap_Internal(PetscCount nnz1,PetscCount nnz,const PetscCount imap[],const PetscCount jmap[],PetscCount jmap_new[]) 6315 { 6316 PetscCount k,p; 6317 6318 PetscFunctionBegin; 6319 jmap_new[0] = 0; 6320 p = nnz; /* p loops over jmap_new[] backwards */ 6321 for (k=nnz1-1; k>=0; k--) { /* k loops over imap[] */ 6322 for (; p > imap[k]; p--) jmap_new[p] = jmap[k+1]; 6323 } 6324 for (; p >= 0; p--) jmap_new[p] = jmap[0]; 6325 PetscFunctionReturn(0); 6326 } 6327 6328 PetscErrorCode MatSetPreallocationCOO_MPIAIJ(Mat mat, PetscCount coo_n, PetscInt coo_i[], PetscInt coo_j[]) 6329 { 6330 MPI_Comm comm; 6331 PetscMPIInt rank,size; 6332 PetscInt 
m,n,M,N,rstart,rend,cstart,cend; /* Sizes, indices of row/col, therefore with type PetscInt */ 6333 PetscCount k,p,q,rem; /* Loop variables over coo arrays */ 6334 Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ*)mat->data; 6335 6336 PetscFunctionBegin; 6337 PetscCall(PetscFree(mpiaij->garray)); 6338 PetscCall(VecDestroy(&mpiaij->lvec)); 6339 #if defined(PETSC_USE_CTABLE) 6340 PetscCall(PetscTableDestroy(&mpiaij->colmap)); 6341 #else 6342 PetscCall(PetscFree(mpiaij->colmap)); 6343 #endif 6344 PetscCall(VecScatterDestroy(&mpiaij->Mvctx)); 6345 mat->assembled = PETSC_FALSE; 6346 mat->was_assembled = PETSC_FALSE; 6347 PetscCall(MatResetPreallocationCOO_MPIAIJ(mat)); 6348 6349 PetscCall(PetscObjectGetComm((PetscObject)mat,&comm)); 6350 PetscCallMPI(MPI_Comm_size(comm,&size)); 6351 PetscCallMPI(MPI_Comm_rank(comm,&rank)); 6352 PetscCall(PetscLayoutSetUp(mat->rmap)); 6353 PetscCall(PetscLayoutSetUp(mat->cmap)); 6354 PetscCall(PetscLayoutGetRange(mat->rmap,&rstart,&rend)); 6355 PetscCall(PetscLayoutGetRange(mat->cmap,&cstart,&cend)); 6356 PetscCall(MatGetLocalSize(mat,&m,&n)); 6357 PetscCall(MatGetSize(mat,&M,&N)); 6358 6359 /* ---------------------------------------------------------------------------*/ 6360 /* Sort (i,j) by row along with a permutation array, so that the to-be-ignored */ 6361 /* entries come first, then local rows, then remote rows. */ 6362 /* ---------------------------------------------------------------------------*/ 6363 PetscCount n1 = coo_n,*perm1; 6364 PetscInt *i1 = coo_i,*j1 = coo_j; 6365 6366 PetscCall(PetscMalloc1(n1,&perm1)); 6367 for (k=0; k<n1; k++) perm1[k] = k; 6368 6369 /* Manipulate indices so that entries with negative row or col indices will have smallest 6370 row indices, local entries will have greater but negative row indices, and remote entries 6371 will have positive row indices. 
6372 */ 6373 for (k=0; k<n1; k++) { 6374 if (i1[k] < 0 || j1[k] < 0) i1[k] = PETSC_MIN_INT; /* e.g., -2^31, minimal to move them ahead */ 6375 else if (i1[k] >= rstart && i1[k] < rend) i1[k] -= PETSC_MAX_INT; /* e.g., minus 2^31-1 to shift local rows to range of [-PETSC_MAX_INT, -1] */ 6376 else { 6377 PetscCheck(!mat->nooffprocentries,PETSC_COMM_SELF,PETSC_ERR_USER_INPUT,"MAT_NO_OFF_PROC_ENTRIES is set but insert to remote rows"); 6378 if (mpiaij->donotstash) i1[k] = PETSC_MIN_INT; /* Ignore offproc entries as if they had negative indices */ 6379 } 6380 } 6381 6382 /* Sort by row; after that, [0,k) have ignored entires, [k,rem) have local rows and [rem,n1) have remote rows */ 6383 PetscCall(PetscSortIntWithIntCountArrayPair(n1,i1,j1,perm1)); 6384 for (k=0; k<n1; k++) {if (i1[k] > PETSC_MIN_INT) break;} /* Advance k to the first entry we need to take care of */ 6385 PetscCall(PetscSortedIntUpperBound(i1,k,n1,rend-1-PETSC_MAX_INT,&rem)); /* rem is upper bound of the last local row */ 6386 for (; k<rem; k++) i1[k] += PETSC_MAX_INT; /* Revert row indices of local rows*/ 6387 6388 /* ---------------------------------------------------------------------------*/ 6389 /* Split local rows into diag/offdiag portions */ 6390 /* ---------------------------------------------------------------------------*/ 6391 PetscCount *rowBegin1,*rowMid1,*rowEnd1; 6392 PetscCount *Ajmap1,*Aperm1,*Bjmap1,*Bperm1,*Cperm1; 6393 PetscCount Annz1,Bnnz1,Atot1,Btot1; 6394 6395 PetscCall(PetscCalloc3(m,&rowBegin1,m,&rowMid1,m,&rowEnd1)); 6396 PetscCall(PetscMalloc1(n1-rem,&Cperm1)); 6397 PetscCall(MatSplitEntries_Internal(mat,rem,i1,j1,perm1,rowBegin1,rowMid1,rowEnd1,&Atot1,&Aperm1,&Annz1,&Ajmap1,&Btot1,&Bperm1,&Bnnz1,&Bjmap1)); 6398 6399 /* ---------------------------------------------------------------------------*/ 6400 /* Send remote rows to their owner */ 6401 /* ---------------------------------------------------------------------------*/ 6402 /* Find which rows should be sent to which 
remote ranks*/ 6403 PetscInt nsend = 0; /* Number of MPI ranks to send data to */ 6404 PetscMPIInt *sendto; /* [nsend], storing remote ranks */ 6405 PetscInt *nentries; /* [nsend], storing number of entries sent to remote ranks; Assume PetscInt is big enough for this count, and error if not */ 6406 const PetscInt *ranges; 6407 PetscInt maxNsend = size >= 128? 128 : size; /* Assume max 128 neighbors; realloc when needed */ 6408 6409 PetscCall(PetscLayoutGetRanges(mat->rmap,&ranges)); 6410 PetscCall(PetscMalloc2(maxNsend,&sendto,maxNsend,&nentries)); 6411 for (k=rem; k<n1;) { 6412 PetscMPIInt owner; 6413 PetscInt firstRow,lastRow; 6414 6415 /* Locate a row range */ 6416 firstRow = i1[k]; /* first row of this owner */ 6417 PetscCall(PetscLayoutFindOwner(mat->rmap,firstRow,&owner)); 6418 lastRow = ranges[owner+1]-1; /* last row of this owner */ 6419 6420 /* Find the first index 'p' in [k,n) with i[p] belonging to next owner */ 6421 PetscCall(PetscSortedIntUpperBound(i1,k,n1,lastRow,&p)); 6422 6423 /* All entries in [k,p) belong to this remote owner */ 6424 if (nsend >= maxNsend) { /* Double the remote ranks arrays if not long enough */ 6425 PetscMPIInt *sendto2; 6426 PetscInt *nentries2; 6427 PetscInt maxNsend2 = (maxNsend <= size/2) ? 
maxNsend*2 : size; 6428 6429 PetscCall(PetscMalloc2(maxNsend2,&sendto2,maxNsend2,&nentries2)); 6430 PetscCall(PetscArraycpy(sendto2,sendto,maxNsend)); 6431 PetscCall(PetscArraycpy(nentries2,nentries2,maxNsend+1)); 6432 PetscCall(PetscFree2(sendto,nentries2)); 6433 sendto = sendto2; 6434 nentries = nentries2; 6435 maxNsend = maxNsend2; 6436 } 6437 sendto[nsend] = owner; 6438 nentries[nsend] = p - k; 6439 PetscCall(PetscCountCast(p-k,&nentries[nsend])); 6440 nsend++; 6441 k = p; 6442 } 6443 6444 /* Build 1st SF to know offsets on remote to send data */ 6445 PetscSF sf1; 6446 PetscInt nroots = 1,nroots2 = 0; 6447 PetscInt nleaves = nsend,nleaves2 = 0; 6448 PetscInt *offsets; 6449 PetscSFNode *iremote; 6450 6451 PetscCall(PetscSFCreate(comm,&sf1)); 6452 PetscCall(PetscMalloc1(nsend,&iremote)); 6453 PetscCall(PetscMalloc1(nsend,&offsets)); 6454 for (k=0; k<nsend; k++) { 6455 iremote[k].rank = sendto[k]; 6456 iremote[k].index = 0; 6457 nleaves2 += nentries[k]; 6458 PetscCheck(nleaves2 >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Number of SF leaves is too large for PetscInt"); 6459 } 6460 PetscCall(PetscSFSetGraph(sf1,nroots,nleaves,NULL,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER)); 6461 PetscCall(PetscSFFetchAndOpWithMemTypeBegin(sf1,MPIU_INT,PETSC_MEMTYPE_HOST,&nroots2/*rootdata*/,PETSC_MEMTYPE_HOST,nentries/*leafdata*/,PETSC_MEMTYPE_HOST,offsets/*leafupdate*/,MPI_SUM)); 6462 PetscCall(PetscSFFetchAndOpEnd(sf1,MPIU_INT,&nroots2,nentries,offsets,MPI_SUM)); /* Would nroots2 overflow, we check offsets[] below */ 6463 PetscCall(PetscSFDestroy(&sf1)); 6464 PetscAssert(nleaves2 == n1-rem,PETSC_COMM_SELF,PETSC_ERR_PLIB,"nleaves2 %" PetscInt_FMT " != number of remote entries %" PetscCount_FMT "",nleaves2,n1-rem); 6465 6466 /* Build 2nd SF to send remote COOs to their owner */ 6467 PetscSF sf2; 6468 nroots = nroots2; 6469 nleaves = nleaves2; 6470 PetscCall(PetscSFCreate(comm,&sf2)); 6471 PetscCall(PetscSFSetFromOptions(sf2)); 6472 PetscCall(PetscMalloc1(nleaves,&iremote)); 
6473 p = 0; 6474 for (k=0; k<nsend; k++) { 6475 PetscCheck(offsets[k] >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Number of SF roots is too large for PetscInt"); 6476 for (q=0; q<nentries[k]; q++,p++) { 6477 iremote[p].rank = sendto[k]; 6478 iremote[p].index = offsets[k] + q; 6479 } 6480 } 6481 PetscCall(PetscSFSetGraph(sf2,nroots,nleaves,NULL,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER)); 6482 6483 /* sf2 only sends contiguous leafdata to contiguous rootdata. We record the permutation which will be used to fill leafdata */ 6484 PetscCall(PetscArraycpy(Cperm1,perm1+rem,n1-rem)); 6485 6486 /* Send the remote COOs to their owner */ 6487 PetscInt n2 = nroots,*i2,*j2; /* Buffers for received COOs from other ranks, along with a permutation array */ 6488 PetscCount *perm2; /* Though PetscInt is enough for remote entries, we use PetscCount here as we want to reuse MatSplitEntries_Internal() */ 6489 PetscCall(PetscMalloc3(n2,&i2,n2,&j2,n2,&perm2)); 6490 PetscCall(PetscSFReduceWithMemTypeBegin(sf2,MPIU_INT,PETSC_MEMTYPE_HOST,i1+rem,PETSC_MEMTYPE_HOST,i2,MPI_REPLACE)); 6491 PetscCall(PetscSFReduceEnd(sf2,MPIU_INT,i1+rem,i2,MPI_REPLACE)); 6492 PetscCall(PetscSFReduceWithMemTypeBegin(sf2,MPIU_INT,PETSC_MEMTYPE_HOST,j1+rem,PETSC_MEMTYPE_HOST,j2,MPI_REPLACE)); 6493 PetscCall(PetscSFReduceEnd(sf2,MPIU_INT,j1+rem,j2,MPI_REPLACE)); 6494 6495 PetscCall(PetscFree(offsets)); 6496 PetscCall(PetscFree2(sendto,nentries)); 6497 6498 /* ---------------------------------------------------------------*/ 6499 /* Sort received COOs by row along with the permutation array */ 6500 /* ---------------------------------------------------------------*/ 6501 for (k=0; k<n2; k++) perm2[k] = k; 6502 PetscCall(PetscSortIntWithIntCountArrayPair(n2,i2,j2,perm2)); 6503 6504 /* ---------------------------------------------------------------*/ 6505 /* Split received COOs into diag/offdiag portions */ 6506 /* ---------------------------------------------------------------*/ 6507 PetscCount 
*rowBegin2,*rowMid2,*rowEnd2; 6508 PetscCount *Ajmap2,*Aperm2,*Bjmap2,*Bperm2; 6509 PetscCount Annz2,Bnnz2,Atot2,Btot2; 6510 6511 PetscCall(PetscCalloc3(m,&rowBegin2,m,&rowMid2,m,&rowEnd2)); 6512 PetscCall(MatSplitEntries_Internal(mat,n2,i2,j2,perm2,rowBegin2,rowMid2,rowEnd2,&Atot2,&Aperm2,&Annz2,&Ajmap2,&Btot2,&Bperm2,&Bnnz2,&Bjmap2)); 6513 6514 /* --------------------------------------------------------------------------*/ 6515 /* Merge local COOs with received COOs: diag with diag, offdiag with offdiag */ 6516 /* --------------------------------------------------------------------------*/ 6517 PetscInt *Ai,*Bi; 6518 PetscInt *Aj,*Bj; 6519 6520 PetscCall(PetscMalloc1(m+1,&Ai)); 6521 PetscCall(PetscMalloc1(m+1,&Bi)); 6522 PetscCall(PetscMalloc1(Annz1+Annz2,&Aj)); /* Since local and remote entries might have dups, we might allocate excess memory */ 6523 PetscCall(PetscMalloc1(Bnnz1+Bnnz2,&Bj)); 6524 6525 PetscCount *Aimap1,*Bimap1,*Aimap2,*Bimap2; 6526 PetscCall(PetscMalloc1(Annz1,&Aimap1)); 6527 PetscCall(PetscMalloc1(Bnnz1,&Bimap1)); 6528 PetscCall(PetscMalloc1(Annz2,&Aimap2)); 6529 PetscCall(PetscMalloc1(Bnnz2,&Bimap2)); 6530 6531 PetscCall(MatMergeEntries_Internal(mat,j1,j2,rowBegin1,rowMid1,rowBegin2,rowMid2,Ajmap1,Ajmap2,Aimap1,Aimap2,Ai,Aj)); 6532 PetscCall(MatMergeEntries_Internal(mat,j1,j2,rowMid1, rowEnd1,rowMid2, rowEnd2,Bjmap1,Bjmap2,Bimap1,Bimap2,Bi,Bj)); 6533 6534 /* --------------------------------------------------------------------------*/ 6535 /* Expand Ajmap1/Bjmap1 to make them based off nonzeros in A/B, since we */ 6536 /* expect nonzeros in A/B most likely have local contributing entries */ 6537 /* --------------------------------------------------------------------------*/ 6538 PetscInt Annz = Ai[m]; 6539 PetscInt Bnnz = Bi[m]; 6540 PetscCount *Ajmap1_new,*Bjmap1_new; 6541 6542 PetscCall(PetscMalloc1(Annz+1,&Ajmap1_new)); 6543 PetscCall(PetscMalloc1(Bnnz+1,&Bjmap1_new)); 6544 6545 
PetscCall(ExpandJmap_Internal(Annz1,Annz,Aimap1,Ajmap1,Ajmap1_new)); 6546 PetscCall(ExpandJmap_Internal(Bnnz1,Bnnz,Bimap1,Bjmap1,Bjmap1_new)); 6547 6548 PetscCall(PetscFree(Aimap1)); 6549 PetscCall(PetscFree(Ajmap1)); 6550 PetscCall(PetscFree(Bimap1)); 6551 PetscCall(PetscFree(Bjmap1)); 6552 PetscCall(PetscFree3(rowBegin1,rowMid1,rowEnd1)); 6553 PetscCall(PetscFree3(rowBegin2,rowMid2,rowEnd2)); 6554 PetscCall(PetscFree(perm1)); 6555 PetscCall(PetscFree3(i2,j2,perm2)); 6556 6557 Ajmap1 = Ajmap1_new; 6558 Bjmap1 = Bjmap1_new; 6559 6560 /* Reallocate Aj, Bj once we know actual numbers of unique nonzeros in A and B */ 6561 if (Annz < Annz1 + Annz2) { 6562 PetscInt *Aj_new; 6563 PetscCall(PetscMalloc1(Annz,&Aj_new)); 6564 PetscCall(PetscArraycpy(Aj_new,Aj,Annz)); 6565 PetscCall(PetscFree(Aj)); 6566 Aj = Aj_new; 6567 } 6568 6569 if (Bnnz < Bnnz1 + Bnnz2) { 6570 PetscInt *Bj_new; 6571 PetscCall(PetscMalloc1(Bnnz,&Bj_new)); 6572 PetscCall(PetscArraycpy(Bj_new,Bj,Bnnz)); 6573 PetscCall(PetscFree(Bj)); 6574 Bj = Bj_new; 6575 } 6576 6577 /* --------------------------------------------------------------------------------*/ 6578 /* Create new submatrices for on-process and off-process coupling */ 6579 /* --------------------------------------------------------------------------------*/ 6580 PetscScalar *Aa,*Ba; 6581 MatType rtype; 6582 Mat_SeqAIJ *a,*b; 6583 PetscCall(PetscCalloc1(Annz,&Aa)); /* Zero matrix on device */ 6584 PetscCall(PetscCalloc1(Bnnz,&Ba)); 6585 /* make Aj[] local, i.e, based off the start column of the diagonal portion */ 6586 if (cstart) {for (k=0; k<Annz; k++) Aj[k] -= cstart;} 6587 PetscCall(MatDestroy(&mpiaij->A)); 6588 PetscCall(MatDestroy(&mpiaij->B)); 6589 PetscCall(MatGetRootType_Private(mat,&rtype)); 6590 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,Ai,Aj,Aa,&mpiaij->A)); 6591 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,mat->cmap->N,Bi,Bj,Ba,&mpiaij->B)); 6592 PetscCall(MatSetUpMultiply_MPIAIJ(mat)); 6593 6594 a = 
(Mat_SeqAIJ*)mpiaij->A->data; 6595 b = (Mat_SeqAIJ*)mpiaij->B->data; 6596 a->singlemalloc = b->singlemalloc = PETSC_FALSE; /* Let newmat own Ai,Aj,Aa,Bi,Bj,Ba */ 6597 a->free_a = b->free_a = PETSC_TRUE; 6598 a->free_ij = b->free_ij = PETSC_TRUE; 6599 6600 /* conversion must happen AFTER multiply setup */ 6601 PetscCall(MatConvert(mpiaij->A,rtype,MAT_INPLACE_MATRIX,&mpiaij->A)); 6602 PetscCall(MatConvert(mpiaij->B,rtype,MAT_INPLACE_MATRIX,&mpiaij->B)); 6603 PetscCall(VecDestroy(&mpiaij->lvec)); 6604 PetscCall(MatCreateVecs(mpiaij->B,&mpiaij->lvec,NULL)); 6605 PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)mpiaij->lvec)); 6606 6607 mpiaij->coo_n = coo_n; 6608 mpiaij->coo_sf = sf2; 6609 mpiaij->sendlen = nleaves; 6610 mpiaij->recvlen = nroots; 6611 6612 mpiaij->Annz = Annz; 6613 mpiaij->Bnnz = Bnnz; 6614 6615 mpiaij->Annz2 = Annz2; 6616 mpiaij->Bnnz2 = Bnnz2; 6617 6618 mpiaij->Atot1 = Atot1; 6619 mpiaij->Atot2 = Atot2; 6620 mpiaij->Btot1 = Btot1; 6621 mpiaij->Btot2 = Btot2; 6622 6623 mpiaij->Ajmap1 = Ajmap1; 6624 mpiaij->Aperm1 = Aperm1; 6625 6626 mpiaij->Bjmap1 = Bjmap1; 6627 mpiaij->Bperm1 = Bperm1; 6628 6629 mpiaij->Aimap2 = Aimap2; 6630 mpiaij->Ajmap2 = Ajmap2; 6631 mpiaij->Aperm2 = Aperm2; 6632 6633 mpiaij->Bimap2 = Bimap2; 6634 mpiaij->Bjmap2 = Bjmap2; 6635 mpiaij->Bperm2 = Bperm2; 6636 6637 mpiaij->Cperm1 = Cperm1; 6638 6639 /* Allocate in preallocation. 
If not used, it has zero cost on host */ 6640 PetscCall(PetscMalloc2(mpiaij->sendlen,&mpiaij->sendbuf,mpiaij->recvlen,&mpiaij->recvbuf)); 6641 PetscFunctionReturn(0); 6642 } 6643 6644 static PetscErrorCode MatSetValuesCOO_MPIAIJ(Mat mat,const PetscScalar v[],InsertMode imode) 6645 { 6646 Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ*)mat->data; 6647 Mat A = mpiaij->A,B = mpiaij->B; 6648 PetscCount Annz = mpiaij->Annz,Annz2 = mpiaij->Annz2,Bnnz = mpiaij->Bnnz,Bnnz2 = mpiaij->Bnnz2; 6649 PetscScalar *Aa,*Ba; 6650 PetscScalar *sendbuf = mpiaij->sendbuf; 6651 PetscScalar *recvbuf = mpiaij->recvbuf; 6652 const PetscCount *Ajmap1 = mpiaij->Ajmap1,*Ajmap2 = mpiaij->Ajmap2,*Aimap2 = mpiaij->Aimap2; 6653 const PetscCount *Bjmap1 = mpiaij->Bjmap1,*Bjmap2 = mpiaij->Bjmap2,*Bimap2 = mpiaij->Bimap2; 6654 const PetscCount *Aperm1 = mpiaij->Aperm1,*Aperm2 = mpiaij->Aperm2,*Bperm1 = mpiaij->Bperm1,*Bperm2 = mpiaij->Bperm2; 6655 const PetscCount *Cperm1 = mpiaij->Cperm1; 6656 6657 PetscFunctionBegin; 6658 PetscCall(MatSeqAIJGetArray(A,&Aa)); /* Might read and write matrix values */ 6659 PetscCall(MatSeqAIJGetArray(B,&Ba)); 6660 6661 /* Pack entries to be sent to remote */ 6662 for (PetscCount i=0; i<mpiaij->sendlen; i++) sendbuf[i] = v[Cperm1[i]]; 6663 6664 /* Send remote entries to their owner and overlap the communication with local computation */ 6665 PetscCall(PetscSFReduceWithMemTypeBegin(mpiaij->coo_sf,MPIU_SCALAR,PETSC_MEMTYPE_HOST,sendbuf,PETSC_MEMTYPE_HOST,recvbuf,MPI_REPLACE)); 6666 /* Add local entries to A and B */ 6667 for (PetscCount i=0; i<Annz; i++) { /* All nonzeros in A are either zero'ed or added with a value (i.e., initialized) */ 6668 PetscScalar sum = 0.0; /* Do partial summation first to improve numerical stablility */ 6669 for (PetscCount k=Ajmap1[i]; k<Ajmap1[i+1]; k++) sum += v[Aperm1[k]]; 6670 Aa[i] = (imode == INSERT_VALUES? 
0.0 : Aa[i]) + sum; 6671 } 6672 for (PetscCount i=0; i<Bnnz; i++) { 6673 PetscScalar sum = 0.0; 6674 for (PetscCount k=Bjmap1[i]; k<Bjmap1[i+1]; k++) sum += v[Bperm1[k]]; 6675 Ba[i] = (imode == INSERT_VALUES? 0.0 : Ba[i]) + sum; 6676 } 6677 PetscCall(PetscSFReduceEnd(mpiaij->coo_sf,MPIU_SCALAR,sendbuf,recvbuf,MPI_REPLACE)); 6678 6679 /* Add received remote entries to A and B */ 6680 for (PetscCount i=0; i<Annz2; i++) { 6681 for (PetscCount k=Ajmap2[i]; k<Ajmap2[i+1]; k++) Aa[Aimap2[i]] += recvbuf[Aperm2[k]]; 6682 } 6683 for (PetscCount i=0; i<Bnnz2; i++) { 6684 for (PetscCount k=Bjmap2[i]; k<Bjmap2[i+1]; k++) Ba[Bimap2[i]] += recvbuf[Bperm2[k]]; 6685 } 6686 PetscCall(MatSeqAIJRestoreArray(A,&Aa)); 6687 PetscCall(MatSeqAIJRestoreArray(B,&Ba)); 6688 PetscFunctionReturn(0); 6689 } 6690 6691 /* ----------------------------------------------------------------*/ 6692 6693 /*MC 6694 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 6695 6696 Options Database Keys: 6697 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 6698 6699 Level: beginner 6700 6701 Notes: 6702 MatSetValues() may be called for this matrix type with a NULL argument for the numerical values, 6703 in this case the values associated with the rows and columns one passes in are set to zero 6704 in the matrix 6705 6706 MatSetOptions(,MAT_STRUCTURE_ONLY,PETSC_TRUE) may be called for this matrix type. 
In this no 6707 space is allocated for the nonzero entries and any entries passed with MatSetValues() are ignored 6708 6709 .seealso: `MatCreateAIJ()` 6710 M*/ 6711 6712 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B) 6713 { 6714 Mat_MPIAIJ *b; 6715 PetscMPIInt size; 6716 6717 PetscFunctionBegin; 6718 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B),&size)); 6719 6720 PetscCall(PetscNewLog(B,&b)); 6721 B->data = (void*)b; 6722 PetscCall(PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps))); 6723 B->assembled = PETSC_FALSE; 6724 B->insertmode = NOT_SET_VALUES; 6725 b->size = size; 6726 6727 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank)); 6728 6729 /* build cache for off array entries formed */ 6730 PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash)); 6731 6732 b->donotstash = PETSC_FALSE; 6733 b->colmap = NULL; 6734 b->garray = NULL; 6735 b->roworiented = PETSC_TRUE; 6736 6737 /* stuff used for matrix vector multiply */ 6738 b->lvec = NULL; 6739 b->Mvctx = NULL; 6740 6741 /* stuff for MatGetRow() */ 6742 b->rowindices = NULL; 6743 b->rowvalues = NULL; 6744 b->getrowactive = PETSC_FALSE; 6745 6746 /* flexible pointer used in CUSPARSE classes */ 6747 b->spptr = NULL; 6748 6749 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetUseScalableIncreaseOverlap_C",MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ)); 6750 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ)); 6751 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ)); 6752 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ)); 6753 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ)); 6754 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatResetPreallocation_C",MatResetPreallocation_MPIAIJ)); 6755 
PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ)); 6756 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ)); 6757 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM)); 6758 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijsell_C",MatConvert_MPIAIJ_MPIAIJSELL)); 6759 #if defined(PETSC_HAVE_CUDA) 6760 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcusparse_C",MatConvert_MPIAIJ_MPIAIJCUSPARSE)); 6761 #endif 6762 #if defined(PETSC_HAVE_KOKKOS_KERNELS) 6763 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijkokkos_C",MatConvert_MPIAIJ_MPIAIJKokkos)); 6764 #endif 6765 #if defined(PETSC_HAVE_MKL_SPARSE) 6766 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijmkl_C",MatConvert_MPIAIJ_MPIAIJMKL)); 6767 #endif 6768 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL)); 6769 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpibaij_C",MatConvert_MPIAIJ_MPIBAIJ)); 6770 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ)); 6771 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpidense_C",MatConvert_MPIAIJ_MPIDense)); 6772 #if defined(PETSC_HAVE_ELEMENTAL) 6773 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_elemental_C",MatConvert_MPIAIJ_Elemental)); 6774 #endif 6775 #if defined(PETSC_HAVE_SCALAPACK) 6776 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_scalapack_C",MatConvert_AIJ_ScaLAPACK)); 6777 #endif 6778 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_is_C",MatConvert_XAIJ_IS)); 6779 
PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisell_C",MatConvert_MPIAIJ_MPISELL)); 6780 #if defined(PETSC_HAVE_HYPRE) 6781 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_hypre_C",MatConvert_AIJ_HYPRE)); 6782 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_transpose_mpiaij_mpiaij_C",MatProductSetFromOptions_Transpose_AIJ_AIJ)); 6783 #endif 6784 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_is_mpiaij_C",MatProductSetFromOptions_IS_XAIJ)); 6785 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaij_mpiaij_C",MatProductSetFromOptions_MPIAIJ)); 6786 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatSetPreallocationCOO_C",MatSetPreallocationCOO_MPIAIJ)); 6787 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatSetValuesCOO_C",MatSetValuesCOO_MPIAIJ)); 6788 PetscCall(PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ)); 6789 PetscFunctionReturn(0); 6790 } 6791 6792 /*@C 6793 MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal" 6794 and "off-diagonal" part of the matrix in CSR format. 6795 6796 Collective 6797 6798 Input Parameters: 6799 + comm - MPI communicator 6800 . m - number of local rows (Cannot be PETSC_DECIDE) 6801 . n - This value should be the same as the local size used in creating the 6802 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 6803 calculated if N is given) For square matrices n is almost always m. 6804 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 6805 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 6806 . i - row indices for "diagonal" portion of matrix; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix 6807 . j - column indices, which must be local, i.e., based off the start column of the diagonal portion 6808 . 
a - matrix values 6809 . oi - row indices for "off-diagonal" portion of matrix; that is oi[0] = 0, oi[row] = oi[row-1] + number of elements in that row of the matrix 6810 . oj - column indices, which must be global, representing global columns in the MPIAIJ matrix 6811 - oa - matrix values 6812 6813 Output Parameter: 6814 . mat - the matrix 6815 6816 Level: advanced 6817 6818 Notes: 6819 The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user 6820 must free the arrays once the matrix has been destroyed and not before. 6821 6822 The i and j indices are 0 based 6823 6824 See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix 6825 6826 This sets local rows and cannot be used to set off-processor values. 6827 6828 Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a 6829 legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does 6830 not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because 6831 the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to 6832 keep track of the underlying array. Use MatSetOption(A,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all 6833 communication if it is known that only local entries will be set. 
6834 6835 .seealso: `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 6836 `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithArrays()` 6837 @*/ 6838 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat) 6839 { 6840 Mat_MPIAIJ *maij; 6841 6842 PetscFunctionBegin; 6843 PetscCheck(m >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 6844 PetscCheck(i[0] == 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 6845 PetscCheck(oi[0] == 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0"); 6846 PetscCall(MatCreate(comm,mat)); 6847 PetscCall(MatSetSizes(*mat,m,n,M,N)); 6848 PetscCall(MatSetType(*mat,MATMPIAIJ)); 6849 maij = (Mat_MPIAIJ*) (*mat)->data; 6850 6851 (*mat)->preallocated = PETSC_TRUE; 6852 6853 PetscCall(PetscLayoutSetUp((*mat)->rmap)); 6854 PetscCall(PetscLayoutSetUp((*mat)->cmap)); 6855 6856 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A)); 6857 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B)); 6858 6859 PetscCall(MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE)); 6860 PetscCall(MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY)); 6861 PetscCall(MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY)); 6862 PetscCall(MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE)); 6863 PetscCall(MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE)); 6864 PetscFunctionReturn(0); 6865 } 6866 6867 typedef struct { 6868 Mat *mp; /* intermediate products */ 6869 PetscBool *mptmp; /* is the intermediate product temporary ? 
*/ 6870 PetscInt cp; /* number of intermediate products */ 6871 6872 /* support for MatGetBrowsOfAoCols_MPIAIJ for P_oth */ 6873 PetscInt *startsj_s,*startsj_r; 6874 PetscScalar *bufa; 6875 Mat P_oth; 6876 6877 /* may take advantage of merging product->B */ 6878 Mat Bloc; /* B-local by merging diag and off-diag */ 6879 6880 /* cusparse does not have support to split between symbolic and numeric phases. 6881 When api_user is true, we don't need to update the numerical values 6882 of the temporary storage */ 6883 PetscBool reusesym; 6884 6885 /* support for COO values insertion */ 6886 PetscScalar *coo_v,*coo_w; /* store on-process and off-process COO scalars, and used as MPI recv/send buffers respectively */ 6887 PetscInt **own; /* own[i] points to address of on-process COO indices for Mat mp[i] */ 6888 PetscInt **off; /* off[i] points to address of off-process COO indices for Mat mp[i] */ 6889 PetscBool hasoffproc; /* if true, have off-process values insertion (i.e. AtB or PtAP) */ 6890 PetscSF sf; /* used for non-local values insertion and memory malloc */ 6891 PetscMemType mtype; 6892 6893 /* customization */ 6894 PetscBool abmerge; 6895 PetscBool P_oth_bind; 6896 } MatMatMPIAIJBACKEND; 6897 6898 PetscErrorCode MatDestroy_MatMatMPIAIJBACKEND(void *data) 6899 { 6900 MatMatMPIAIJBACKEND *mmdata = (MatMatMPIAIJBACKEND*)data; 6901 PetscInt i; 6902 6903 PetscFunctionBegin; 6904 PetscCall(PetscFree2(mmdata->startsj_s,mmdata->startsj_r)); 6905 PetscCall(PetscFree(mmdata->bufa)); 6906 PetscCall(PetscSFFree(mmdata->sf,mmdata->mtype,mmdata->coo_v)); 6907 PetscCall(PetscSFFree(mmdata->sf,mmdata->mtype,mmdata->coo_w)); 6908 PetscCall(MatDestroy(&mmdata->P_oth)); 6909 PetscCall(MatDestroy(&mmdata->Bloc)); 6910 PetscCall(PetscSFDestroy(&mmdata->sf)); 6911 for (i = 0; i < mmdata->cp; i++) { 6912 PetscCall(MatDestroy(&mmdata->mp[i])); 6913 } 6914 PetscCall(PetscFree2(mmdata->mp,mmdata->mptmp)); 6915 PetscCall(PetscFree(mmdata->own[0])); 6916 PetscCall(PetscFree(mmdata->own)); 
6917 PetscCall(PetscFree(mmdata->off[0])); 6918 PetscCall(PetscFree(mmdata->off)); 6919 PetscCall(PetscFree(mmdata)); 6920 PetscFunctionReturn(0); 6921 } 6922 6923 /* Copy selected n entries with indices in idx[] of A to v[]. 6924 If idx is NULL, copy the whole data array of A to v[] 6925 */ 6926 static PetscErrorCode MatSeqAIJCopySubArray(Mat A, PetscInt n, const PetscInt idx[], PetscScalar v[]) 6927 { 6928 PetscErrorCode (*f)(Mat,PetscInt,const PetscInt[],PetscScalar[]); 6929 6930 PetscFunctionBegin; 6931 PetscCall(PetscObjectQueryFunction((PetscObject)A,"MatSeqAIJCopySubArray_C",&f)); 6932 if (f) { 6933 PetscCall((*f)(A,n,idx,v)); 6934 } else { 6935 const PetscScalar *vv; 6936 6937 PetscCall(MatSeqAIJGetArrayRead(A,&vv)); 6938 if (n && idx) { 6939 PetscScalar *w = v; 6940 const PetscInt *oi = idx; 6941 PetscInt j; 6942 6943 for (j = 0; j < n; j++) *w++ = vv[*oi++]; 6944 } else { 6945 PetscCall(PetscArraycpy(v,vv,n)); 6946 } 6947 PetscCall(MatSeqAIJRestoreArrayRead(A,&vv)); 6948 } 6949 PetscFunctionReturn(0); 6950 } 6951 6952 static PetscErrorCode MatProductNumeric_MPIAIJBACKEND(Mat C) 6953 { 6954 MatMatMPIAIJBACKEND *mmdata; 6955 PetscInt i,n_d,n_o; 6956 6957 PetscFunctionBegin; 6958 MatCheckProduct(C,1); 6959 PetscCheck(C->product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty"); 6960 mmdata = (MatMatMPIAIJBACKEND*)C->product->data; 6961 if (!mmdata->reusesym) { /* update temporary matrices */ 6962 if (mmdata->P_oth) { 6963 PetscCall(MatGetBrowsOfAoCols_MPIAIJ(C->product->A,C->product->B,MAT_REUSE_MATRIX,&mmdata->startsj_s,&mmdata->startsj_r,&mmdata->bufa,&mmdata->P_oth)); 6964 } 6965 if (mmdata->Bloc) { 6966 PetscCall(MatMPIAIJGetLocalMatMerge(C->product->B,MAT_REUSE_MATRIX,NULL,&mmdata->Bloc)); 6967 } 6968 } 6969 mmdata->reusesym = PETSC_FALSE; 6970 6971 for (i = 0; i < mmdata->cp; i++) { 6972 PetscCheck(mmdata->mp[i]->ops->productnumeric,PetscObjectComm((PetscObject)mmdata->mp[i]),PETSC_ERR_PLIB,"Missing numeric op for 
%s",MatProductTypes[mmdata->mp[i]->product->type]); 6973 PetscCall((*mmdata->mp[i]->ops->productnumeric)(mmdata->mp[i])); 6974 } 6975 for (i = 0, n_d = 0, n_o = 0; i < mmdata->cp; i++) { 6976 PetscInt noff = mmdata->off[i+1] - mmdata->off[i]; 6977 6978 if (mmdata->mptmp[i]) continue; 6979 if (noff) { 6980 PetscInt nown = mmdata->own[i+1] - mmdata->own[i]; 6981 6982 PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i],noff,mmdata->off[i],mmdata->coo_w + n_o)); 6983 PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i],nown,mmdata->own[i],mmdata->coo_v + n_d)); 6984 n_o += noff; 6985 n_d += nown; 6986 } else { 6987 Mat_SeqAIJ *mm = (Mat_SeqAIJ*)mmdata->mp[i]->data; 6988 6989 PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i],mm->nz,NULL,mmdata->coo_v + n_d)); 6990 n_d += mm->nz; 6991 } 6992 } 6993 if (mmdata->hasoffproc) { /* offprocess insertion */ 6994 PetscCall(PetscSFGatherBegin(mmdata->sf,MPIU_SCALAR,mmdata->coo_w,mmdata->coo_v+n_d)); 6995 PetscCall(PetscSFGatherEnd(mmdata->sf,MPIU_SCALAR,mmdata->coo_w,mmdata->coo_v+n_d)); 6996 } 6997 PetscCall(MatSetValuesCOO(C,mmdata->coo_v,INSERT_VALUES)); 6998 PetscFunctionReturn(0); 6999 } 7000 7001 /* Support for Pt * A, A * P, or Pt * A * P */ 7002 #define MAX_NUMBER_INTERMEDIATE 4 7003 PetscErrorCode MatProductSymbolic_MPIAIJBACKEND(Mat C) 7004 { 7005 Mat_Product *product = C->product; 7006 Mat A,P,mp[MAX_NUMBER_INTERMEDIATE]; /* A, P and a series of intermediate matrices */ 7007 Mat_MPIAIJ *a,*p; 7008 MatMatMPIAIJBACKEND *mmdata; 7009 ISLocalToGlobalMapping P_oth_l2g = NULL; 7010 IS glob = NULL; 7011 const char *prefix; 7012 char pprefix[256]; 7013 const PetscInt *globidx,*P_oth_idx; 7014 PetscInt i,j,cp,m,n,M,N,*coo_i,*coo_j; 7015 PetscCount ncoo,ncoo_d,ncoo_o,ncoo_oown; 7016 PetscInt cmapt[MAX_NUMBER_INTERMEDIATE],rmapt[MAX_NUMBER_INTERMEDIATE]; /* col/row map type for each Mat in mp[]. 
*/ 7017 /* type-0: consecutive, start from 0; type-1: consecutive with */ 7018 /* a base offset; type-2: sparse with a local to global map table */ 7019 const PetscInt *cmapa[MAX_NUMBER_INTERMEDIATE],*rmapa[MAX_NUMBER_INTERMEDIATE]; /* col/row local to global map array (table) for type-2 map type */ 7020 7021 MatProductType ptype; 7022 PetscBool mptmp[MAX_NUMBER_INTERMEDIATE],hasoffproc = PETSC_FALSE,iscuda,iskokk; 7023 PetscMPIInt size; 7024 7025 PetscFunctionBegin; 7026 MatCheckProduct(C,1); 7027 PetscCheck(!product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data not empty"); 7028 ptype = product->type; 7029 if (product->A->symmetric == PETSC_BOOL3_TRUE && ptype == MATPRODUCT_AtB) { 7030 ptype = MATPRODUCT_AB; 7031 product->symbolic_used_the_fact_A_is_symmetric = PETSC_TRUE; 7032 } 7033 switch (ptype) { 7034 case MATPRODUCT_AB: 7035 A = product->A; 7036 P = product->B; 7037 m = A->rmap->n; 7038 n = P->cmap->n; 7039 M = A->rmap->N; 7040 N = P->cmap->N; 7041 hasoffproc = PETSC_FALSE; /* will not scatter mat product values to other processes */ 7042 break; 7043 case MATPRODUCT_AtB: 7044 P = product->A; 7045 A = product->B; 7046 m = P->cmap->n; 7047 n = A->cmap->n; 7048 M = P->cmap->N; 7049 N = A->cmap->N; 7050 hasoffproc = PETSC_TRUE; 7051 break; 7052 case MATPRODUCT_PtAP: 7053 A = product->A; 7054 P = product->B; 7055 m = P->cmap->n; 7056 n = P->cmap->n; 7057 M = P->cmap->N; 7058 N = P->cmap->N; 7059 hasoffproc = PETSC_TRUE; 7060 break; 7061 default: 7062 SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Not for product type %s",MatProductTypes[ptype]); 7063 } 7064 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)C),&size)); 7065 if (size == 1) hasoffproc = PETSC_FALSE; 7066 7067 /* defaults */ 7068 for (i=0;i<MAX_NUMBER_INTERMEDIATE;i++) { 7069 mp[i] = NULL; 7070 mptmp[i] = PETSC_FALSE; 7071 rmapt[i] = -1; 7072 cmapt[i] = -1; 7073 rmapa[i] = NULL; 7074 cmapa[i] = NULL; 7075 } 7076 7077 /* customization */ 7078 
PetscCall(PetscNew(&mmdata)); 7079 mmdata->reusesym = product->api_user; 7080 if (ptype == MATPRODUCT_AB) { 7081 if (product->api_user) { 7082 PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatMatMult","Mat"); 7083 PetscCall(PetscOptionsBool("-matmatmult_backend_mergeB","Merge product->B local matrices","MatMatMult",mmdata->abmerge,&mmdata->abmerge,NULL)); 7084 PetscCall(PetscOptionsBool("-matmatmult_backend_pothbind","Bind P_oth to CPU","MatBindToCPU",mmdata->P_oth_bind,&mmdata->P_oth_bind,NULL)); 7085 PetscOptionsEnd(); 7086 } else { 7087 PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_AB","Mat"); 7088 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_mergeB","Merge product->B local matrices","MatMatMult",mmdata->abmerge,&mmdata->abmerge,NULL)); 7089 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_pothbind","Bind P_oth to CPU","MatBindToCPU",mmdata->P_oth_bind,&mmdata->P_oth_bind,NULL)); 7090 PetscOptionsEnd(); 7091 } 7092 } else if (ptype == MATPRODUCT_PtAP) { 7093 if (product->api_user) { 7094 PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatPtAP","Mat"); 7095 PetscCall(PetscOptionsBool("-matptap_backend_pothbind","Bind P_oth to CPU","MatBindToCPU",mmdata->P_oth_bind,&mmdata->P_oth_bind,NULL)); 7096 PetscOptionsEnd(); 7097 } else { 7098 PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_PtAP","Mat"); 7099 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_pothbind","Bind P_oth to CPU","MatBindToCPU",mmdata->P_oth_bind,&mmdata->P_oth_bind,NULL)); 7100 PetscOptionsEnd(); 7101 } 7102 } 7103 a = (Mat_MPIAIJ*)A->data; 7104 p = (Mat_MPIAIJ*)P->data; 7105 PetscCall(MatSetSizes(C,m,n,M,N)); 7106 PetscCall(PetscLayoutSetUp(C->rmap)); 7107 PetscCall(PetscLayoutSetUp(C->cmap)); 7108 PetscCall(MatSetType(C,((PetscObject)A)->type_name)); 7109 PetscCall(MatGetOptionsPrefix(C,&prefix)); 7110 7111 cp = 0; 7112 switch 
(ptype) { 7113 case MATPRODUCT_AB: /* A * P */ 7114 PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A,P,MAT_INITIAL_MATRIX,&mmdata->startsj_s,&mmdata->startsj_r,&mmdata->bufa,&mmdata->P_oth)); 7115 7116 /* A_diag * P_local (merged or not) */ 7117 if (mmdata->abmerge) { /* P's diagonal and off-diag blocks are merged to one matrix, then multiplied by A_diag */ 7118 /* P is product->B */ 7119 PetscCall(MatMPIAIJGetLocalMatMerge(P,MAT_INITIAL_MATRIX,&glob,&mmdata->Bloc)); 7120 PetscCall(MatProductCreate(a->A,mmdata->Bloc,NULL,&mp[cp])); 7121 PetscCall(MatProductSetType(mp[cp],MATPRODUCT_AB)); 7122 PetscCall(MatProductSetFill(mp[cp],product->fill)); 7123 PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp)); 7124 PetscCall(MatSetOptionsPrefix(mp[cp],prefix)); 7125 PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix)); 7126 mp[cp]->product->api_user = product->api_user; 7127 PetscCall(MatProductSetFromOptions(mp[cp])); 7128 PetscCheck(mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]); 7129 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7130 PetscCall(ISGetIndices(glob,&globidx)); 7131 rmapt[cp] = 1; 7132 cmapt[cp] = 2; 7133 cmapa[cp] = globidx; 7134 mptmp[cp] = PETSC_FALSE; 7135 cp++; 7136 } else { /* A_diag * P_diag and A_diag * P_off */ 7137 PetscCall(MatProductCreate(a->A,p->A,NULL,&mp[cp])); 7138 PetscCall(MatProductSetType(mp[cp],MATPRODUCT_AB)); 7139 PetscCall(MatProductSetFill(mp[cp],product->fill)); 7140 PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp)); 7141 PetscCall(MatSetOptionsPrefix(mp[cp],prefix)); 7142 PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix)); 7143 mp[cp]->product->api_user = product->api_user; 7144 PetscCall(MatProductSetFromOptions(mp[cp])); 7145 PetscCheck(mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for 
%s",MatProductTypes[mp[cp]->product->type]); 7146 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7147 rmapt[cp] = 1; 7148 cmapt[cp] = 1; 7149 mptmp[cp] = PETSC_FALSE; 7150 cp++; 7151 PetscCall(MatProductCreate(a->A,p->B,NULL,&mp[cp])); 7152 PetscCall(MatProductSetType(mp[cp],MATPRODUCT_AB)); 7153 PetscCall(MatProductSetFill(mp[cp],product->fill)); 7154 PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp)); 7155 PetscCall(MatSetOptionsPrefix(mp[cp],prefix)); 7156 PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix)); 7157 mp[cp]->product->api_user = product->api_user; 7158 PetscCall(MatProductSetFromOptions(mp[cp])); 7159 PetscCheck(mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]); 7160 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7161 rmapt[cp] = 1; 7162 cmapt[cp] = 2; 7163 cmapa[cp] = p->garray; 7164 mptmp[cp] = PETSC_FALSE; 7165 cp++; 7166 } 7167 7168 /* A_off * P_other */ 7169 if (mmdata->P_oth) { 7170 PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth,&P_oth_l2g)); /* make P_oth use local col ids */ 7171 PetscCall(ISLocalToGlobalMappingGetIndices(P_oth_l2g,&P_oth_idx)); 7172 PetscCall(MatSetType(mmdata->P_oth,((PetscObject)(a->B))->type_name)); 7173 PetscCall(MatBindToCPU(mmdata->P_oth,mmdata->P_oth_bind)); 7174 PetscCall(MatProductCreate(a->B,mmdata->P_oth,NULL,&mp[cp])); 7175 PetscCall(MatProductSetType(mp[cp],MATPRODUCT_AB)); 7176 PetscCall(MatProductSetFill(mp[cp],product->fill)); 7177 PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp)); 7178 PetscCall(MatSetOptionsPrefix(mp[cp],prefix)); 7179 PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix)); 7180 mp[cp]->product->api_user = product->api_user; 7181 PetscCall(MatProductSetFromOptions(mp[cp])); 7182 PetscCheck(mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for 
%s",MatProductTypes[mp[cp]->product->type]); 7183 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7184 rmapt[cp] = 1; 7185 cmapt[cp] = 2; 7186 cmapa[cp] = P_oth_idx; 7187 mptmp[cp] = PETSC_FALSE; 7188 cp++; 7189 } 7190 break; 7191 7192 case MATPRODUCT_AtB: /* (P^t * A): P_diag * A_loc + P_off * A_loc */ 7193 /* A is product->B */ 7194 PetscCall(MatMPIAIJGetLocalMatMerge(A,MAT_INITIAL_MATRIX,&glob,&mmdata->Bloc)); 7195 if (A == P) { /* when A==P, we can take advantage of the already merged mmdata->Bloc */ 7196 PetscCall(MatProductCreate(mmdata->Bloc,mmdata->Bloc,NULL,&mp[cp])); 7197 PetscCall(MatProductSetType(mp[cp],MATPRODUCT_AtB)); 7198 PetscCall(MatProductSetFill(mp[cp],product->fill)); 7199 PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp)); 7200 PetscCall(MatSetOptionsPrefix(mp[cp],prefix)); 7201 PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix)); 7202 mp[cp]->product->api_user = product->api_user; 7203 PetscCall(MatProductSetFromOptions(mp[cp])); 7204 PetscCheck(mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]); 7205 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7206 PetscCall(ISGetIndices(glob,&globidx)); 7207 rmapt[cp] = 2; 7208 rmapa[cp] = globidx; 7209 cmapt[cp] = 2; 7210 cmapa[cp] = globidx; 7211 mptmp[cp] = PETSC_FALSE; 7212 cp++; 7213 } else { 7214 PetscCall(MatProductCreate(p->A,mmdata->Bloc,NULL,&mp[cp])); 7215 PetscCall(MatProductSetType(mp[cp],MATPRODUCT_AtB)); 7216 PetscCall(MatProductSetFill(mp[cp],product->fill)); 7217 PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp)); 7218 PetscCall(MatSetOptionsPrefix(mp[cp],prefix)); 7219 PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix)); 7220 mp[cp]->product->api_user = product->api_user; 7221 PetscCall(MatProductSetFromOptions(mp[cp])); 7222 
PetscCheck(mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]); 7223 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7224 PetscCall(ISGetIndices(glob,&globidx)); 7225 rmapt[cp] = 1; 7226 cmapt[cp] = 2; 7227 cmapa[cp] = globidx; 7228 mptmp[cp] = PETSC_FALSE; 7229 cp++; 7230 PetscCall(MatProductCreate(p->B,mmdata->Bloc,NULL,&mp[cp])); 7231 PetscCall(MatProductSetType(mp[cp],MATPRODUCT_AtB)); 7232 PetscCall(MatProductSetFill(mp[cp],product->fill)); 7233 PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp)); 7234 PetscCall(MatSetOptionsPrefix(mp[cp],prefix)); 7235 PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix)); 7236 mp[cp]->product->api_user = product->api_user; 7237 PetscCall(MatProductSetFromOptions(mp[cp])); 7238 PetscCheck(mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]); 7239 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7240 rmapt[cp] = 2; 7241 rmapa[cp] = p->garray; 7242 cmapt[cp] = 2; 7243 cmapa[cp] = globidx; 7244 mptmp[cp] = PETSC_FALSE; 7245 cp++; 7246 } 7247 break; 7248 case MATPRODUCT_PtAP: 7249 PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A,P,MAT_INITIAL_MATRIX,&mmdata->startsj_s,&mmdata->startsj_r,&mmdata->bufa,&mmdata->P_oth)); 7250 /* P is product->B */ 7251 PetscCall(MatMPIAIJGetLocalMatMerge(P,MAT_INITIAL_MATRIX,&glob,&mmdata->Bloc)); 7252 PetscCall(MatProductCreate(a->A,mmdata->Bloc,NULL,&mp[cp])); 7253 PetscCall(MatProductSetType(mp[cp],MATPRODUCT_PtAP)); 7254 PetscCall(MatProductSetFill(mp[cp],product->fill)); 7255 PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp)); 7256 PetscCall(MatSetOptionsPrefix(mp[cp],prefix)); 7257 PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix)); 7258 mp[cp]->product->api_user = product->api_user; 7259 PetscCall(MatProductSetFromOptions(mp[cp])); 7260 
PetscCheck(mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]); 7261 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7262 PetscCall(ISGetIndices(glob,&globidx)); 7263 rmapt[cp] = 2; 7264 rmapa[cp] = globidx; 7265 cmapt[cp] = 2; 7266 cmapa[cp] = globidx; 7267 mptmp[cp] = PETSC_FALSE; 7268 cp++; 7269 if (mmdata->P_oth) { 7270 PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth,&P_oth_l2g)); 7271 PetscCall(ISLocalToGlobalMappingGetIndices(P_oth_l2g,&P_oth_idx)); 7272 PetscCall(MatSetType(mmdata->P_oth,((PetscObject)(a->B))->type_name)); 7273 PetscCall(MatBindToCPU(mmdata->P_oth,mmdata->P_oth_bind)); 7274 PetscCall(MatProductCreate(a->B,mmdata->P_oth,NULL,&mp[cp])); 7275 PetscCall(MatProductSetType(mp[cp],MATPRODUCT_AB)); 7276 PetscCall(MatProductSetFill(mp[cp],product->fill)); 7277 PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp)); 7278 PetscCall(MatSetOptionsPrefix(mp[cp],prefix)); 7279 PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix)); 7280 mp[cp]->product->api_user = product->api_user; 7281 PetscCall(MatProductSetFromOptions(mp[cp])); 7282 PetscCheck(mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]); 7283 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7284 mptmp[cp] = PETSC_TRUE; 7285 cp++; 7286 PetscCall(MatProductCreate(mmdata->Bloc,mp[1],NULL,&mp[cp])); 7287 PetscCall(MatProductSetType(mp[cp],MATPRODUCT_AtB)); 7288 PetscCall(MatProductSetFill(mp[cp],product->fill)); 7289 PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp)); 7290 PetscCall(MatSetOptionsPrefix(mp[cp],prefix)); 7291 PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix)); 7292 mp[cp]->product->api_user = product->api_user; 7293 PetscCall(MatProductSetFromOptions(mp[cp])); 7294 
PetscCheck(mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]); 7295 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7296 rmapt[cp] = 2; 7297 rmapa[cp] = globidx; 7298 cmapt[cp] = 2; 7299 cmapa[cp] = P_oth_idx; 7300 mptmp[cp] = PETSC_FALSE; 7301 cp++; 7302 } 7303 break; 7304 default: 7305 SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Not for product type %s",MatProductTypes[ptype]); 7306 } 7307 /* sanity check */ 7308 if (size > 1) for (i = 0; i < cp; i++) PetscCheck(rmapt[i] != 2 || hasoffproc,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected offproc map type for product %" PetscInt_FMT,i); 7309 7310 PetscCall(PetscMalloc2(cp,&mmdata->mp,cp,&mmdata->mptmp)); 7311 for (i = 0; i < cp; i++) { 7312 mmdata->mp[i] = mp[i]; 7313 mmdata->mptmp[i] = mptmp[i]; 7314 } 7315 mmdata->cp = cp; 7316 C->product->data = mmdata; 7317 C->product->destroy = MatDestroy_MatMatMPIAIJBACKEND; 7318 C->ops->productnumeric = MatProductNumeric_MPIAIJBACKEND; 7319 7320 /* memory type */ 7321 mmdata->mtype = PETSC_MEMTYPE_HOST; 7322 PetscCall(PetscObjectTypeCompareAny((PetscObject)C,&iscuda,MATSEQAIJCUSPARSE,MATMPIAIJCUSPARSE,"")); 7323 PetscCall(PetscObjectTypeCompareAny((PetscObject)C,&iskokk,MATSEQAIJKOKKOS,MATMPIAIJKOKKOS,"")); 7324 if (iscuda) mmdata->mtype = PETSC_MEMTYPE_CUDA; 7325 else if (iskokk) mmdata->mtype = PETSC_MEMTYPE_KOKKOS; 7326 7327 /* prepare coo coordinates for values insertion */ 7328 7329 /* count total nonzeros of those intermediate seqaij Mats 7330 ncoo_d: # of nonzeros of matrices that do not have offproc entries 7331 ncoo_o: # of nonzeros (of matrices that might have offproc entries) that will be inserted to remote procs 7332 ncoo_oown: # of nonzeros (of matrices that might have offproc entries) that will be inserted locally 7333 */ 7334 for (cp = 0, ncoo_d = 0, ncoo_o = 0, ncoo_oown = 0; cp < mmdata->cp; cp++) { 7335 Mat_SeqAIJ *mm = (Mat_SeqAIJ*)mp[cp]->data; 
7336 if (mptmp[cp]) continue; 7337 if (rmapt[cp] == 2 && hasoffproc) { /* the rows need to be scatter to all processes (might include self) */ 7338 const PetscInt *rmap = rmapa[cp]; 7339 const PetscInt mr = mp[cp]->rmap->n; 7340 const PetscInt rs = C->rmap->rstart; 7341 const PetscInt re = C->rmap->rend; 7342 const PetscInt *ii = mm->i; 7343 for (i = 0; i < mr; i++) { 7344 const PetscInt gr = rmap[i]; 7345 const PetscInt nz = ii[i+1] - ii[i]; 7346 if (gr < rs || gr >= re) ncoo_o += nz; /* this row is offproc */ 7347 else ncoo_oown += nz; /* this row is local */ 7348 } 7349 } else ncoo_d += mm->nz; 7350 } 7351 7352 /* 7353 ncoo: total number of nonzeros (including those inserted by remote procs) belonging to this proc 7354 7355 ncoo = ncoo_d + ncoo_oown + ncoo2, which ncoo2 is number of nonzeros inserted to me by other procs. 7356 7357 off[0] points to a big index array, which is shared by off[1,2,...]. Similarily, for own[0]. 7358 7359 off[p]: points to the segment for matrix mp[p], storing location of nonzeros that mp[p] will insert to others 7360 own[p]: points to the segment for matrix mp[p], storing location of nonzeros that mp[p] will insert locally 7361 so, off[p+1]-off[p] is the number of nonzeros that mp[p] will send to others. 7362 7363 coo_i/j/v[]: [ncoo] row/col/val of nonzeros belonging to this proc. 7364 Ex. coo_i[]: the beginning part (of size ncoo_d + ncoo_oown) stores i of local nonzeros, and the remaing part stores i of nonzeros I will receive. 
7365 */ 7366 PetscCall(PetscCalloc1(mmdata->cp+1,&mmdata->off)); /* +1 to make a csr-like data structure */ 7367 PetscCall(PetscCalloc1(mmdata->cp+1,&mmdata->own)); 7368 7369 /* gather (i,j) of nonzeros inserted by remote procs */ 7370 if (hasoffproc) { 7371 PetscSF msf; 7372 PetscInt ncoo2,*coo_i2,*coo_j2; 7373 7374 PetscCall(PetscMalloc1(ncoo_o,&mmdata->off[0])); 7375 PetscCall(PetscMalloc1(ncoo_oown,&mmdata->own[0])); 7376 PetscCall(PetscMalloc2(ncoo_o,&coo_i,ncoo_o,&coo_j)); /* to collect (i,j) of entries to be sent to others */ 7377 7378 for (cp = 0, ncoo_o = 0; cp < mmdata->cp; cp++) { 7379 Mat_SeqAIJ *mm = (Mat_SeqAIJ*)mp[cp]->data; 7380 PetscInt *idxoff = mmdata->off[cp]; 7381 PetscInt *idxown = mmdata->own[cp]; 7382 if (!mptmp[cp] && rmapt[cp] == 2) { /* row map is sparse */ 7383 const PetscInt *rmap = rmapa[cp]; 7384 const PetscInt *cmap = cmapa[cp]; 7385 const PetscInt *ii = mm->i; 7386 PetscInt *coi = coo_i + ncoo_o; 7387 PetscInt *coj = coo_j + ncoo_o; 7388 const PetscInt mr = mp[cp]->rmap->n; 7389 const PetscInt rs = C->rmap->rstart; 7390 const PetscInt re = C->rmap->rend; 7391 const PetscInt cs = C->cmap->rstart; 7392 for (i = 0; i < mr; i++) { 7393 const PetscInt *jj = mm->j + ii[i]; 7394 const PetscInt gr = rmap[i]; 7395 const PetscInt nz = ii[i+1] - ii[i]; 7396 if (gr < rs || gr >= re) { /* this is an offproc row */ 7397 for (j = ii[i]; j < ii[i+1]; j++) { 7398 *coi++ = gr; 7399 *idxoff++ = j; 7400 } 7401 if (!cmapt[cp]) { /* already global */ 7402 for (j = 0; j < nz; j++) *coj++ = jj[j]; 7403 } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */ 7404 for (j = 0; j < nz; j++) *coj++ = jj[j] + cs; 7405 } else { /* offdiag */ 7406 for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]]; 7407 } 7408 ncoo_o += nz; 7409 } else { /* this is a local row */ 7410 for (j = ii[i]; j < ii[i+1]; j++) *idxown++ = j; 7411 } 7412 } 7413 } 7414 mmdata->off[cp + 1] = idxoff; 7415 mmdata->own[cp + 1] = idxown; 7416 } 7417 7418 
PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)C),&mmdata->sf)); 7419 PetscCall(PetscSFSetGraphLayout(mmdata->sf,C->rmap,ncoo_o/*nleaves*/,NULL/*ilocal*/,PETSC_OWN_POINTER,coo_i)); 7420 PetscCall(PetscSFGetMultiSF(mmdata->sf,&msf)); 7421 PetscCall(PetscSFGetGraph(msf,&ncoo2/*nroots*/,NULL,NULL,NULL)); 7422 ncoo = ncoo_d + ncoo_oown + ncoo2; 7423 PetscCall(PetscMalloc2(ncoo,&coo_i2,ncoo,&coo_j2)); 7424 PetscCall(PetscSFGatherBegin(mmdata->sf,MPIU_INT,coo_i,coo_i2 + ncoo_d + ncoo_oown)); /* put (i,j) of remote nonzeros at back */ 7425 PetscCall(PetscSFGatherEnd(mmdata->sf,MPIU_INT,coo_i,coo_i2 + ncoo_d + ncoo_oown)); 7426 PetscCall(PetscSFGatherBegin(mmdata->sf,MPIU_INT,coo_j,coo_j2 + ncoo_d + ncoo_oown)); 7427 PetscCall(PetscSFGatherEnd(mmdata->sf,MPIU_INT,coo_j,coo_j2 + ncoo_d + ncoo_oown)); 7428 PetscCall(PetscFree2(coo_i,coo_j)); 7429 /* allocate MPI send buffer to collect nonzero values to be sent to remote procs */ 7430 PetscCall(PetscSFMalloc(mmdata->sf,mmdata->mtype,ncoo_o*sizeof(PetscScalar),(void**)&mmdata->coo_w)); 7431 coo_i = coo_i2; 7432 coo_j = coo_j2; 7433 } else { /* no offproc values insertion */ 7434 ncoo = ncoo_d; 7435 PetscCall(PetscMalloc2(ncoo,&coo_i,ncoo,&coo_j)); 7436 7437 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)C),&mmdata->sf)); 7438 PetscCall(PetscSFSetGraph(mmdata->sf,0,0,NULL,PETSC_OWN_POINTER,NULL,PETSC_OWN_POINTER)); 7439 PetscCall(PetscSFSetUp(mmdata->sf)); 7440 } 7441 mmdata->hasoffproc = hasoffproc; 7442 7443 /* gather (i,j) of nonzeros inserted locally */ 7444 for (cp = 0, ncoo_d = 0; cp < mmdata->cp; cp++) { 7445 Mat_SeqAIJ *mm = (Mat_SeqAIJ*)mp[cp]->data; 7446 PetscInt *coi = coo_i + ncoo_d; 7447 PetscInt *coj = coo_j + ncoo_d; 7448 const PetscInt *jj = mm->j; 7449 const PetscInt *ii = mm->i; 7450 const PetscInt *cmap = cmapa[cp]; 7451 const PetscInt *rmap = rmapa[cp]; 7452 const PetscInt mr = mp[cp]->rmap->n; 7453 const PetscInt rs = C->rmap->rstart; 7454 const PetscInt re = C->rmap->rend; 7455 const PetscInt 
cs = C->cmap->rstart; 7456 7457 if (mptmp[cp]) continue; 7458 if (rmapt[cp] == 1) { /* consecutive rows */ 7459 /* fill coo_i */ 7460 for (i = 0; i < mr; i++) { 7461 const PetscInt gr = i + rs; 7462 for (j = ii[i]; j < ii[i+1]; j++) coi[j] = gr; 7463 } 7464 /* fill coo_j */ 7465 if (!cmapt[cp]) { /* type-0, already global */ 7466 PetscCall(PetscArraycpy(coj,jj,mm->nz)); 7467 } else if (cmapt[cp] == 1) { /* type-1, local to global for consecutive columns of C */ 7468 for (j = 0; j < mm->nz; j++) coj[j] = jj[j] + cs; /* lid + col start */ 7469 } else { /* type-2, local to global for sparse columns */ 7470 for (j = 0; j < mm->nz; j++) coj[j] = cmap[jj[j]]; 7471 } 7472 ncoo_d += mm->nz; 7473 } else if (rmapt[cp] == 2) { /* sparse rows */ 7474 for (i = 0; i < mr; i++) { 7475 const PetscInt *jj = mm->j + ii[i]; 7476 const PetscInt gr = rmap[i]; 7477 const PetscInt nz = ii[i+1] - ii[i]; 7478 if (gr >= rs && gr < re) { /* local rows */ 7479 for (j = ii[i]; j < ii[i+1]; j++) *coi++ = gr; 7480 if (!cmapt[cp]) { /* type-0, already global */ 7481 for (j = 0; j < nz; j++) *coj++ = jj[j]; 7482 } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */ 7483 for (j = 0; j < nz; j++) *coj++ = jj[j] + cs; 7484 } else { /* type-2, local to global for sparse columns */ 7485 for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]]; 7486 } 7487 ncoo_d += nz; 7488 } 7489 } 7490 } 7491 } 7492 if (glob) { 7493 PetscCall(ISRestoreIndices(glob,&globidx)); 7494 } 7495 PetscCall(ISDestroy(&glob)); 7496 if (P_oth_l2g) { 7497 PetscCall(ISLocalToGlobalMappingRestoreIndices(P_oth_l2g,&P_oth_idx)); 7498 } 7499 PetscCall(ISLocalToGlobalMappingDestroy(&P_oth_l2g)); 7500 /* allocate an array to store all nonzeros (inserted locally or remotely) belonging to this proc */ 7501 PetscCall(PetscSFMalloc(mmdata->sf,mmdata->mtype,ncoo*sizeof(PetscScalar),(void**)&mmdata->coo_v)); 7502 7503 /* preallocate with COO data */ 7504 PetscCall(MatSetPreallocationCOO(C,ncoo,coo_i,coo_j)); 7505 
  PetscCall(PetscFree2(coo_i,coo_j));
  PetscFunctionReturn(0);
}

/*
   MatProductSetFromOptions_MPIAIJBACKEND - decide whether the device-backend symbolic
   product can be used for this MatProduct, honoring per-product-type "use CPU" options.

   Without PETSC_HAVE_DEVICE the backend is always eligible (match starts PETSC_TRUE).
   With PETSC_HAVE_DEVICE, the backend is considered only when neither operand is bound
   to the CPU and A and B share the same type name; the user may still force the CPU
   path via an options-database flag specific to the product type. If the backend is not
   selected, we fall back to the plain MPIAIJ implementation at the bottom.
*/
PetscErrorCode MatProductSetFromOptions_MPIAIJBACKEND(Mat mat)
{
  Mat_Product *product = mat->product;
#if defined(PETSC_HAVE_DEVICE)
  PetscBool   match  = PETSC_FALSE;
  PetscBool   usecpu = PETSC_FALSE;
#else
  PetscBool   match  = PETSC_TRUE;
#endif

  PetscFunctionBegin;
  MatCheckProduct(mat,1);
#if defined(PETSC_HAVE_DEVICE)
  if (!product->A->boundtocpu && !product->B->boundtocpu) {
    PetscCall(PetscObjectTypeCompare((PetscObject)product->B,((PetscObject)product->A)->type_name,&match));
  }
  if (match) { /* we can always fallback to the CPU if requested */
    /* The option name differs depending on whether the user called the MatXxxMult()
       API directly (api_user) or went through the MatProduct interface */
    switch (product->type) {
    case MATPRODUCT_AB:
      if (product->api_user) {
        PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatMatMult","Mat");
        PetscCall(PetscOptionsBool("-matmatmult_backend_cpu","Use CPU code","MatMatMult",usecpu,&usecpu,NULL));
        PetscOptionsEnd();
      } else {
        PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatProduct_AB","Mat");
        PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu","Use CPU code","MatMatMult",usecpu,&usecpu,NULL));
        PetscOptionsEnd();
      }
      break;
    case MATPRODUCT_AtB:
      if (product->api_user) {
        PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatTransposeMatMult","Mat");
        PetscCall(PetscOptionsBool("-mattransposematmult_backend_cpu","Use CPU code","MatTransposeMatMult",usecpu,&usecpu,NULL));
        PetscOptionsEnd();
      } else {
        PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatProduct_AtB","Mat");
        PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu","Use CPU code","MatTransposeMatMult",usecpu,&usecpu,NULL));
        PetscOptionsEnd();
      }
      break;
    case MATPRODUCT_PtAP:
      if (product->api_user) {
        PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatPtAP","Mat");
        PetscCall(PetscOptionsBool("-matptap_backend_cpu","Use CPU code","MatPtAP",usecpu,&usecpu,NULL));
        PetscOptionsEnd();
      } else {
        PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatProduct_PtAP","Mat");
        PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu","Use CPU code","MatPtAP",usecpu,&usecpu,NULL));
        PetscOptionsEnd();
      }
      break;
    default:
      break;
    }
    match = (PetscBool)!usecpu;
  }
#endif
  if (match) {
    switch (product->type) {
    case MATPRODUCT_AB:
    case MATPRODUCT_AtB:
    case MATPRODUCT_PtAP:
      mat->ops->productsymbolic = MatProductSymbolic_MPIAIJBACKEND;
      break;
    default:
      break;
    }
  }
  /* fallback to MPIAIJ ops */
  if (!mat->ops->productsymbolic) PetscCall(MatProductSetFromOptions_MPIAIJ(mat));
  PetscFunctionReturn(0);
}

/*
   Produces a set of block column indices of the matrix row, one for each block represented in the original row

   n - the number of block indices in cc[]
   cc - the block indices (must be large enough to contain the indices)

   Note: relies on the column indices returned by MatGetRow() being sorted in
   increasing order, so duplicate block indices are always adjacent and a single
   comparison against the last stored entry suffices to deduplicate.
*/
static inline PetscErrorCode MatCollapseRow(Mat Amat,PetscInt row,PetscInt bs,PetscInt *n,PetscInt *cc)
{
  PetscInt       cnt = -1,nidx,j;   /* cnt = index of last stored block column; -1 while row is empty */
  const PetscInt *idx;

  PetscFunctionBegin;
  PetscCall(MatGetRow(Amat,row,&nidx,&idx,NULL));
  if (nidx) {
    cnt = 0;
    cc[cnt] = idx[0]/bs;
    for (j=1; j<nidx; j++) {
      /* store a new block index only when it differs from the previous one */
      if (cc[cnt] < idx[j]/bs) cc[++cnt] = idx[j]/bs;
    }
  }
  PetscCall(MatRestoreRow(Amat,row,&nidx,&idx,NULL));
  *n = cnt+1; /* empty row yields *n == 0 */
  PetscFunctionReturn(0);
}

/*
  Produces a set of block column indices of the matrix block row, one for each block represented in the original set of rows

  ncollapsed - the number of block indices
  collapsed - the block indices
  (must be large enough to contain the indices)

  Note: w0/w1/w2 are caller-provided scratch arrays; the merged result ping-pongs
  between the buffers via PetscMergeIntArray(), so the pointer handed back through
  *collapsed may alias any of the three workspaces depending on bs.
*/
static inline PetscErrorCode MatCollapseRows(Mat Amat,PetscInt start,PetscInt bs,PetscInt *w0,PetscInt *w1,PetscInt *w2,PetscInt *ncollapsed,PetscInt **collapsed)
{
  PetscInt i,nprev,*cprev = w0,ncur = 0,*ccur = w1,*merged = w2,*cprevtmp;

  PetscFunctionBegin;
  /* seed with the block columns of the first row, then fold in the remaining bs-1 rows */
  PetscCall(MatCollapseRow(Amat,start,bs,&nprev,cprev));
  for (i=start+1; i<start+bs; i++) {
    PetscCall(MatCollapseRow(Amat,i,bs,&ncur,ccur));
    PetscCall(PetscMergeIntArray(nprev,cprev,ncur,ccur,&nprev,&merged));
    /* swap accumulator and scratch so the next merge writes into the other buffer */
    cprevtmp = cprev; cprev = merged; merged = cprevtmp;
  }
  *ncollapsed = nprev;
  if (collapsed) *collapsed = cprev;
  PetscFunctionReturn(0);
}

/* -------------------------------------------------------------------------- */
/*
   MatCreateGraph_Simple_AIJ - create simple scalar matrix (graph) from potentially blocked matrix

   Input Parameters:
+  Amat - matrix
.  symmetrize - make the result symmetric
-  scale - scale with diagonal

   Output Parameter:
.
 a_Gmat - output scalar graph >= 0

*/
PETSC_INTERN PetscErrorCode MatCreateGraph_Simple_AIJ(Mat Amat, PetscBool symmetrize, PetscBool scale, Mat *a_Gmat)
{
  PetscInt  Istart,Iend,Ii,jj,kk,ncols,nloc,NN,MM,bs;
  MPI_Comm  comm;
  Mat       Gmat;
  PetscBool ismpiaij,isseqaij;
  Mat       a, b, c;
  MatType   jtype;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)Amat,&comm));
  PetscCall(MatGetOwnershipRange(Amat, &Istart, &Iend));
  PetscCall(MatGetSize(Amat, &MM, &NN));
  PetscCall(MatGetBlockSize(Amat, &bs));
  nloc = (Iend-Istart)/bs; /* number of local block rows = rows of the scalar graph */

  PetscCall(PetscObjectBaseTypeCompare((PetscObject)Amat,MATSEQAIJ,&isseqaij));
  PetscCall(PetscObjectBaseTypeCompare((PetscObject)Amat,MATMPIAIJ,&ismpiaij));
  PetscCheck(isseqaij || ismpiaij,comm,PETSC_ERR_USER,"Require (MPI)AIJ matrix type");

  /* TODO GPU: these calls are potentially expensive if matrices are large and we want to use the GPU */
  /* A solution consists in providing a new API, MatAIJGetCollapsedAIJ, and each class can provide a fast
     implementation */
  if (bs > 1) {
    PetscCall(MatGetType(Amat,&jtype));
    PetscCall(MatCreate(comm, &Gmat));
    PetscCall(MatSetType(Gmat, jtype));
    PetscCall(MatSetSizes(Gmat,nloc,nloc,PETSC_DETERMINE,PETSC_DETERMINE));
    PetscCall(MatSetBlockSizes(Gmat, 1, 1));
    /* Fast path: assumes every bs x bs block of Amat is fully dense; verified below
       and we jump to old_bs if the assumption fails */
    if (isseqaij || ((Mat_MPIAIJ*)Amat->data)->garray) {
      PetscInt  *d_nnz, *o_nnz;
      /* AA/AJ are fixed-size per-block-row scratch; nmax is checked against 4096 below */
      MatScalar *aa,val,AA[4096];
      PetscInt  *aj,*ai,AJ[4096],nc;
      if (isseqaij) { a = Amat; b = NULL; }
      else {
        Mat_MPIAIJ *d = (Mat_MPIAIJ*)Amat->data;
        a = d->A; b = d->B;
      }
      PetscCall(PetscInfo(Amat,"New bs>1 Graph. nloc=%" PetscInt_FMT "\n",nloc));
      PetscCall(PetscMalloc2(nloc, &d_nnz,isseqaij ? 0 : nloc, &o_nnz));
      /* First pass over diag (a) then off-diag (b): count block nonzeros per block
         row and verify the dense-block assumption */
      for (c=a, kk=0 ; c && kk<2 ; c=b, kk++){
        PetscInt       *nnz = (c==a) ? d_nnz : o_nnz, nmax=0;
        const PetscInt *cols;
        for (PetscInt brow=0,jj,ok=1,j0; brow < nloc*bs; brow += bs) { // block rows
          PetscCall(MatGetRow(c,brow,&jj,&cols,NULL));
          nnz[brow/bs] = jj/bs;
          if (jj%bs) ok = 0; /* row length not a multiple of bs -> blocks not dense */
          if (cols) j0 = cols[0];
          else j0 = -1;
          PetscCall(MatRestoreRow(c,brow,&jj,&cols,NULL));
          if (nnz[brow/bs]>nmax) nmax = nnz[brow/bs];
          for (PetscInt ii=1; ii < bs && nnz[brow/bs] ; ii++) { // check for non-dense blocks
            PetscCall(MatGetRow(c,brow+ii,&jj,&cols,NULL));
            if (jj%bs) ok = 0;
            if ((cols && j0 != cols[0]) || (!cols && j0 != -1)) ok = 0; /* rows in block must start at same column */
            if (nnz[brow/bs] != jj/bs) ok = 0;                          /* and have the same block count */
            PetscCall(MatRestoreRow(c,brow+ii,&jj,&cols,NULL));
          }
          if (!ok) {
            /* blocks are not uniformly dense: release counts and fall back to the general path */
            PetscCall(PetscFree2(d_nnz,o_nnz));
            goto old_bs;
          }
        }
        PetscCheck(nmax<4096,PETSC_COMM_SELF,PETSC_ERR_USER,"Buffer %" PetscInt_FMT " too small 4096.",nmax);
      }
      PetscCall(MatSeqAIJSetPreallocation(Gmat,0,d_nnz));
      PetscCall(MatMPIAIJSetPreallocation(Gmat,0,d_nnz,0,o_nnz));
      PetscCall(PetscFree2(d_nnz,o_nnz));
      // diag
      for (PetscInt brow=0,n,grow; brow < nloc*bs; brow += bs) { // block rows
        Mat_SeqAIJ *aseq = (Mat_SeqAIJ*)a->data;
        ai = aseq->i;
        n  = ai[brow+1] - ai[brow];
        aj = aseq->j + ai[brow];
        for (int k=0; k<n; k += bs) {        // block columns
          AJ[k/bs] = aj[k]/bs + Istart/bs; // diag starts at (Istart,Istart)
          val = 0;
          for (int ii=0; ii<bs; ii++) { // rows in block
            aa = aseq->a + ai[brow+ii] + k;
            for (int jj=0; jj<bs; jj++) { // columns in block
              val += PetscAbs(PetscRealPart(aa[jj])); // a sort of norm
            }
          }
          AA[k/bs] = val;
        }
        grow = Istart/bs + brow/bs;
        PetscCall(MatSetValues(Gmat,1,&grow,n/bs,AJ,AA,INSERT_VALUES));
      }
      // off-diag
      if (ismpiaij) {
        Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)Amat->data;
        const PetscScalar *vals;
        const PetscInt    *cols, *garray = aij->garray;
        PetscCheck(garray,PETSC_COMM_SELF,PETSC_ERR_USER,"No garray ?");
        for (PetscInt brow=0,grow; brow < nloc*bs; brow += bs) { // block rows
          /* column indices first: off-diag local columns map to global via garray */
          PetscCall(MatGetRow(b,brow,&ncols,&cols,NULL));
          for (int k=0,cidx=0 ; k < ncols ; k += bs, cidx++) {
            AA[k/bs] = 0;
            AJ[cidx] = garray[cols[k]]/bs;
          }
          nc = ncols/bs;
          PetscCall(MatRestoreRow(b,brow,&ncols,&cols,NULL));
          /* accumulate |value| over the bs x bs block */
          for (int ii=0; ii<bs; ii++) { // rows in block
            PetscCall(MatGetRow(b,brow+ii,&ncols,&cols,&vals));
            for (int k=0; k<ncols; k += bs) {
              for (int jj=0; jj<bs; jj++) { // cols in block
                AA[k/bs] += PetscAbs(PetscRealPart(vals[k+jj]));
              }
            }
            PetscCall(MatRestoreRow(b,brow+ii,&ncols,&cols,&vals));
          }
          grow = Istart/bs + brow/bs;
          PetscCall(MatSetValues(Gmat,1,&grow,nc,AJ,AA,INSERT_VALUES));
        }
      }
      PetscCall(MatAssemblyBegin(Gmat,MAT_FINAL_ASSEMBLY));
      PetscCall(MatAssemblyEnd(Gmat,MAT_FINAL_ASSEMBLY));
    } else {
      const PetscScalar *vals;
      const PetscInt    *idx;
      PetscInt          *d_nnz, *o_nnz,*w0,*w1,*w2;
old_bs:
      /*
        Determine the preallocation needed for the scalar matrix derived from the vector matrix.
      */
      PetscCall(PetscInfo(Amat,"OLD bs>1 CreateGraph\n"));
      PetscCall(PetscMalloc2(nloc, &d_nnz,isseqaij ? 0 : nloc, &o_nnz));
      if (isseqaij) {
        PetscInt max_d_nnz;
        /*
          Determine exact preallocation count for (sequential) scalar matrix
        */
        PetscCall(MatSeqAIJGetMaxRowNonzeros(Amat,&max_d_nnz));
        max_d_nnz = PetscMin(nloc,bs*max_d_nnz);
        PetscCall(PetscMalloc3(max_d_nnz, &w0,max_d_nnz, &w1,max_d_nnz, &w2));
        for (Ii = 0, jj = 0; Ii < Iend; Ii += bs, jj++) {
          PetscCall(MatCollapseRows(Amat,Ii,bs,w0,w1,w2,&d_nnz[jj],NULL));
        }
        PetscCall(PetscFree3(w0,w1,w2));
      } else if (ismpiaij) {
        Mat            Daij,Oaij;
        const PetscInt *garray;
        PetscInt       max_d_nnz;
        PetscCall(MatMPIAIJGetSeqAIJ(Amat,&Daij,&Oaij,&garray));
        /*
          Determine exact preallocation count for diagonal block portion of scalar matrix
        */
        PetscCall(MatSeqAIJGetMaxRowNonzeros(Daij,&max_d_nnz));
        max_d_nnz = PetscMin(nloc,bs*max_d_nnz);
        PetscCall(PetscMalloc3(max_d_nnz, &w0,max_d_nnz, &w1,max_d_nnz, &w2));
        for (Ii = 0, jj = 0; Ii < Iend - Istart; Ii += bs, jj++) {
          PetscCall(MatCollapseRows(Daij,Ii,bs,w0,w1,w2,&d_nnz[jj],NULL));
        }
        PetscCall(PetscFree3(w0,w1,w2));
        /*
          Over estimate (usually grossly over), preallocation count for off-diagonal portion of scalar matrix
        */
        for (Ii = 0, jj = 0; Ii < Iend - Istart; Ii += bs, jj++) {
          o_nnz[jj] = 0;
          for (kk=0; kk<bs; kk++) { /* rows that get collapsed to a single row */
            PetscCall(MatGetRow(Oaij,Ii+kk,&ncols,NULL,NULL));
            o_nnz[jj] += ncols;
            PetscCall(MatRestoreRow(Oaij,Ii+kk,&ncols,NULL,NULL));
          }
          /* cap at the number of off-process block columns */
          if (o_nnz[jj] > (NN/bs-nloc)) o_nnz[jj] = NN/bs-nloc;
        }
      } else SETERRQ(comm,PETSC_ERR_USER,"Require AIJ matrix type");
      /* get scalar copy (norms) of matrix */
      PetscCall(MatSeqAIJSetPreallocation(Gmat,0,d_nnz));
      PetscCall(MatMPIAIJSetPreallocation(Gmat,0,d_nnz,0,o_nnz));
      PetscCall(PetscFree2(d_nnz,o_nnz));
      /* ADD_VALUES accumulates |a_ij| from all scalar entries of a block into one graph entry */
      for (Ii = Istart; Ii < Iend; Ii++) {
        PetscInt dest_row = Ii/bs;
        PetscCall(MatGetRow(Amat,Ii,&ncols,&idx,&vals));
        for (jj=0; jj<ncols; jj++) {
          PetscInt    dest_col = idx[jj]/bs;
          PetscScalar sv       = PetscAbs(PetscRealPart(vals[jj]));
          PetscCall(MatSetValues(Gmat,1,&dest_row,1,&dest_col,&sv,ADD_VALUES));
        }
        PetscCall(MatRestoreRow(Amat,Ii,&ncols,&idx,&vals));
      }
      PetscCall(MatAssemblyBegin(Gmat,MAT_FINAL_ASSEMBLY));
      PetscCall(MatAssemblyEnd(Gmat,MAT_FINAL_ASSEMBLY));
    }
  } else {
    /* TODO GPU: optimization proposal, each class provides fast implementation of this
       procedure via MatAbs API */
    /* just copy scalar matrix & abs() */
    PetscCall(MatDuplicate(Amat, MAT_COPY_VALUES, &Gmat));
    if (isseqaij) { a = Gmat; b = NULL; }
    else {
      Mat_MPIAIJ *d = (Mat_MPIAIJ*)Gmat->data;
      a = d->A; b = d->B;
    }
    /* abs */
    for (c=a, kk=0 ; c && kk<2 ; c=b, kk++){
      MatInfo      info;
      PetscScalar *avals;
      PetscCall(MatGetInfo(c,MAT_LOCAL,&info));
      PetscCall(MatSeqAIJGetArray(c,&avals));
      for (int jj = 0; jj<info.nz_used; jj++) avals[jj] = PetscAbsScalar(avals[jj]);
      PetscCall(MatSeqAIJRestoreArray(c,&avals));
    }
  }
  if (symmetrize) {
    PetscBool isset,issym;
    PetscCall(MatIsSymmetricKnown(Amat,&isset,&issym));
    if (!isset || !issym) {
      /* symmetrize as G + G^T */
      Mat matTrans;
      PetscCall(MatTranspose(Gmat, MAT_INITIAL_MATRIX, &matTrans));
      PetscCall(MatAXPY(Gmat, 1.0, matTrans, Gmat->structurally_symmetric ? SAME_NONZERO_PATTERN : DIFFERENT_NONZERO_PATTERN));
      PetscCall(MatDestroy(&matTrans));
    }
    PetscCall(MatSetOption(Gmat,MAT_SYMMETRIC,PETSC_TRUE));
  } else {
    PetscCall(MatPropagateSymmetryOptions(Amat, Gmat));
  }
  if (scale) {
    /* scale c for all diagonal values = 1 or -1 */
    Vec diag;
    PetscCall(MatCreateVecs(Gmat, &diag, NULL));
    PetscCall(MatGetDiagonal(Gmat, diag));
    PetscCall(VecReciprocal(diag));
    PetscCall(VecSqrtAbs(diag));
    PetscCall(MatDiagonalScale(Gmat, diag, diag));
    PetscCall(VecDestroy(&diag));
  }
  PetscCall(MatViewFromOptions(Gmat, NULL, "-mat_graph_view"));
  *a_Gmat = Gmat;
  PetscFunctionReturn(0);
}

/* -------------------------------------------------------------------------- */
/*@C
   MatFilter_AIJ - filter values with small absolute values
   With vfilter < 0 does nothing so should not be called.

   Collective on Mat

   Input Parameters:
+  Gmat - the graph
.  vfilter - threshold parameter [0,1)

   Output Parameter:
.  filteredG - output filtered scalar graph

   Level: developer

   Notes:
   This is called before graph coarseners are called.
   This could go into Mat, move 'symm' to GAMG

.seealso: `PCGAMGSetThreshold()`
@*/
PETSC_INTERN PetscErrorCode MatFilter_AIJ(Mat Gmat,PetscReal vfilter, Mat *filteredG)
{
  PetscInt          Istart,Iend,ncols,nnz0,nnz1, NN, MM, nloc;
  Mat               tGmat;
  MPI_Comm          comm;
  const PetscScalar *vals;
  const PetscInt    *idx;
  PetscInt          *d_nnz, *o_nnz, kk, *garray = NULL, *AJ, maxcols=0;
  MatScalar         *AA; // this is checked in graph
  PetscBool         isseqaij;
  Mat               a, b, c;
  MatType           jtype;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)Gmat,&comm));
  PetscCall(PetscObjectBaseTypeCompare((PetscObject)Gmat,MATSEQAIJ,&isseqaij));
  PetscCall(MatGetType(Gmat,&jtype));
  PetscCall(MatCreate(comm, &tGmat));
  PetscCall(MatSetType(tGmat, jtype));

  /* TODO GPU: this can be called when filter = 0 -> Probably provide MatAIJThresholdCompress that compresses the entries below a threshold?
     Also, if the matrix is symmetric, can we skip this
     operation? It can be very expensive on large matrices. */

  // global sizes
  PetscCall(MatGetSize(Gmat, &MM, &NN));
  PetscCall(MatGetOwnershipRange(Gmat, &Istart, &Iend));
  nloc = Iend - Istart;
  PetscCall(PetscMalloc2(nloc, &d_nnz,nloc, &o_nnz));
  if (isseqaij) { a = Gmat; b = NULL; }
  else {
    Mat_MPIAIJ *d = (Mat_MPIAIJ*)Gmat->data;
    a = d->A; b = d->B;
    garray = d->garray;
  }
  /* Determine upper bound on non-zeros needed in new filtered matrix */
  for (PetscInt row=0; row < nloc; row++) {
    PetscCall(MatGetRow(a,row,&ncols,NULL,NULL));
    d_nnz[row] = ncols;
    if (ncols>maxcols) maxcols=ncols; /* maxcols sizes the AA/AJ scratch buffers below */
    PetscCall(MatRestoreRow(a,row,&ncols,NULL,NULL));
  }
  if (b) {
    for (PetscInt row=0; row < nloc; row++) {
      PetscCall(MatGetRow(b,row,&ncols,NULL,NULL));
      o_nnz[row] = ncols;
      if (ncols>maxcols) maxcols=ncols;
      PetscCall(MatRestoreRow(b,row,&ncols,NULL,NULL));
    }
  }
  PetscCall(MatSetSizes(tGmat,nloc,nloc,MM,MM));
  PetscCall(MatSetBlockSizes(tGmat, 1, 1));
  PetscCall(MatSeqAIJSetPreallocation(tGmat,0,d_nnz));
  PetscCall(MatMPIAIJSetPreallocation(tGmat,0,d_nnz,0,o_nnz));
  /* all insertions below are into locally owned rows */
  PetscCall(MatSetOption(tGmat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE));
  PetscCall(PetscFree2(d_nnz,o_nnz));
  //
  PetscCall(PetscMalloc2(maxcols, &AA,maxcols, &AJ));
  nnz0 = nnz1 = 0; /* nnz0 = entries scanned, nnz1 = entries kept (for the stats message) */
  for (c=a, kk=0 ; c && kk<2 ; c=b, kk++){
    for (PetscInt row=0, grow=Istart, ncol_row, jj ; row < nloc; row++,grow++) {
      PetscCall(MatGetRow(c,row,&ncols,&idx,&vals));
      for (ncol_row=jj=0; jj<ncols; jj++,nnz0++) {
        PetscScalar sv = PetscAbs(PetscRealPart(vals[jj]));
        if (PetscRealPart(sv) > vfilter) {
          nnz1++;
          PetscInt cid = idx[jj] + Istart; //diag
          if (c!=a) cid = garray[idx[jj]]; /* off-diag: map local column to global via garray */
          AA[ncol_row] = vals[jj];
          AJ[ncol_row] = cid;
          ncol_row++;
        }
      }
      PetscCall(MatRestoreRow(c,row,&ncols,&idx,&vals));
      PetscCall(MatSetValues(tGmat,1,&grow,ncol_row,AJ,AA,INSERT_VALUES));
    }
  }
  PetscCall(PetscFree2(AA,AJ));
  PetscCall(MatAssemblyBegin(tGmat,MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(tGmat,MAT_FINAL_ASSEMBLY));
  PetscCall(MatPropagateSymmetryOptions(Gmat,tGmat)); /* Normal Mat options are not relevant ? */

  PetscCall(PetscInfo(tGmat,"\t %g%% nnz after filtering, with threshold %g, %g nnz ave. (N=%" PetscInt_FMT ", max row size %d)\n",
                      (!nnz0) ? 1. : 100.*(double)nnz1/(double)nnz0, (double)vfilter,
                      (!nloc) ? 1. : (double)nnz0/(double)nloc,MM,(int)maxcols));

  *filteredG = tGmat;
  PetscCall(MatViewFromOptions(tGmat, NULL, "-mat_filter_graph_view"));
  PetscFunctionReturn(0);
}

/*
    Special version for direct calls from Fortran
*/
#include <petsc/private/fortranimpl.h>

/* Change these macros so can be used in void function */
/* Identical to PetscCallVoid, except it assigns to *_ierr */
#undef PetscCall
#define PetscCall(...) do {                                            \
    PetscErrorCode ierr_msv_mpiaij = __VA_ARGS__;                      \
    if (PetscUnlikely(ierr_msv_mpiaij)) {                              \
      *_ierr = PetscError(PETSC_COMM_SELF,__LINE__,PETSC_FUNCTION_NAME,__FILE__,ierr_msv_mpiaij,PETSC_ERROR_REPEAT," "); \
      return;                                                          \
    }                                                                  \
  } while (0)

#undef SETERRQ
#define SETERRQ(comm,ierr,...) do {                                    \
    *_ierr = PetscError(comm,__LINE__,PETSC_FUNCTION_NAME,__FILE__,ierr,PETSC_ERROR_INITIAL,__VA_ARGS__); \
    return;                                                            \
  } while (0)

#if defined(PETSC_HAVE_FORTRAN_CAPS)
#define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
#elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
#define matsetvaluesmpiaij_ matsetvaluesmpiaij
#else
#endif
/*
   matsetvaluesmpiaij_ - Fortran direct-call entry point mirroring MatSetValues for MPIAIJ.
   All arguments arrive by reference (Fortran convention); the error code is returned
   through *_ierr via the PetscCall/SETERRQ macros redefined above.
*/
PETSC_EXTERN void matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
{
  Mat         mat = *mmat;
  PetscInt    m = *mm, n = *mn;
  InsertMode  addv = *maddv;
  Mat_MPIAIJ  *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar value;

  MatCheckPreallocated(mat,1);
  if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
  else PetscCheck(mat->insertmode == addv,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
  {
    PetscInt  i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
    PetscInt  cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
    PetscBool roworiented = aij->roworiented;

    /* Some Variables required in the macro */
    Mat        A = aij->A;
    Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
    PetscInt   *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
    MatScalar  *aa;
    PetscBool  ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
    Mat        B = aij->B;
    Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data;
    PetscInt   *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
    MatScalar  *ba;
    /* This variable below is only for the PETSC_HAVE_VIENNACL or PETSC_HAVE_CUDA cases, but we define it in all cases because we
     * cannot use "#if defined" inside a macro. */
    PETSC_UNUSED PetscBool inserted = PETSC_FALSE;

    PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
    PetscInt  nonew = a->nonew;
    MatScalar *ap1,*ap2;

    PetscFunctionBegin;
    PetscCall(MatSeqAIJGetArray(A,&aa));
    PetscCall(MatSeqAIJGetArray(B,&ba));
    for (i=0; i<m; i++) {
      if (im[i] < 0) continue; /* negative row indices are ignored, matching MatSetValues semantics */
      PetscCheck(im[i] < mat->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT,im[i],mat->rmap->N-1);
      if (im[i] >= rstart && im[i] < rend) {
        /* locally owned row: set up search state for both the diagonal (1) and
           off-diagonal (2) blocks, as required by the SeqAIJ private macros */
        row = im[i] - rstart;
        lastcol1 = -1;
        rp1   = aj + ai[row];
        ap1   = aa + ai[row];
        rmax1 = aimax[row];
        nrow1 = ailen[row];
        low1  = 0;
        high1 = nrow1;
        lastcol2 = -1;
        rp2   = bj + bi[row];
        ap2   = ba + bi[row];
        rmax2 = bimax[row];
        nrow2 = bilen[row];
        low2  = 0;
        high2 = nrow2;

        for (j=0; j<n; j++) {
          if (roworiented) value = v[i*n+j];
          else value = v[i+j*m];
          if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
          if (in[j] >= cstart && in[j] < cend) {
            col = in[j] - cstart;
            MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
          } else if (in[j] < 0) continue;
          else if (PetscUnlikelyDebug(in[j] >= mat->cmap->N)) {
            /* extra brace on SETERRQ() is required for --with-errorchecking=0 - due to the next 'else' clause */
            SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT,in[j],mat->cmap->N-1);
          } else {
            if (mat->was_assembled) {
              if (!aij->colmap) {
                PetscCall(MatCreateColmap_MPIAIJ_Private(mat));
              }
#if defined(PETSC_USE_CTABLE)
              PetscCall(PetscTableFind(aij->colmap,in[j]+1,&col));
              col--;
#else
              col = aij->colmap[in[j]] - 1;
#endif
              if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
                PetscCall(MatDisAssemble_MPIAIJ(mat));
                col = in[j];
                /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
                B     = aij->B;
                b     = (Mat_SeqAIJ*)B->data;
                bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
                rp2   = bj + bi[row];
                ap2   = ba + bi[row];
                rmax2 = bimax[row];
                nrow2 = bilen[row];
                low2  = 0;
                high2 = nrow2;
                bm    = aij->B->rmap->n;
                ba    = b->a;
                inserted = PETSC_FALSE;
              }
            } else col = in[j];
            MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
          }
        }
      } else if (!aij->donotstash) {
        /* off-process row: stash for communication at assembly time */
        if (roworiented) {
          PetscCall(MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
        } else {
          PetscCall(MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
        }
      }
    }
    PetscCall(MatSeqAIJRestoreArray(A,&aa));
    PetscCall(MatSeqAIJRestoreArray(B,&ba));
  }
  PetscFunctionReturnVoid();
}

/* Undefining these here since they were redefined from their original definition above! No
 * other PETSc functions should be defined past this point, as it is impossible to recover the
 * original definitions */
#undef PetscCall
#undef SETERRQ