1 #include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/ 2 #include <petsc/private/vecimpl.h> 3 #include <petsc/private/sfimpl.h> 4 #include <petsc/private/isimpl.h> 5 #include <petscblaslapack.h> 6 #include <petscsf.h> 7 #include <petsc/private/hashmapi.h> 8 9 PetscErrorCode MatGetRowIJ_MPIAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *m,const PetscInt *ia[],const PetscInt *ja[],PetscBool *done) 10 { 11 Mat B; 12 13 PetscFunctionBegin; 14 PetscCall(MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&B)); 15 PetscCall(PetscObjectCompose((PetscObject)A,"MatGetRowIJ_MPIAIJ",(PetscObject)B)); 16 PetscCall(MatGetRowIJ(B,oshift,symmetric,inodecompressed,m,ia,ja,done)); 17 PetscFunctionReturn(0); 18 } 19 20 PetscErrorCode MatRestoreRowIJ_MPIAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *m,const PetscInt *ia[],const PetscInt *ja[],PetscBool *done) 21 { 22 Mat B; 23 24 PetscFunctionBegin; 25 PetscCall(PetscObjectQuery((PetscObject)A,"MatGetRowIJ_MPIAIJ",(PetscObject*)&B)); 26 PetscCall(MatRestoreRowIJ(B,oshift,symmetric,inodecompressed,m,ia,ja,done)); 27 PetscCall(MatDestroy(&B)); 28 PetscFunctionReturn(0); 29 } 30 31 /*MC 32 MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices. 33 34 This matrix type is identical to MATSEQAIJ when constructed with a single process communicator, 35 and MATMPIAIJ otherwise. As a result, for single process communicators, 36 MatSeqAIJSetPreallocation is supported, and similarly MatMPIAIJSetPreallocation() is supported 37 for communicators controlling multiple processes. It is recommended that you call both of 38 the above preallocation routines for simplicity. 39 40 Options Database Keys: 41 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions() 42 43 Developer Notes: 44 Subclasses include MATAIJCUSPARSE, MATAIJPERM, MATAIJSELL, MATAIJMKL, MATAIJCRL, and also automatically switches over to use inodes when 45 enough exist. 
46 47 Level: beginner 48 49 .seealso: `MatCreateAIJ()`, `MatCreateSeqAIJ()`, `MATSEQAIJ`, `MATMPIAIJ` 50 M*/ 51 52 /*MC 53 MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices. 54 55 This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator, 56 and MATMPIAIJCRL otherwise. As a result, for single process communicators, 57 MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported 58 for communicators controlling multiple processes. It is recommended that you call both of 59 the above preallocation routines for simplicity. 60 61 Options Database Keys: 62 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions() 63 64 Level: beginner 65 66 .seealso: `MatCreateMPIAIJCRL`, `MATSEQAIJCRL`, `MATMPIAIJCRL`, `MATSEQAIJCRL`, `MATMPIAIJCRL` 67 M*/ 68 69 static PetscErrorCode MatBindToCPU_MPIAIJ(Mat A,PetscBool flg) 70 { 71 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 72 73 PetscFunctionBegin; 74 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_VIENNACL) 75 A->boundtocpu = flg; 76 #endif 77 if (a->A) PetscCall(MatBindToCPU(a->A,flg)); 78 if (a->B) PetscCall(MatBindToCPU(a->B,flg)); 79 80 /* In addition to binding the diagonal and off-diagonal matrices, bind the local vectors used for matrix-vector products. 81 * This maybe seems a little odd for a MatBindToCPU() call to do, but it makes no sense for the binding of these vectors 82 * to differ from the parent matrix. 
*/ 83 if (a->lvec) PetscCall(VecBindToCPU(a->lvec,flg)); 84 if (a->diag) PetscCall(VecBindToCPU(a->diag,flg)); 85 86 PetscFunctionReturn(0); 87 } 88 89 PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs) 90 { 91 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)M->data; 92 93 PetscFunctionBegin; 94 if (mat->A) { 95 PetscCall(MatSetBlockSizes(mat->A,rbs,cbs)); 96 PetscCall(MatSetBlockSizes(mat->B,rbs,1)); 97 } 98 PetscFunctionReturn(0); 99 } 100 101 PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows) 102 { 103 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)M->data; 104 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data; 105 Mat_SeqAIJ *b = (Mat_SeqAIJ*)mat->B->data; 106 const PetscInt *ia,*ib; 107 const MatScalar *aa,*bb,*aav,*bav; 108 PetscInt na,nb,i,j,*rows,cnt=0,n0rows; 109 PetscInt m = M->rmap->n,rstart = M->rmap->rstart; 110 111 PetscFunctionBegin; 112 *keptrows = NULL; 113 114 ia = a->i; 115 ib = b->i; 116 PetscCall(MatSeqAIJGetArrayRead(mat->A,&aav)); 117 PetscCall(MatSeqAIJGetArrayRead(mat->B,&bav)); 118 for (i=0; i<m; i++) { 119 na = ia[i+1] - ia[i]; 120 nb = ib[i+1] - ib[i]; 121 if (!na && !nb) { 122 cnt++; 123 goto ok1; 124 } 125 aa = aav + ia[i]; 126 for (j=0; j<na; j++) { 127 if (aa[j] != 0.0) goto ok1; 128 } 129 bb = bav + ib[i]; 130 for (j=0; j <nb; j++) { 131 if (bb[j] != 0.0) goto ok1; 132 } 133 cnt++; 134 ok1:; 135 } 136 PetscCall(MPIU_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M))); 137 if (!n0rows) { 138 PetscCall(MatSeqAIJRestoreArrayRead(mat->A,&aav)); 139 PetscCall(MatSeqAIJRestoreArrayRead(mat->B,&bav)); 140 PetscFunctionReturn(0); 141 } 142 PetscCall(PetscMalloc1(M->rmap->n-cnt,&rows)); 143 cnt = 0; 144 for (i=0; i<m; i++) { 145 na = ia[i+1] - ia[i]; 146 nb = ib[i+1] - ib[i]; 147 if (!na && !nb) continue; 148 aa = aav + ia[i]; 149 for (j=0; j<na;j++) { 150 if (aa[j] != 0.0) { 151 rows[cnt++] = rstart + i; 152 goto ok2; 153 } 154 } 155 bb = bav + ib[i]; 156 for (j=0; j<nb; j++) { 157 if (bb[j] != 0.0) { 158 rows[cnt++] = 
rstart + i; 159 goto ok2; 160 } 161 } 162 ok2:; 163 } 164 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows)); 165 PetscCall(MatSeqAIJRestoreArrayRead(mat->A,&aav)); 166 PetscCall(MatSeqAIJRestoreArrayRead(mat->B,&bav)); 167 PetscFunctionReturn(0); 168 } 169 170 PetscErrorCode MatDiagonalSet_MPIAIJ(Mat Y,Vec D,InsertMode is) 171 { 172 Mat_MPIAIJ *aij = (Mat_MPIAIJ*) Y->data; 173 PetscBool cong; 174 175 PetscFunctionBegin; 176 PetscCall(MatHasCongruentLayouts(Y,&cong)); 177 if (Y->assembled && cong) { 178 PetscCall(MatDiagonalSet(aij->A,D,is)); 179 } else { 180 PetscCall(MatDiagonalSet_Default(Y,D,is)); 181 } 182 PetscFunctionReturn(0); 183 } 184 185 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows) 186 { 187 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)M->data; 188 PetscInt i,rstart,nrows,*rows; 189 190 PetscFunctionBegin; 191 *zrows = NULL; 192 PetscCall(MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows)); 193 PetscCall(MatGetOwnershipRange(M,&rstart,NULL)); 194 for (i=0; i<nrows; i++) rows[i] += rstart; 195 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows)); 196 PetscFunctionReturn(0); 197 } 198 199 PetscErrorCode MatGetColumnReductions_MPIAIJ(Mat A,PetscInt type,PetscReal *reductions) 200 { 201 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)A->data; 202 PetscInt i,m,n,*garray = aij->garray; 203 Mat_SeqAIJ *a_aij = (Mat_SeqAIJ*) aij->A->data; 204 Mat_SeqAIJ *b_aij = (Mat_SeqAIJ*) aij->B->data; 205 PetscReal *work; 206 const PetscScalar *dummy; 207 208 PetscFunctionBegin; 209 PetscCall(MatGetSize(A,&m,&n)); 210 PetscCall(PetscCalloc1(n,&work)); 211 PetscCall(MatSeqAIJGetArrayRead(aij->A,&dummy)); 212 PetscCall(MatSeqAIJRestoreArrayRead(aij->A,&dummy)); 213 PetscCall(MatSeqAIJGetArrayRead(aij->B,&dummy)); 214 PetscCall(MatSeqAIJRestoreArrayRead(aij->B,&dummy)); 215 if (type == NORM_2) { 216 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 217 work[A->cmap->rstart + a_aij->j[i]] += 
PetscAbsScalar(a_aij->a[i]*a_aij->a[i]); 218 } 219 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 220 work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]); 221 } 222 } else if (type == NORM_1) { 223 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 224 work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]); 225 } 226 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 227 work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]); 228 } 229 } else if (type == NORM_INFINITY) { 230 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 231 work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]); 232 } 233 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 234 work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]); 235 } 236 } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) { 237 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 238 work[A->cmap->rstart + a_aij->j[i]] += PetscRealPart(a_aij->a[i]); 239 } 240 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 241 work[garray[b_aij->j[i]]] += PetscRealPart(b_aij->a[i]); 242 } 243 } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) { 244 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 245 work[A->cmap->rstart + a_aij->j[i]] += PetscImaginaryPart(a_aij->a[i]); 246 } 247 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 248 work[garray[b_aij->j[i]]] += PetscImaginaryPart(b_aij->a[i]); 249 } 250 } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown reduction type"); 251 if (type == NORM_INFINITY) { 252 PetscCall(MPIU_Allreduce(work,reductions,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A))); 253 } else { 254 PetscCall(MPIU_Allreduce(work,reductions,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A))); 255 } 256 PetscCall(PetscFree(work)); 257 if (type == NORM_2) { 258 for (i=0; i<n; i++) reductions[i] = PetscSqrtReal(reductions[i]); 259 } else if (type 
== REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) { 260 for (i=0; i<n; i++) reductions[i] /= m; 261 } 262 PetscFunctionReturn(0); 263 } 264 265 PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A,IS *is) 266 { 267 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 268 IS sis,gis; 269 const PetscInt *isis,*igis; 270 PetscInt n,*iis,nsis,ngis,rstart,i; 271 272 PetscFunctionBegin; 273 PetscCall(MatFindOffBlockDiagonalEntries(a->A,&sis)); 274 PetscCall(MatFindNonzeroRows(a->B,&gis)); 275 PetscCall(ISGetSize(gis,&ngis)); 276 PetscCall(ISGetSize(sis,&nsis)); 277 PetscCall(ISGetIndices(sis,&isis)); 278 PetscCall(ISGetIndices(gis,&igis)); 279 280 PetscCall(PetscMalloc1(ngis+nsis,&iis)); 281 PetscCall(PetscArraycpy(iis,igis,ngis)); 282 PetscCall(PetscArraycpy(iis+ngis,isis,nsis)); 283 n = ngis + nsis; 284 PetscCall(PetscSortRemoveDupsInt(&n,iis)); 285 PetscCall(MatGetOwnershipRange(A,&rstart,NULL)); 286 for (i=0; i<n; i++) iis[i] += rstart; 287 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)A),n,iis,PETSC_OWN_POINTER,is)); 288 289 PetscCall(ISRestoreIndices(sis,&isis)); 290 PetscCall(ISRestoreIndices(gis,&igis)); 291 PetscCall(ISDestroy(&sis)); 292 PetscCall(ISDestroy(&gis)); 293 PetscFunctionReturn(0); 294 } 295 296 /* 297 Local utility routine that creates a mapping from the global column 298 number to the local number in the off-diagonal part of the local 299 storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at 300 a slightly higher hash table cost; without it it is not scalable (each processor 301 has an order N integer array but is fast to access. 
302 */ 303 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat) 304 { 305 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 306 PetscInt n = aij->B->cmap->n,i; 307 308 PetscFunctionBegin; 309 PetscCheck(!n || aij->garray,PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray"); 310 #if defined(PETSC_USE_CTABLE) 311 PetscCall(PetscTableCreate(n,mat->cmap->N+1,&aij->colmap)); 312 for (i=0; i<n; i++) { 313 PetscCall(PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES)); 314 } 315 #else 316 PetscCall(PetscCalloc1(mat->cmap->N+1,&aij->colmap)); 317 PetscCall(PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt))); 318 for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1; 319 #endif 320 PetscFunctionReturn(0); 321 } 322 323 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv,orow,ocol) \ 324 { \ 325 if (col <= lastcol1) low1 = 0; \ 326 else high1 = nrow1; \ 327 lastcol1 = col;\ 328 while (high1-low1 > 5) { \ 329 t = (low1+high1)/2; \ 330 if (rp1[t] > col) high1 = t; \ 331 else low1 = t; \ 332 } \ 333 for (_i=low1; _i<high1; _i++) { \ 334 if (rp1[_i] > col) break; \ 335 if (rp1[_i] == col) { \ 336 if (addv == ADD_VALUES) { \ 337 ap1[_i] += value; \ 338 /* Not sure LogFlops will slow dow the code or not */ \ 339 (void)PetscLogFlops(1.0); \ 340 } \ 341 else ap1[_i] = value; \ 342 goto a_noinsert; \ 343 } \ 344 } \ 345 if (value == 0.0 && ignorezeroentries && row != col) {low1 = 0; high1 = nrow1;goto a_noinsert;} \ 346 if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \ 347 PetscCheck(nonew != -1,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \ 348 MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \ 349 N = nrow1++ - 1; a->nz++; high1++; \ 350 /* shift up all the later entries in this row */ \ 351 PetscCall(PetscArraymove(rp1+_i+1,rp1+_i,N-_i+1));\ 352 
PetscCall(PetscArraymove(ap1+_i+1,ap1+_i,N-_i+1));\ 353 rp1[_i] = col; \ 354 ap1[_i] = value; \ 355 A->nonzerostate++;\ 356 a_noinsert: ; \ 357 ailen[row] = nrow1; \ 358 } 359 360 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv,orow,ocol) \ 361 { \ 362 if (col <= lastcol2) low2 = 0; \ 363 else high2 = nrow2; \ 364 lastcol2 = col; \ 365 while (high2-low2 > 5) { \ 366 t = (low2+high2)/2; \ 367 if (rp2[t] > col) high2 = t; \ 368 else low2 = t; \ 369 } \ 370 for (_i=low2; _i<high2; _i++) { \ 371 if (rp2[_i] > col) break; \ 372 if (rp2[_i] == col) { \ 373 if (addv == ADD_VALUES) { \ 374 ap2[_i] += value; \ 375 (void)PetscLogFlops(1.0); \ 376 } \ 377 else ap2[_i] = value; \ 378 goto b_noinsert; \ 379 } \ 380 } \ 381 if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 382 if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 383 PetscCheck(nonew != -1,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \ 384 MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \ 385 N = nrow2++ - 1; b->nz++; high2++; \ 386 /* shift up all the later entries in this row */ \ 387 PetscCall(PetscArraymove(rp2+_i+1,rp2+_i,N-_i+1));\ 388 PetscCall(PetscArraymove(ap2+_i+1,ap2+_i,N-_i+1));\ 389 rp2[_i] = col; \ 390 ap2[_i] = value; \ 391 B->nonzerostate++; \ 392 b_noinsert: ; \ 393 bilen[row] = nrow2; \ 394 } 395 396 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[]) 397 { 398 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 399 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data; 400 PetscInt l,*garray = mat->garray,diag; 401 PetscScalar *aa,*ba; 402 403 PetscFunctionBegin; 404 /* code only works for square matrices A */ 405 406 /* find size of row to the left of the diagonal part */ 407 PetscCall(MatGetOwnershipRange(A,&diag,NULL)); 408 row = row - diag; 409 for (l=0; 
l<b->i[row+1]-b->i[row]; l++) { 410 if (garray[b->j[b->i[row]+l]] > diag) break; 411 } 412 if (l) { 413 PetscCall(MatSeqAIJGetArray(mat->B,&ba)); 414 PetscCall(PetscArraycpy(ba+b->i[row],v,l)); 415 PetscCall(MatSeqAIJRestoreArray(mat->B,&ba)); 416 } 417 418 /* diagonal part */ 419 if (a->i[row+1]-a->i[row]) { 420 PetscCall(MatSeqAIJGetArray(mat->A,&aa)); 421 PetscCall(PetscArraycpy(aa+a->i[row],v+l,(a->i[row+1]-a->i[row]))); 422 PetscCall(MatSeqAIJRestoreArray(mat->A,&aa)); 423 } 424 425 /* right of diagonal part */ 426 if (b->i[row+1]-b->i[row]-l) { 427 PetscCall(MatSeqAIJGetArray(mat->B,&ba)); 428 PetscCall(PetscArraycpy(ba+b->i[row]+l,v+l+a->i[row+1]-a->i[row],b->i[row+1]-b->i[row]-l)); 429 PetscCall(MatSeqAIJRestoreArray(mat->B,&ba)); 430 } 431 PetscFunctionReturn(0); 432 } 433 434 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 435 { 436 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 437 PetscScalar value = 0.0; 438 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 439 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 440 PetscBool roworiented = aij->roworiented; 441 442 /* Some Variables required in the macro */ 443 Mat A = aij->A; 444 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 445 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 446 PetscBool ignorezeroentries = a->ignorezeroentries; 447 Mat B = aij->B; 448 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 449 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 450 MatScalar *aa,*ba; 451 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 452 PetscInt nonew; 453 MatScalar *ap1,*ap2; 454 455 PetscFunctionBegin; 456 PetscCall(MatSeqAIJGetArray(A,&aa)); 457 PetscCall(MatSeqAIJGetArray(B,&ba)); 458 for (i=0; i<m; i++) { 459 if (im[i] < 0) continue; 460 PetscCheck(im[i] < 
mat->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT,im[i],mat->rmap->N-1); 461 if (im[i] >= rstart && im[i] < rend) { 462 row = im[i] - rstart; 463 lastcol1 = -1; 464 rp1 = aj + ai[row]; 465 ap1 = aa + ai[row]; 466 rmax1 = aimax[row]; 467 nrow1 = ailen[row]; 468 low1 = 0; 469 high1 = nrow1; 470 lastcol2 = -1; 471 rp2 = bj + bi[row]; 472 ap2 = ba + bi[row]; 473 rmax2 = bimax[row]; 474 nrow2 = bilen[row]; 475 low2 = 0; 476 high2 = nrow2; 477 478 for (j=0; j<n; j++) { 479 if (v) value = roworiented ? v[i*n+j] : v[i+j*m]; 480 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue; 481 if (in[j] >= cstart && in[j] < cend) { 482 col = in[j] - cstart; 483 nonew = a->nonew; 484 MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]); 485 } else if (in[j] < 0) { 486 continue; 487 } else { 488 PetscCheck(in[j] < mat->cmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT,in[j],mat->cmap->N-1); 489 if (mat->was_assembled) { 490 if (!aij->colmap) { 491 PetscCall(MatCreateColmap_MPIAIJ_Private(mat)); 492 } 493 #if defined(PETSC_USE_CTABLE) 494 PetscCall(PetscTableFind(aij->colmap,in[j]+1,&col)); /* map global col ids to local ones */ 495 col--; 496 #else 497 col = aij->colmap[in[j]] - 1; 498 #endif 499 if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) { /* col < 0 means in[j] is a new col for B */ 500 PetscCall(MatDisAssemble_MPIAIJ(mat)); /* Change aij->B from reduced/local format to expanded/global format */ 501 col = in[j]; 502 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 503 B = aij->B; 504 b = (Mat_SeqAIJ*)B->data; 505 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a; 506 rp2 = bj + bi[row]; 507 ap2 = ba + bi[row]; 508 rmax2 = bimax[row]; 509 nrow2 = bilen[row]; 510 low2 = 0; 511 high2 = nrow2; 512 bm = aij->B->rmap->n; 513 ba = b->a; 514 } else if (col < 0 && 
!(ignorezeroentries && value == 0.0)) { 515 if (1 == ((Mat_SeqAIJ*)(aij->B->data))->nonew) { 516 PetscCall(PetscInfo(mat,"Skipping of insertion of new nonzero location in off-diagonal portion of matrix %g(%" PetscInt_FMT ",%" PetscInt_FMT ")\n",(double)PetscRealPart(value),im[i],in[j])); 517 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", im[i], in[j]); 518 } 519 } else col = in[j]; 520 nonew = b->nonew; 521 MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]); 522 } 523 } 524 } else { 525 PetscCheck(!mat->nooffprocentries,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 526 if (!aij->donotstash) { 527 mat->assembled = PETSC_FALSE; 528 if (roworiented) { 529 PetscCall(MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)))); 530 } else { 531 PetscCall(MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)))); 532 } 533 } 534 } 535 } 536 PetscCall(MatSeqAIJRestoreArray(A,&aa)); /* aa, bb might have been free'd due to reallocation above. But we don't access them here */ 537 PetscCall(MatSeqAIJRestoreArray(B,&ba)); 538 PetscFunctionReturn(0); 539 } 540 541 /* 542 This function sets the j and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix. 543 The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like). 544 No off-processor parts off the matrix are allowed here and mat->was_assembled has to be PETSC_FALSE. 
545 */ 546 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat_Symbolic(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[]) 547 { 548 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 549 Mat A = aij->A; /* diagonal part of the matrix */ 550 Mat B = aij->B; /* offdiagonal part of the matrix */ 551 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 552 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 553 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,col; 554 PetscInt *ailen = a->ilen,*aj = a->j; 555 PetscInt *bilen = b->ilen,*bj = b->j; 556 PetscInt am = aij->A->rmap->n,j; 557 PetscInt diag_so_far = 0,dnz; 558 PetscInt offd_so_far = 0,onz; 559 560 PetscFunctionBegin; 561 /* Iterate over all rows of the matrix */ 562 for (j=0; j<am; j++) { 563 dnz = onz = 0; 564 /* Iterate over all non-zero columns of the current row */ 565 for (col=mat_i[j]; col<mat_i[j+1]; col++) { 566 /* If column is in the diagonal */ 567 if (mat_j[col] >= cstart && mat_j[col] < cend) { 568 aj[diag_so_far++] = mat_j[col] - cstart; 569 dnz++; 570 } else { /* off-diagonal entries */ 571 bj[offd_so_far++] = mat_j[col]; 572 onz++; 573 } 574 } 575 ailen[j] = dnz; 576 bilen[j] = onz; 577 } 578 PetscFunctionReturn(0); 579 } 580 581 /* 582 This function sets the local j, a and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix. 583 The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like). 584 No off-processor parts off the matrix are allowed here, they are set at a later point by MatSetValues_MPIAIJ. 585 Also, mat->was_assembled has to be false, otherwise the statement aj[rowstart_diag+dnz_row] = mat_j[col] - cstart; 586 would not be true and the more complex MatSetValues_MPIAIJ has to be used. 
587 */ 588 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[],const PetscScalar mat_a[]) 589 { 590 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 591 Mat A = aij->A; /* diagonal part of the matrix */ 592 Mat B = aij->B; /* offdiagonal part of the matrix */ 593 Mat_SeqAIJ *aijd =(Mat_SeqAIJ*)(aij->A)->data,*aijo=(Mat_SeqAIJ*)(aij->B)->data; 594 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 595 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 596 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend; 597 PetscInt *ailen = a->ilen,*aj = a->j; 598 PetscInt *bilen = b->ilen,*bj = b->j; 599 PetscInt am = aij->A->rmap->n,j; 600 PetscInt *full_diag_i=aijd->i,*full_offd_i=aijo->i; /* These variables can also include non-local elements, which are set at a later point. */ 601 PetscInt col,dnz_row,onz_row,rowstart_diag,rowstart_offd; 602 PetscScalar *aa = a->a,*ba = b->a; 603 604 PetscFunctionBegin; 605 /* Iterate over all rows of the matrix */ 606 for (j=0; j<am; j++) { 607 dnz_row = onz_row = 0; 608 rowstart_offd = full_offd_i[j]; 609 rowstart_diag = full_diag_i[j]; 610 /* Iterate over all non-zero columns of the current row */ 611 for (col=mat_i[j]; col<mat_i[j+1]; col++) { 612 /* If column is in the diagonal */ 613 if (mat_j[col] >= cstart && mat_j[col] < cend) { 614 aj[rowstart_diag+dnz_row] = mat_j[col] - cstart; 615 aa[rowstart_diag+dnz_row] = mat_a[col]; 616 dnz_row++; 617 } else { /* off-diagonal entries */ 618 bj[rowstart_offd+onz_row] = mat_j[col]; 619 ba[rowstart_offd+onz_row] = mat_a[col]; 620 onz_row++; 621 } 622 } 623 ailen[j] = dnz_row; 624 bilen[j] = onz_row; 625 } 626 PetscFunctionReturn(0); 627 } 628 629 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 630 { 631 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 632 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 633 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 634 
635 PetscFunctionBegin; 636 for (i=0; i<m; i++) { 637 if (idxm[i] < 0) continue; /* negative row */ 638 PetscCheck(idxm[i] < mat->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT,idxm[i],mat->rmap->N-1); 639 if (idxm[i] >= rstart && idxm[i] < rend) { 640 row = idxm[i] - rstart; 641 for (j=0; j<n; j++) { 642 if (idxn[j] < 0) continue; /* negative column */ 643 PetscCheck(idxn[j] < mat->cmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT,idxn[j],mat->cmap->N-1); 644 if (idxn[j] >= cstart && idxn[j] < cend) { 645 col = idxn[j] - cstart; 646 PetscCall(MatGetValues(aij->A,1,&row,1,&col,v+i*n+j)); 647 } else { 648 if (!aij->colmap) { 649 PetscCall(MatCreateColmap_MPIAIJ_Private(mat)); 650 } 651 #if defined(PETSC_USE_CTABLE) 652 PetscCall(PetscTableFind(aij->colmap,idxn[j]+1,&col)); 653 col--; 654 #else 655 col = aij->colmap[idxn[j]] - 1; 656 #endif 657 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 658 else { 659 PetscCall(MatGetValues(aij->B,1,&row,1,&col,v+i*n+j)); 660 } 661 } 662 } 663 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 664 } 665 PetscFunctionReturn(0); 666 } 667 668 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 669 { 670 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 671 PetscInt nstash,reallocs; 672 673 PetscFunctionBegin; 674 if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0); 675 676 PetscCall(MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range)); 677 PetscCall(MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs)); 678 PetscCall(PetscInfo(aij->A,"Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n",nstash,reallocs)); 679 PetscFunctionReturn(0); 680 } 681 682 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 683 { 684 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 685 PetscMPIInt n; 686 PetscInt 
i,j,rstart,ncols,flg; 687 PetscInt *row,*col; 688 PetscBool other_disassembled; 689 PetscScalar *val; 690 691 /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */ 692 693 PetscFunctionBegin; 694 if (!aij->donotstash && !mat->nooffprocentries) { 695 while (1) { 696 PetscCall(MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg)); 697 if (!flg) break; 698 699 for (i=0; i<n;) { 700 /* Now identify the consecutive vals belonging to the same row */ 701 for (j=i,rstart=row[j]; j<n; j++) { 702 if (row[j] != rstart) break; 703 } 704 if (j < n) ncols = j-i; 705 else ncols = n-i; 706 /* Now assemble all these values with a single function call */ 707 PetscCall(MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode)); 708 i = j; 709 } 710 } 711 PetscCall(MatStashScatterEnd_Private(&mat->stash)); 712 } 713 #if defined(PETSC_HAVE_DEVICE) 714 if (mat->offloadmask == PETSC_OFFLOAD_CPU) aij->A->offloadmask = PETSC_OFFLOAD_CPU; 715 /* We call MatBindToCPU() on aij->A and aij->B here, because if MatBindToCPU_MPIAIJ() is called before assembly, it cannot bind these. */ 716 if (mat->boundtocpu) { 717 PetscCall(MatBindToCPU(aij->A,PETSC_TRUE)); 718 PetscCall(MatBindToCPU(aij->B,PETSC_TRUE)); 719 } 720 #endif 721 PetscCall(MatAssemblyBegin(aij->A,mode)); 722 PetscCall(MatAssemblyEnd(aij->A,mode)); 723 724 /* determine if any processor has disassembled, if so we must 725 also disassemble ourself, in order that we may reassemble. 
*/ 726 /* 727 if nonzero structure of submatrix B cannot change then we know that 728 no processor disassembled thus we can skip this stuff 729 */ 730 if (!((Mat_SeqAIJ*)aij->B->data)->nonew) { 731 PetscCall(MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat))); 732 if (mat->was_assembled && !other_disassembled) { /* mat on this rank has reduced off-diag B with local col ids, but globaly it does not */ 733 PetscCall(MatDisAssemble_MPIAIJ(mat)); 734 } 735 } 736 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 737 PetscCall(MatSetUpMultiply_MPIAIJ(mat)); 738 } 739 PetscCall(MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE)); 740 #if defined(PETSC_HAVE_DEVICE) 741 if (mat->offloadmask == PETSC_OFFLOAD_CPU && aij->B->offloadmask != PETSC_OFFLOAD_UNALLOCATED) aij->B->offloadmask = PETSC_OFFLOAD_CPU; 742 #endif 743 PetscCall(MatAssemblyBegin(aij->B,mode)); 744 PetscCall(MatAssemblyEnd(aij->B,mode)); 745 746 PetscCall(PetscFree2(aij->rowvalues,aij->rowindices)); 747 748 aij->rowvalues = NULL; 749 750 PetscCall(VecDestroy(&aij->diag)); 751 752 /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */ 753 if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 754 PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate; 755 PetscCall(MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat))); 756 } 757 #if defined(PETSC_HAVE_DEVICE) 758 mat->offloadmask = PETSC_OFFLOAD_BOTH; 759 #endif 760 PetscFunctionReturn(0); 761 } 762 763 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A) 764 { 765 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 766 767 PetscFunctionBegin; 768 PetscCall(MatZeroEntries(l->A)); 769 PetscCall(MatZeroEntries(l->B)); 770 PetscFunctionReturn(0); 771 } 772 773 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 774 { 
775 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data; 776 PetscObjectState sA, sB; 777 PetscInt *lrows; 778 PetscInt r, len; 779 PetscBool cong, lch, gch; 780 781 PetscFunctionBegin; 782 /* get locally owned rows */ 783 PetscCall(MatZeroRowsMapLocal_Private(A,N,rows,&len,&lrows)); 784 PetscCall(MatHasCongruentLayouts(A,&cong)); 785 /* fix right hand side if needed */ 786 if (x && b) { 787 const PetscScalar *xx; 788 PetscScalar *bb; 789 790 PetscCheck(cong,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Need matching row/col layout"); 791 PetscCall(VecGetArrayRead(x, &xx)); 792 PetscCall(VecGetArray(b, &bb)); 793 for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]]; 794 PetscCall(VecRestoreArrayRead(x, &xx)); 795 PetscCall(VecRestoreArray(b, &bb)); 796 } 797 798 sA = mat->A->nonzerostate; 799 sB = mat->B->nonzerostate; 800 801 if (diag != 0.0 && cong) { 802 PetscCall(MatZeroRows(mat->A, len, lrows, diag, NULL, NULL)); 803 PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL)); 804 } else if (diag != 0.0) { /* non-square or non congruent layouts -> if keepnonzeropattern is false, we allow for new insertion */ 805 Mat_SeqAIJ *aijA = (Mat_SeqAIJ*)mat->A->data; 806 Mat_SeqAIJ *aijB = (Mat_SeqAIJ*)mat->B->data; 807 PetscInt nnwA, nnwB; 808 PetscBool nnzA, nnzB; 809 810 nnwA = aijA->nonew; 811 nnwB = aijB->nonew; 812 nnzA = aijA->keepnonzeropattern; 813 nnzB = aijB->keepnonzeropattern; 814 if (!nnzA) { 815 PetscCall(PetscInfo(mat->A,"Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on diagonal block.\n")); 816 aijA->nonew = 0; 817 } 818 if (!nnzB) { 819 PetscCall(PetscInfo(mat->B,"Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on off-diagonal block.\n")); 820 aijB->nonew = 0; 821 } 822 /* Must zero here before the next loop */ 823 PetscCall(MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL)); 824 PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL)); 825 for (r = 0; r < len; ++r) { 
826 const PetscInt row = lrows[r] + A->rmap->rstart; 827 if (row >= A->cmap->N) continue; 828 PetscCall(MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES)); 829 } 830 aijA->nonew = nnwA; 831 aijB->nonew = nnwB; 832 } else { 833 PetscCall(MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL)); 834 PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL)); 835 } 836 PetscCall(PetscFree(lrows)); 837 PetscCall(MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY)); 838 PetscCall(MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY)); 839 840 /* reduce nonzerostate */ 841 lch = (PetscBool)(sA != mat->A->nonzerostate || sB != mat->B->nonzerostate); 842 PetscCall(MPIU_Allreduce(&lch,&gch,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)A))); 843 if (gch) A->nonzerostate++; 844 PetscFunctionReturn(0); 845 } 846 847 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 848 { 849 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 850 PetscMPIInt n = A->rmap->n; 851 PetscInt i,j,r,m,len = 0; 852 PetscInt *lrows,*owners = A->rmap->range; 853 PetscMPIInt p = 0; 854 PetscSFNode *rrows; 855 PetscSF sf; 856 const PetscScalar *xx; 857 PetscScalar *bb,*mask,*aij_a; 858 Vec xmask,lmask; 859 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)l->B->data; 860 const PetscInt *aj, *ii,*ridx; 861 PetscScalar *aa; 862 863 PetscFunctionBegin; 864 /* Create SF where leaves are input rows and roots are owned rows */ 865 PetscCall(PetscMalloc1(n, &lrows)); 866 for (r = 0; r < n; ++r) lrows[r] = -1; 867 PetscCall(PetscMalloc1(N, &rrows)); 868 for (r = 0; r < N; ++r) { 869 const PetscInt idx = rows[r]; 870 PetscCheck(idx >= 0 && A->rmap->N > idx,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %" PetscInt_FMT " out of range [0,%" PetscInt_FMT ")",idx,A->rmap->N); 871 if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */ 872 PetscCall(PetscLayoutFindOwner(A->rmap,idx,&p)); 873 } 874 rrows[r].rank = p; 875 rrows[r].index = rows[r] - owners[p]; 876 } 877 
PetscCall(PetscSFCreate(PetscObjectComm((PetscObject) A), &sf)); 878 PetscCall(PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER)); 879 /* Collect flags for rows to be zeroed */ 880 PetscCall(PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR)); 881 PetscCall(PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR)); 882 PetscCall(PetscSFDestroy(&sf)); 883 /* Compress and put in row numbers */ 884 for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r; 885 /* zero diagonal part of matrix */ 886 PetscCall(MatZeroRowsColumns(l->A,len,lrows,diag,x,b)); 887 /* handle off diagonal part of matrix */ 888 PetscCall(MatCreateVecs(A,&xmask,NULL)); 889 PetscCall(VecDuplicate(l->lvec,&lmask)); 890 PetscCall(VecGetArray(xmask,&bb)); 891 for (i=0; i<len; i++) bb[lrows[i]] = 1; 892 PetscCall(VecRestoreArray(xmask,&bb)); 893 PetscCall(VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD)); 894 PetscCall(VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD)); 895 PetscCall(VecDestroy(&xmask)); 896 if (x && b) { /* this code is buggy when the row and column layout don't match */ 897 PetscBool cong; 898 899 PetscCall(MatHasCongruentLayouts(A,&cong)); 900 PetscCheck(cong,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Need matching row/col layout"); 901 PetscCall(VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD)); 902 PetscCall(VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD)); 903 PetscCall(VecGetArrayRead(l->lvec,&xx)); 904 PetscCall(VecGetArray(b,&bb)); 905 } 906 PetscCall(VecGetArray(lmask,&mask)); 907 /* remove zeroed rows of off diagonal matrix */ 908 PetscCall(MatSeqAIJGetArray(l->B,&aij_a)); 909 ii = aij->i; 910 for (i=0; i<len; i++) { 911 PetscCall(PetscArrayzero(aij_a + ii[lrows[i]],ii[lrows[i]+1] - ii[lrows[i]])); 912 } 913 /* loop over all elements of off process part of matrix zeroing removed columns*/ 914 if (aij->compressedrow.use) { 915 m = 
aij->compressedrow.nrows; 916 ii = aij->compressedrow.i; 917 ridx = aij->compressedrow.rindex; 918 for (i=0; i<m; i++) { 919 n = ii[i+1] - ii[i]; 920 aj = aij->j + ii[i]; 921 aa = aij_a + ii[i]; 922 923 for (j=0; j<n; j++) { 924 if (PetscAbsScalar(mask[*aj])) { 925 if (b) bb[*ridx] -= *aa*xx[*aj]; 926 *aa = 0.0; 927 } 928 aa++; 929 aj++; 930 } 931 ridx++; 932 } 933 } else { /* do not use compressed row format */ 934 m = l->B->rmap->n; 935 for (i=0; i<m; i++) { 936 n = ii[i+1] - ii[i]; 937 aj = aij->j + ii[i]; 938 aa = aij_a + ii[i]; 939 for (j=0; j<n; j++) { 940 if (PetscAbsScalar(mask[*aj])) { 941 if (b) bb[i] -= *aa*xx[*aj]; 942 *aa = 0.0; 943 } 944 aa++; 945 aj++; 946 } 947 } 948 } 949 if (x && b) { 950 PetscCall(VecRestoreArray(b,&bb)); 951 PetscCall(VecRestoreArrayRead(l->lvec,&xx)); 952 } 953 PetscCall(MatSeqAIJRestoreArray(l->B,&aij_a)); 954 PetscCall(VecRestoreArray(lmask,&mask)); 955 PetscCall(VecDestroy(&lmask)); 956 PetscCall(PetscFree(lrows)); 957 958 /* only change matrix nonzero state if pattern was allowed to be changed */ 959 if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) { 960 PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate; 961 PetscCall(MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A))); 962 } 963 PetscFunctionReturn(0); 964 } 965 966 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 967 { 968 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 969 PetscInt nt; 970 VecScatter Mvctx = a->Mvctx; 971 972 PetscFunctionBegin; 973 PetscCall(VecGetLocalSize(xx,&nt)); 974 PetscCheck(nt == A->cmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%" PetscInt_FMT ") and xx (%" PetscInt_FMT ")",A->cmap->n,nt); 975 PetscCall(VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD)); 976 PetscCall((*a->A->ops->mult)(a->A,xx,yy)); 977 PetscCall(VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD)); 978 PetscCall((*a->B->ops->multadd)(a->B,a->lvec,yy,yy)); 979 
PetscFunctionReturn(0); 980 } 981 982 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx) 983 { 984 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 985 986 PetscFunctionBegin; 987 PetscCall(MatMultDiagonalBlock(a->A,bb,xx)); 988 PetscFunctionReturn(0); 989 } 990 991 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 992 { 993 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 994 VecScatter Mvctx = a->Mvctx; 995 996 PetscFunctionBegin; 997 PetscCall(VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD)); 998 PetscCall((*a->A->ops->multadd)(a->A,xx,yy,zz)); 999 PetscCall(VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD)); 1000 PetscCall((*a->B->ops->multadd)(a->B,a->lvec,zz,zz)); 1001 PetscFunctionReturn(0); 1002 } 1003 1004 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy) 1005 { 1006 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1007 1008 PetscFunctionBegin; 1009 /* do nondiagonal part */ 1010 PetscCall((*a->B->ops->multtranspose)(a->B,xx,a->lvec)); 1011 /* do local part */ 1012 PetscCall((*a->A->ops->multtranspose)(a->A,xx,yy)); 1013 /* add partial results together */ 1014 PetscCall(VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE)); 1015 PetscCall(VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE)); 1016 PetscFunctionReturn(0); 1017 } 1018 1019 PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool *f) 1020 { 1021 MPI_Comm comm; 1022 Mat_MPIAIJ *Aij = (Mat_MPIAIJ*) Amat->data, *Bij; 1023 Mat Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs; 1024 IS Me,Notme; 1025 PetscInt M,N,first,last,*notme,i; 1026 PetscBool lf; 1027 PetscMPIInt size; 1028 1029 PetscFunctionBegin; 1030 /* Easy test: symmetric diagonal block */ 1031 Bij = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A; 1032 PetscCall(MatIsTranspose(Adia,Bdia,tol,&lf)); 1033 PetscCall(MPIU_Allreduce(&lf,f,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)Amat))); 1034 if (!*f) PetscFunctionReturn(0); 1035 
PetscCall(PetscObjectGetComm((PetscObject)Amat,&comm)); 1036 PetscCallMPI(MPI_Comm_size(comm,&size)); 1037 if (size == 1) PetscFunctionReturn(0); 1038 1039 /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. */ 1040 PetscCall(MatGetSize(Amat,&M,&N)); 1041 PetscCall(MatGetOwnershipRange(Amat,&first,&last)); 1042 PetscCall(PetscMalloc1(N-last+first,¬me)); 1043 for (i=0; i<first; i++) notme[i] = i; 1044 for (i=last; i<M; i++) notme[i-last+first] = i; 1045 PetscCall(ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme)); 1046 PetscCall(ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me)); 1047 PetscCall(MatCreateSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs)); 1048 Aoff = Aoffs[0]; 1049 PetscCall(MatCreateSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs)); 1050 Boff = Boffs[0]; 1051 PetscCall(MatIsTranspose(Aoff,Boff,tol,f)); 1052 PetscCall(MatDestroyMatrices(1,&Aoffs)); 1053 PetscCall(MatDestroyMatrices(1,&Boffs)); 1054 PetscCall(ISDestroy(&Me)); 1055 PetscCall(ISDestroy(&Notme)); 1056 PetscCall(PetscFree(notme)); 1057 PetscFunctionReturn(0); 1058 } 1059 1060 PetscErrorCode MatIsSymmetric_MPIAIJ(Mat A,PetscReal tol,PetscBool *f) 1061 { 1062 PetscFunctionBegin; 1063 PetscCall(MatIsTranspose_MPIAIJ(A,A,tol,f)); 1064 PetscFunctionReturn(0); 1065 } 1066 1067 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1068 { 1069 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1070 1071 PetscFunctionBegin; 1072 /* do nondiagonal part */ 1073 PetscCall((*a->B->ops->multtranspose)(a->B,xx,a->lvec)); 1074 /* do local part */ 1075 PetscCall((*a->A->ops->multtransposeadd)(a->A,xx,yy,zz)); 1076 /* add partial results together */ 1077 PetscCall(VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE)); 1078 PetscCall(VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE)); 1079 PetscFunctionReturn(0); 1080 } 1081 1082 /* 1083 This only works correctly for square matrices where the subblock A->A is the 1084 
diagonal block 1085 */ 1086 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v) 1087 { 1088 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1089 1090 PetscFunctionBegin; 1091 PetscCheck(A->rmap->N == A->cmap->N,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 1092 PetscCheck(A->rmap->rstart == A->cmap->rstart && A->rmap->rend == A->cmap->rend,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition"); 1093 PetscCall(MatGetDiagonal(a->A,v)); 1094 PetscFunctionReturn(0); 1095 } 1096 1097 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa) 1098 { 1099 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1100 1101 PetscFunctionBegin; 1102 PetscCall(MatScale(a->A,aa)); 1103 PetscCall(MatScale(a->B,aa)); 1104 PetscFunctionReturn(0); 1105 } 1106 1107 /* Free COO stuff; must match allocation methods in MatSetPreallocationCOO_MPIAIJ() */ 1108 PETSC_INTERN PetscErrorCode MatResetPreallocationCOO_MPIAIJ(Mat mat) 1109 { 1110 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1111 1112 PetscFunctionBegin; 1113 PetscCall(PetscSFDestroy(&aij->coo_sf)); 1114 PetscCall(PetscFree(aij->Aperm1)); 1115 PetscCall(PetscFree(aij->Bperm1)); 1116 PetscCall(PetscFree(aij->Ajmap1)); 1117 PetscCall(PetscFree(aij->Bjmap1)); 1118 1119 PetscCall(PetscFree(aij->Aimap2)); 1120 PetscCall(PetscFree(aij->Bimap2)); 1121 PetscCall(PetscFree(aij->Aperm2)); 1122 PetscCall(PetscFree(aij->Bperm2)); 1123 PetscCall(PetscFree(aij->Ajmap2)); 1124 PetscCall(PetscFree(aij->Bjmap2)); 1125 1126 PetscCall(PetscFree2(aij->sendbuf,aij->recvbuf)); 1127 PetscCall(PetscFree(aij->Cperm1)); 1128 PetscFunctionReturn(0); 1129 } 1130 1131 PetscErrorCode MatDestroy_MPIAIJ(Mat mat) 1132 { 1133 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1134 1135 PetscFunctionBegin; 1136 #if defined(PETSC_USE_LOG) 1137 PetscLogObjectState((PetscObject)mat,"Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT,mat->rmap->N,mat->cmap->N); 1138 #endif 1139 PetscCall(MatStashDestroy_Private(&mat->stash)); 1140 
PetscCall(VecDestroy(&aij->diag)); 1141 PetscCall(MatDestroy(&aij->A)); 1142 PetscCall(MatDestroy(&aij->B)); 1143 #if defined(PETSC_USE_CTABLE) 1144 PetscCall(PetscTableDestroy(&aij->colmap)); 1145 #else 1146 PetscCall(PetscFree(aij->colmap)); 1147 #endif 1148 PetscCall(PetscFree(aij->garray)); 1149 PetscCall(VecDestroy(&aij->lvec)); 1150 PetscCall(VecScatterDestroy(&aij->Mvctx)); 1151 PetscCall(PetscFree2(aij->rowvalues,aij->rowindices)); 1152 PetscCall(PetscFree(aij->ld)); 1153 1154 /* Free COO */ 1155 PetscCall(MatResetPreallocationCOO_MPIAIJ(mat)); 1156 1157 PetscCall(PetscFree(mat->data)); 1158 1159 /* may be created by MatCreateMPIAIJSumSeqAIJSymbolic */ 1160 PetscCall(PetscObjectCompose((PetscObject)mat,"MatMergeSeqsToMPI",NULL)); 1161 1162 PetscCall(PetscObjectChangeTypeName((PetscObject)mat,NULL)); 1163 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL)); 1164 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL)); 1165 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL)); 1166 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL)); 1167 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatResetPreallocation_C",NULL)); 1168 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL)); 1169 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL)); 1170 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpibaij_C",NULL)); 1171 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL)); 1172 #if defined(PETSC_HAVE_CUDA) 1173 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijcusparse_C",NULL)); 1174 #endif 1175 #if defined(PETSC_HAVE_KOKKOS_KERNELS) 1176 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijkokkos_C",NULL)); 1177 #endif 1178 
PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpidense_C",NULL)); 1179 #if defined(PETSC_HAVE_ELEMENTAL) 1180 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_elemental_C",NULL)); 1181 #endif 1182 #if defined(PETSC_HAVE_SCALAPACK) 1183 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_scalapack_C",NULL)); 1184 #endif 1185 #if defined(PETSC_HAVE_HYPRE) 1186 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_hypre_C",NULL)); 1187 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_transpose_mpiaij_mpiaij_C",NULL)); 1188 #endif 1189 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_is_C",NULL)); 1190 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_is_mpiaij_C",NULL)); 1191 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaij_mpiaij_C",NULL)); 1192 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetUseScalableIncreaseOverlap_C",NULL)); 1193 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijperm_C",NULL)); 1194 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijsell_C",NULL)); 1195 #if defined(PETSC_HAVE_MKL_SPARSE) 1196 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijmkl_C",NULL)); 1197 #endif 1198 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijcrl_C",NULL)); 1199 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_is_C",NULL)); 1200 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisell_C",NULL)); 1201 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatSetPreallocationCOO_C",NULL)); 1202 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatSetValuesCOO_C",NULL)); 1203 PetscFunctionReturn(0); 1204 } 1205 1206 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer 
viewer) 1207 { 1208 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1209 Mat_SeqAIJ *A = (Mat_SeqAIJ*)aij->A->data; 1210 Mat_SeqAIJ *B = (Mat_SeqAIJ*)aij->B->data; 1211 const PetscInt *garray = aij->garray; 1212 const PetscScalar *aa,*ba; 1213 PetscInt header[4],M,N,m,rs,cs,nz,cnt,i,ja,jb; 1214 PetscInt *rowlens; 1215 PetscInt *colidxs; 1216 PetscScalar *matvals; 1217 1218 PetscFunctionBegin; 1219 PetscCall(PetscViewerSetUp(viewer)); 1220 1221 M = mat->rmap->N; 1222 N = mat->cmap->N; 1223 m = mat->rmap->n; 1224 rs = mat->rmap->rstart; 1225 cs = mat->cmap->rstart; 1226 nz = A->nz + B->nz; 1227 1228 /* write matrix header */ 1229 header[0] = MAT_FILE_CLASSID; 1230 header[1] = M; header[2] = N; header[3] = nz; 1231 PetscCallMPI(MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat))); 1232 PetscCall(PetscViewerBinaryWrite(viewer,header,4,PETSC_INT)); 1233 1234 /* fill in and store row lengths */ 1235 PetscCall(PetscMalloc1(m,&rowlens)); 1236 for (i=0; i<m; i++) rowlens[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]; 1237 PetscCall(PetscViewerBinaryWriteAll(viewer,rowlens,m,rs,M,PETSC_INT)); 1238 PetscCall(PetscFree(rowlens)); 1239 1240 /* fill in and store column indices */ 1241 PetscCall(PetscMalloc1(nz,&colidxs)); 1242 for (cnt=0, i=0; i<m; i++) { 1243 for (jb=B->i[i]; jb<B->i[i+1]; jb++) { 1244 if (garray[B->j[jb]] > cs) break; 1245 colidxs[cnt++] = garray[B->j[jb]]; 1246 } 1247 for (ja=A->i[i]; ja<A->i[i+1]; ja++) 1248 colidxs[cnt++] = A->j[ja] + cs; 1249 for (; jb<B->i[i+1]; jb++) 1250 colidxs[cnt++] = garray[B->j[jb]]; 1251 } 1252 PetscCheck(cnt == nz,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt_FMT,cnt,nz); 1253 PetscCall(PetscViewerBinaryWriteAll(viewer,colidxs,nz,PETSC_DETERMINE,PETSC_DETERMINE,PETSC_INT)); 1254 PetscCall(PetscFree(colidxs)); 1255 1256 /* fill in and store nonzero values */ 1257 PetscCall(MatSeqAIJGetArrayRead(aij->A,&aa)); 1258 PetscCall(MatSeqAIJGetArrayRead(aij->B,&ba)); 
1259 PetscCall(PetscMalloc1(nz,&matvals)); 1260 for (cnt=0, i=0; i<m; i++) { 1261 for (jb=B->i[i]; jb<B->i[i+1]; jb++) { 1262 if (garray[B->j[jb]] > cs) break; 1263 matvals[cnt++] = ba[jb]; 1264 } 1265 for (ja=A->i[i]; ja<A->i[i+1]; ja++) 1266 matvals[cnt++] = aa[ja]; 1267 for (; jb<B->i[i+1]; jb++) 1268 matvals[cnt++] = ba[jb]; 1269 } 1270 PetscCall(MatSeqAIJRestoreArrayRead(aij->A,&aa)); 1271 PetscCall(MatSeqAIJRestoreArrayRead(aij->B,&ba)); 1272 PetscCheck(cnt == nz,PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt_FMT,cnt,nz); 1273 PetscCall(PetscViewerBinaryWriteAll(viewer,matvals,nz,PETSC_DETERMINE,PETSC_DETERMINE,PETSC_SCALAR)); 1274 PetscCall(PetscFree(matvals)); 1275 1276 /* write block size option to the viewer's .info file */ 1277 PetscCall(MatView_Binary_BlockSizes(mat,viewer)); 1278 PetscFunctionReturn(0); 1279 } 1280 1281 #include <petscdraw.h> 1282 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 1283 { 1284 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1285 PetscMPIInt rank = aij->rank,size = aij->size; 1286 PetscBool isdraw,iascii,isbinary; 1287 PetscViewer sviewer; 1288 PetscViewerFormat format; 1289 1290 PetscFunctionBegin; 1291 PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw)); 1292 PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii)); 1293 PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary)); 1294 if (iascii) { 1295 PetscCall(PetscViewerGetFormat(viewer,&format)); 1296 if (format == PETSC_VIEWER_LOAD_BALANCE) { 1297 PetscInt i,nmax = 0,nmin = PETSC_MAX_INT,navg = 0,*nz,nzlocal = ((Mat_SeqAIJ*) (aij->A->data))->nz + ((Mat_SeqAIJ*) (aij->B->data))->nz; 1298 PetscCall(PetscMalloc1(size,&nz)); 1299 PetscCallMPI(MPI_Allgather(&nzlocal,1,MPIU_INT,nz,1,MPIU_INT,PetscObjectComm((PetscObject)mat))); 1300 for (i=0; i<(PetscInt)size; i++) { 1301 nmax = PetscMax(nmax,nz[i]); 1302 nmin = 
PetscMin(nmin,nz[i]); 1303 navg += nz[i]; 1304 } 1305 PetscCall(PetscFree(nz)); 1306 navg = navg/size; 1307 PetscCall(PetscViewerASCIIPrintf(viewer,"Load Balance - Nonzeros: Min %" PetscInt_FMT " avg %" PetscInt_FMT " max %" PetscInt_FMT "\n",nmin,navg,nmax)); 1308 PetscFunctionReturn(0); 1309 } 1310 PetscCall(PetscViewerGetFormat(viewer,&format)); 1311 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1312 MatInfo info; 1313 PetscInt *inodes=NULL; 1314 1315 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank)); 1316 PetscCall(MatGetInfo(mat,MAT_LOCAL,&info)); 1317 PetscCall(MatInodeGetInodeSizes(aij->A,NULL,&inodes,NULL)); 1318 PetscCall(PetscViewerASCIIPushSynchronized(viewer)); 1319 if (!inodes) { 1320 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %g, not using I-node routines\n", 1321 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory)); 1322 } else { 1323 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %g, using I-node routines\n", 1324 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory)); 1325 } 1326 PetscCall(MatGetInfo(aij->A,MAT_LOCAL,&info)); 1327 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %" PetscInt_FMT " \n",rank,(PetscInt)info.nz_used)); 1328 PetscCall(MatGetInfo(aij->B,MAT_LOCAL,&info)); 1329 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %" PetscInt_FMT " \n",rank,(PetscInt)info.nz_used)); 1330 PetscCall(PetscViewerFlush(viewer)); 1331 PetscCall(PetscViewerASCIIPopSynchronized(viewer)); 1332 PetscCall(PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n")); 1333 PetscCall(VecScatterView(aij->Mvctx,viewer)); 1334 PetscFunctionReturn(0); 1335 } else if (format == 
PETSC_VIEWER_ASCII_INFO) { 1336 PetscInt inodecount,inodelimit,*inodes; 1337 PetscCall(MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit)); 1338 if (inodes) { 1339 PetscCall(PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %" PetscInt_FMT " nodes, limit used is %" PetscInt_FMT "\n",inodecount,inodelimit)); 1340 } else { 1341 PetscCall(PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n")); 1342 } 1343 PetscFunctionReturn(0); 1344 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1345 PetscFunctionReturn(0); 1346 } 1347 } else if (isbinary) { 1348 if (size == 1) { 1349 PetscCall(PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name)); 1350 PetscCall(MatView(aij->A,viewer)); 1351 } else { 1352 PetscCall(MatView_MPIAIJ_Binary(mat,viewer)); 1353 } 1354 PetscFunctionReturn(0); 1355 } else if (iascii && size == 1) { 1356 PetscCall(PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name)); 1357 PetscCall(MatView(aij->A,viewer)); 1358 PetscFunctionReturn(0); 1359 } else if (isdraw) { 1360 PetscDraw draw; 1361 PetscBool isnull; 1362 PetscCall(PetscViewerDrawGetDraw(viewer,0,&draw)); 1363 PetscCall(PetscDrawIsNull(draw,&isnull)); 1364 if (isnull) PetscFunctionReturn(0); 1365 } 1366 1367 { /* assemble the entire matrix onto first processor */ 1368 Mat A = NULL, Av; 1369 IS isrow,iscol; 1370 1371 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->rmap->N : 0,0,1,&isrow)); 1372 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->cmap->N : 0,0,1,&iscol)); 1373 PetscCall(MatCreateSubMatrix(mat,isrow,iscol,MAT_INITIAL_MATRIX,&A)); 1374 PetscCall(MatMPIAIJGetSeqAIJ(A,&Av,NULL,NULL)); 1375 /* The commented code uses MatCreateSubMatrices instead */ 1376 /* 1377 Mat *AA, A = NULL, Av; 1378 IS isrow,iscol; 1379 1380 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? 
mat->rmap->N : 0,0,1,&isrow)); 1381 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->cmap->N : 0,0,1,&iscol)); 1382 PetscCall(MatCreateSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&AA)); 1383 if (rank == 0) { 1384 PetscCall(PetscObjectReference((PetscObject)AA[0])); 1385 A = AA[0]; 1386 Av = AA[0]; 1387 } 1388 PetscCall(MatDestroySubMatrices(1,&AA)); 1389 */ 1390 PetscCall(ISDestroy(&iscol)); 1391 PetscCall(ISDestroy(&isrow)); 1392 /* 1393 Everyone has to call to draw the matrix since the graphics waits are 1394 synchronized across all processors that share the PetscDraw object 1395 */ 1396 PetscCall(PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer)); 1397 if (rank == 0) { 1398 if (((PetscObject)mat)->name) { 1399 PetscCall(PetscObjectSetName((PetscObject)Av,((PetscObject)mat)->name)); 1400 } 1401 PetscCall(MatView_SeqAIJ(Av,sviewer)); 1402 } 1403 PetscCall(PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer)); 1404 PetscCall(PetscViewerFlush(viewer)); 1405 PetscCall(MatDestroy(&A)); 1406 } 1407 PetscFunctionReturn(0); 1408 } 1409 1410 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer) 1411 { 1412 PetscBool iascii,isdraw,issocket,isbinary; 1413 1414 PetscFunctionBegin; 1415 PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii)); 1416 PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw)); 1417 PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary)); 1418 PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket)); 1419 if (iascii || isdraw || isbinary || issocket) { 1420 PetscCall(MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer)); 1421 } 1422 PetscFunctionReturn(0); 1423 } 1424 1425 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1426 { 1427 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1428 Vec bb1 = NULL; 1429 PetscBool hasop; 1430 
1431 PetscFunctionBegin; 1432 if (flag == SOR_APPLY_UPPER) { 1433 PetscCall((*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx)); 1434 PetscFunctionReturn(0); 1435 } 1436 1437 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) { 1438 PetscCall(VecDuplicate(bb,&bb1)); 1439 } 1440 1441 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 1442 if (flag & SOR_ZERO_INITIAL_GUESS) { 1443 PetscCall((*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx)); 1444 its--; 1445 } 1446 1447 while (its--) { 1448 PetscCall(VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD)); 1449 PetscCall(VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD)); 1450 1451 /* update rhs: bb1 = bb - B*x */ 1452 PetscCall(VecScale(mat->lvec,-1.0)); 1453 PetscCall((*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1)); 1454 1455 /* local sweep */ 1456 PetscCall((*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx)); 1457 } 1458 } else if (flag & SOR_LOCAL_FORWARD_SWEEP) { 1459 if (flag & SOR_ZERO_INITIAL_GUESS) { 1460 PetscCall((*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx)); 1461 its--; 1462 } 1463 while (its--) { 1464 PetscCall(VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD)); 1465 PetscCall(VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD)); 1466 1467 /* update rhs: bb1 = bb - B*x */ 1468 PetscCall(VecScale(mat->lvec,-1.0)); 1469 PetscCall((*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1)); 1470 1471 /* local sweep */ 1472 PetscCall((*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx)); 1473 } 1474 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) { 1475 if (flag & SOR_ZERO_INITIAL_GUESS) { 1476 PetscCall((*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx)); 1477 its--; 1478 } 1479 while (its--) { 1480 PetscCall(VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD)); 1481 
PetscCall(VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD)); 1482 1483 /* update rhs: bb1 = bb - B*x */ 1484 PetscCall(VecScale(mat->lvec,-1.0)); 1485 PetscCall((*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1)); 1486 1487 /* local sweep */ 1488 PetscCall((*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx)); 1489 } 1490 } else if (flag & SOR_EISENSTAT) { 1491 Vec xx1; 1492 1493 PetscCall(VecDuplicate(bb,&xx1)); 1494 PetscCall((*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx)); 1495 1496 PetscCall(VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD)); 1497 PetscCall(VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD)); 1498 if (!mat->diag) { 1499 PetscCall(MatCreateVecs(matin,&mat->diag,NULL)); 1500 PetscCall(MatGetDiagonal(matin,mat->diag)); 1501 } 1502 PetscCall(MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop)); 1503 if (hasop) { 1504 PetscCall(MatMultDiagonalBlock(matin,xx,bb1)); 1505 } else { 1506 PetscCall(VecPointwiseMult(bb1,mat->diag,xx)); 1507 } 1508 PetscCall(VecAYPX(bb1,(omega-2.0)/omega,bb)); 1509 1510 PetscCall(MatMultAdd(mat->B,mat->lvec,bb1,bb1)); 1511 1512 /* local sweep */ 1513 PetscCall((*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1)); 1514 PetscCall(VecAXPY(xx,1.0,xx1)); 1515 PetscCall(VecDestroy(&xx1)); 1516 } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported"); 1517 1518 PetscCall(VecDestroy(&bb1)); 1519 1520 matin->factorerrortype = mat->A->factorerrortype; 1521 PetscFunctionReturn(0); 1522 } 1523 1524 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B) 1525 { 1526 Mat aA,aB,Aperm; 1527 const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj; 1528 PetscScalar *aa,*ba; 1529 PetscInt i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest; 1530 PetscSF rowsf,sf; 
1531 IS parcolp = NULL; 1532 PetscBool done; 1533 1534 PetscFunctionBegin; 1535 PetscCall(MatGetLocalSize(A,&m,&n)); 1536 PetscCall(ISGetIndices(rowp,&rwant)); 1537 PetscCall(ISGetIndices(colp,&cwant)); 1538 PetscCall(PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest)); 1539 1540 /* Invert row permutation to find out where my rows should go */ 1541 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf)); 1542 PetscCall(PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant)); 1543 PetscCall(PetscSFSetFromOptions(rowsf)); 1544 for (i=0; i<m; i++) work[i] = A->rmap->rstart + i; 1545 PetscCall(PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPI_REPLACE)); 1546 PetscCall(PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPI_REPLACE)); 1547 1548 /* Invert column permutation to find out where my columns should go */ 1549 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A),&sf)); 1550 PetscCall(PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant)); 1551 PetscCall(PetscSFSetFromOptions(sf)); 1552 for (i=0; i<n; i++) work[i] = A->cmap->rstart + i; 1553 PetscCall(PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPI_REPLACE)); 1554 PetscCall(PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPI_REPLACE)); 1555 PetscCall(PetscSFDestroy(&sf)); 1556 1557 PetscCall(ISRestoreIndices(rowp,&rwant)); 1558 PetscCall(ISRestoreIndices(colp,&cwant)); 1559 PetscCall(MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols)); 1560 1561 /* Find out where my gcols should go */ 1562 PetscCall(MatGetSize(aB,NULL,&ng)); 1563 PetscCall(PetscMalloc1(ng,&gcdest)); 1564 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A),&sf)); 1565 PetscCall(PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols)); 1566 PetscCall(PetscSFSetFromOptions(sf)); 1567 PetscCall(PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest,MPI_REPLACE)); 1568 PetscCall(PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest,MPI_REPLACE)); 1569 PetscCall(PetscSFDestroy(&sf)); 1570 1571 
PetscCall(PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz)); 1572 PetscCall(MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done)); 1573 PetscCall(MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done)); 1574 for (i=0; i<m; i++) { 1575 PetscInt row = rdest[i]; 1576 PetscMPIInt rowner; 1577 PetscCall(PetscLayoutFindOwner(A->rmap,row,&rowner)); 1578 for (j=ai[i]; j<ai[i+1]; j++) { 1579 PetscInt col = cdest[aj[j]]; 1580 PetscMPIInt cowner; 1581 PetscCall(PetscLayoutFindOwner(A->cmap,col,&cowner)); /* Could build an index for the columns to eliminate this search */ 1582 if (rowner == cowner) dnnz[i]++; 1583 else onnz[i]++; 1584 } 1585 for (j=bi[i]; j<bi[i+1]; j++) { 1586 PetscInt col = gcdest[bj[j]]; 1587 PetscMPIInt cowner; 1588 PetscCall(PetscLayoutFindOwner(A->cmap,col,&cowner)); 1589 if (rowner == cowner) dnnz[i]++; 1590 else onnz[i]++; 1591 } 1592 } 1593 PetscCall(PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz,MPI_REPLACE)); 1594 PetscCall(PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz,MPI_REPLACE)); 1595 PetscCall(PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz,MPI_REPLACE)); 1596 PetscCall(PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz,MPI_REPLACE)); 1597 PetscCall(PetscSFDestroy(&rowsf)); 1598 1599 PetscCall(MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm)); 1600 PetscCall(MatSeqAIJGetArray(aA,&aa)); 1601 PetscCall(MatSeqAIJGetArray(aB,&ba)); 1602 for (i=0; i<m; i++) { 1603 PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */ 1604 PetscInt j0,rowlen; 1605 rowlen = ai[i+1] - ai[i]; 1606 for (j0=j=0; j<rowlen; j0=j) { /* rowlen could be larger than number of rows m, so sum in batches */ 1607 for (; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]]; 1608 PetscCall(MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES)); 1609 } 1610 rowlen = bi[i+1] - bi[i]; 1611 for (j0=j=0; j<rowlen; j0=j) { 1612 for (; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = 
gcdest[bj[bi[i]+j]]; 1613 PetscCall(MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES)); 1614 } 1615 } 1616 PetscCall(MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY)); 1617 PetscCall(MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY)); 1618 PetscCall(MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done)); 1619 PetscCall(MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done)); 1620 PetscCall(MatSeqAIJRestoreArray(aA,&aa)); 1621 PetscCall(MatSeqAIJRestoreArray(aB,&ba)); 1622 PetscCall(PetscFree4(dnnz,onnz,tdnnz,tonnz)); 1623 PetscCall(PetscFree3(work,rdest,cdest)); 1624 PetscCall(PetscFree(gcdest)); 1625 if (parcolp) PetscCall(ISDestroy(&colp)); 1626 *B = Aperm; 1627 PetscFunctionReturn(0); 1628 } 1629 1630 PetscErrorCode MatGetGhosts_MPIAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[]) 1631 { 1632 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1633 1634 PetscFunctionBegin; 1635 PetscCall(MatGetSize(aij->B,NULL,nghosts)); 1636 if (ghosts) *ghosts = aij->garray; 1637 PetscFunctionReturn(0); 1638 } 1639 1640 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1641 { 1642 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1643 Mat A = mat->A,B = mat->B; 1644 PetscLogDouble isend[5],irecv[5]; 1645 1646 PetscFunctionBegin; 1647 info->block_size = 1.0; 1648 PetscCall(MatGetInfo(A,MAT_LOCAL,info)); 1649 1650 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1651 isend[3] = info->memory; isend[4] = info->mallocs; 1652 1653 PetscCall(MatGetInfo(B,MAT_LOCAL,info)); 1654 1655 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1656 isend[3] += info->memory; isend[4] += info->mallocs; 1657 if (flag == MAT_LOCAL) { 1658 info->nz_used = isend[0]; 1659 info->nz_allocated = isend[1]; 1660 info->nz_unneeded = isend[2]; 1661 info->memory = isend[3]; 1662 info->mallocs = isend[4]; 1663 } else if (flag == MAT_GLOBAL_MAX) { 1664 
PetscCall(MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_MAX,PetscObjectComm((PetscObject)matin))); 1665 1666 info->nz_used = irecv[0]; 1667 info->nz_allocated = irecv[1]; 1668 info->nz_unneeded = irecv[2]; 1669 info->memory = irecv[3]; 1670 info->mallocs = irecv[4]; 1671 } else if (flag == MAT_GLOBAL_SUM) { 1672 PetscCall(MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_SUM,PetscObjectComm((PetscObject)matin))); 1673 1674 info->nz_used = irecv[0]; 1675 info->nz_allocated = irecv[1]; 1676 info->nz_unneeded = irecv[2]; 1677 info->memory = irecv[3]; 1678 info->mallocs = irecv[4]; 1679 } 1680 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1681 info->fill_ratio_needed = 0; 1682 info->factor_mallocs = 0; 1683 PetscFunctionReturn(0); 1684 } 1685 1686 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg) 1687 { 1688 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1689 1690 PetscFunctionBegin; 1691 switch (op) { 1692 case MAT_NEW_NONZERO_LOCATIONS: 1693 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1694 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1695 case MAT_KEEP_NONZERO_PATTERN: 1696 case MAT_NEW_NONZERO_LOCATION_ERR: 1697 case MAT_USE_INODES: 1698 case MAT_IGNORE_ZERO_ENTRIES: 1699 case MAT_FORM_EXPLICIT_TRANSPOSE: 1700 MatCheckPreallocated(A,1); 1701 PetscCall(MatSetOption(a->A,op,flg)); 1702 PetscCall(MatSetOption(a->B,op,flg)); 1703 break; 1704 case MAT_ROW_ORIENTED: 1705 MatCheckPreallocated(A,1); 1706 a->roworiented = flg; 1707 1708 PetscCall(MatSetOption(a->A,op,flg)); 1709 PetscCall(MatSetOption(a->B,op,flg)); 1710 break; 1711 case MAT_FORCE_DIAGONAL_ENTRIES: 1712 case MAT_SORTED_FULL: 1713 PetscCall(PetscInfo(A,"Option %s ignored\n",MatOptions[op])); 1714 break; 1715 case MAT_IGNORE_OFF_PROC_ENTRIES: 1716 a->donotstash = flg; 1717 break; 1718 /* Symmetry flags are handled directly by MatSetOption() and they don't affect preallocation */ 1719 case MAT_SPD: 1720 case MAT_SYMMETRIC: 1721 case MAT_STRUCTURALLY_SYMMETRIC: 1722 case MAT_HERMITIAN: 1723 
case MAT_SYMMETRY_ETERNAL: 1724 break; 1725 case MAT_SUBMAT_SINGLEIS: 1726 A->submat_singleis = flg; 1727 break; 1728 case MAT_STRUCTURE_ONLY: 1729 /* The option is handled directly by MatSetOption() */ 1730 break; 1731 default: 1732 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1733 } 1734 PetscFunctionReturn(0); 1735 } 1736 1737 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1738 { 1739 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1740 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1741 PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart; 1742 PetscInt nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend; 1743 PetscInt *cmap,*idx_p; 1744 1745 PetscFunctionBegin; 1746 PetscCheck(!mat->getrowactive,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1747 mat->getrowactive = PETSC_TRUE; 1748 1749 if (!mat->rowvalues && (idx || v)) { 1750 /* 1751 allocate enough space to hold information from the longest row. 
1752 */ 1753 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1754 PetscInt max = 1,tmp; 1755 for (i=0; i<matin->rmap->n; i++) { 1756 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1757 if (max < tmp) max = tmp; 1758 } 1759 PetscCall(PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices)); 1760 } 1761 1762 PetscCheck(row >= rstart && row < rend,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows"); 1763 lrow = row - rstart; 1764 1765 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1766 if (!v) {pvA = NULL; pvB = NULL;} 1767 if (!idx) {pcA = NULL; if (!v) pcB = NULL;} 1768 PetscCall((*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA)); 1769 PetscCall((*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB)); 1770 nztot = nzA + nzB; 1771 1772 cmap = mat->garray; 1773 if (v || idx) { 1774 if (nztot) { 1775 /* Sort by increasing column numbers, assuming A and B already sorted */ 1776 PetscInt imark = -1; 1777 if (v) { 1778 *v = v_p = mat->rowvalues; 1779 for (i=0; i<nzB; i++) { 1780 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1781 else break; 1782 } 1783 imark = i; 1784 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1785 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1786 } 1787 if (idx) { 1788 *idx = idx_p = mat->rowindices; 1789 if (imark > -1) { 1790 for (i=0; i<imark; i++) { 1791 idx_p[i] = cmap[cworkB[i]]; 1792 } 1793 } else { 1794 for (i=0; i<nzB; i++) { 1795 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1796 else break; 1797 } 1798 imark = i; 1799 } 1800 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1801 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1802 } 1803 } else { 1804 if (idx) *idx = NULL; 1805 if (v) *v = NULL; 1806 } 1807 } 1808 *nz = nztot; 1809 PetscCall((*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA)); 1810 PetscCall((*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB)); 1811 PetscFunctionReturn(0); 1812 } 1813 1814 PetscErrorCode MatRestoreRow_MPIAIJ(Mat 
mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1815 { 1816 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1817 1818 PetscFunctionBegin; 1819 PetscCheck(aij->getrowactive,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1820 aij->getrowactive = PETSC_FALSE; 1821 PetscFunctionReturn(0); 1822 } 1823 1824 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1825 { 1826 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1827 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1828 PetscInt i,j,cstart = mat->cmap->rstart; 1829 PetscReal sum = 0.0; 1830 const MatScalar *v,*amata,*bmata; 1831 1832 PetscFunctionBegin; 1833 if (aij->size == 1) { 1834 PetscCall(MatNorm(aij->A,type,norm)); 1835 } else { 1836 PetscCall(MatSeqAIJGetArrayRead(aij->A,&amata)); 1837 PetscCall(MatSeqAIJGetArrayRead(aij->B,&bmata)); 1838 if (type == NORM_FROBENIUS) { 1839 v = amata; 1840 for (i=0; i<amat->nz; i++) { 1841 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1842 } 1843 v = bmata; 1844 for (i=0; i<bmat->nz; i++) { 1845 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1846 } 1847 PetscCall(MPIU_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat))); 1848 *norm = PetscSqrtReal(*norm); 1849 PetscCall(PetscLogFlops(2.0*amat->nz+2.0*bmat->nz)); 1850 } else if (type == NORM_1) { /* max column norm */ 1851 PetscReal *tmp,*tmp2; 1852 PetscInt *jj,*garray = aij->garray; 1853 PetscCall(PetscCalloc1(mat->cmap->N+1,&tmp)); 1854 PetscCall(PetscMalloc1(mat->cmap->N+1,&tmp2)); 1855 *norm = 0.0; 1856 v = amata; jj = amat->j; 1857 for (j=0; j<amat->nz; j++) { 1858 tmp[cstart + *jj++] += PetscAbsScalar(*v); v++; 1859 } 1860 v = bmata; jj = bmat->j; 1861 for (j=0; j<bmat->nz; j++) { 1862 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 1863 } 1864 PetscCall(MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat))); 1865 for (j=0; j<mat->cmap->N; j++) { 1866 if (tmp2[j] > *norm) *norm = 
tmp2[j]; 1867 } 1868 PetscCall(PetscFree(tmp)); 1869 PetscCall(PetscFree(tmp2)); 1870 PetscCall(PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0))); 1871 } else if (type == NORM_INFINITY) { /* max row norm */ 1872 PetscReal ntemp = 0.0; 1873 for (j=0; j<aij->A->rmap->n; j++) { 1874 v = amata + amat->i[j]; 1875 sum = 0.0; 1876 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 1877 sum += PetscAbsScalar(*v); v++; 1878 } 1879 v = bmata + bmat->i[j]; 1880 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 1881 sum += PetscAbsScalar(*v); v++; 1882 } 1883 if (sum > ntemp) ntemp = sum; 1884 } 1885 PetscCall(MPIU_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat))); 1886 PetscCall(PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0))); 1887 } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm"); 1888 PetscCall(MatSeqAIJRestoreArrayRead(aij->A,&amata)); 1889 PetscCall(MatSeqAIJRestoreArrayRead(aij->B,&bmata)); 1890 } 1891 PetscFunctionReturn(0); 1892 } 1893 1894 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout) 1895 { 1896 Mat_MPIAIJ *a =(Mat_MPIAIJ*)A->data,*b; 1897 Mat_SeqAIJ *Aloc =(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data,*sub_B_diag; 1898 PetscInt M = A->rmap->N,N=A->cmap->N,ma,na,mb,nb,row,*cols,*cols_tmp,*B_diag_ilen,i,ncol,A_diag_ncol; 1899 const PetscInt *ai,*aj,*bi,*bj,*B_diag_i; 1900 Mat B,A_diag,*B_diag; 1901 const MatScalar *pbv,*bv; 1902 1903 PetscFunctionBegin; 1904 ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n; 1905 ai = Aloc->i; aj = Aloc->j; 1906 bi = Bloc->i; bj = Bloc->j; 1907 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 1908 PetscInt *d_nnz,*g_nnz,*o_nnz; 1909 PetscSFNode *oloc; 1910 PETSC_UNUSED PetscSF sf; 1911 1912 PetscCall(PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc)); 1913 /* compute d_nnz for preallocation */ 1914 PetscCall(PetscArrayzero(d_nnz,na)); 1915 for (i=0; i<ai[ma]; i++) d_nnz[aj[i]]++; 1916 /* compute local off-diagonal 
contributions */ 1917 PetscCall(PetscArrayzero(g_nnz,nb)); 1918 for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++; 1919 /* map those to global */ 1920 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A),&sf)); 1921 PetscCall(PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray)); 1922 PetscCall(PetscSFSetFromOptions(sf)); 1923 PetscCall(PetscArrayzero(o_nnz,na)); 1924 PetscCall(PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM)); 1925 PetscCall(PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM)); 1926 PetscCall(PetscSFDestroy(&sf)); 1927 1928 PetscCall(MatCreate(PetscObjectComm((PetscObject)A),&B)); 1929 PetscCall(MatSetSizes(B,A->cmap->n,A->rmap->n,N,M)); 1930 PetscCall(MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs))); 1931 PetscCall(MatSetType(B,((PetscObject)A)->type_name)); 1932 PetscCall(MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz)); 1933 PetscCall(PetscFree4(d_nnz,o_nnz,g_nnz,oloc)); 1934 } else { 1935 B = *matout; 1936 PetscCall(MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE)); 1937 } 1938 1939 b = (Mat_MPIAIJ*)B->data; 1940 A_diag = a->A; 1941 B_diag = &b->A; 1942 sub_B_diag = (Mat_SeqAIJ*)(*B_diag)->data; 1943 A_diag_ncol = A_diag->cmap->N; 1944 B_diag_ilen = sub_B_diag->ilen; 1945 B_diag_i = sub_B_diag->i; 1946 1947 /* Set ilen for diagonal of B */ 1948 for (i=0; i<A_diag_ncol; i++) { 1949 B_diag_ilen[i] = B_diag_i[i+1] - B_diag_i[i]; 1950 } 1951 1952 /* Transpose the diagonal part of the matrix. In contrast to the offdiagonal part, this can be done 1953 very quickly (=without using MatSetValues), because all writes are local. 
*/ 1954 PetscCall(MatTranspose(A_diag,MAT_REUSE_MATRIX,B_diag)); 1955 1956 /* copy over the B part */ 1957 PetscCall(PetscMalloc1(bi[mb],&cols)); 1958 PetscCall(MatSeqAIJGetArrayRead(a->B,&bv)); 1959 pbv = bv; 1960 row = A->rmap->rstart; 1961 for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]]; 1962 cols_tmp = cols; 1963 for (i=0; i<mb; i++) { 1964 ncol = bi[i+1]-bi[i]; 1965 PetscCall(MatSetValues(B,ncol,cols_tmp,1,&row,pbv,INSERT_VALUES)); 1966 row++; 1967 pbv += ncol; cols_tmp += ncol; 1968 } 1969 PetscCall(PetscFree(cols)); 1970 PetscCall(MatSeqAIJRestoreArrayRead(a->B,&bv)); 1971 1972 PetscCall(MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY)); 1973 PetscCall(MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY)); 1974 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) { 1975 *matout = B; 1976 } else { 1977 PetscCall(MatHeaderMerge(A,&B)); 1978 } 1979 PetscFunctionReturn(0); 1980 } 1981 1982 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 1983 { 1984 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1985 Mat a = aij->A,b = aij->B; 1986 PetscInt s1,s2,s3; 1987 1988 PetscFunctionBegin; 1989 PetscCall(MatGetLocalSize(mat,&s2,&s3)); 1990 if (rr) { 1991 PetscCall(VecGetLocalSize(rr,&s1)); 1992 PetscCheck(s1==s3,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 1993 /* Overlap communication with computation. 
*/ 1994 PetscCall(VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD)); 1995 } 1996 if (ll) { 1997 PetscCall(VecGetLocalSize(ll,&s1)); 1998 PetscCheck(s1==s2,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 1999 PetscCall((*b->ops->diagonalscale)(b,ll,NULL)); 2000 } 2001 /* scale the diagonal block */ 2002 PetscCall((*a->ops->diagonalscale)(a,ll,rr)); 2003 2004 if (rr) { 2005 /* Do a scatter end and then right scale the off-diagonal block */ 2006 PetscCall(VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD)); 2007 PetscCall((*b->ops->diagonalscale)(b,NULL,aij->lvec)); 2008 } 2009 PetscFunctionReturn(0); 2010 } 2011 2012 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 2013 { 2014 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2015 2016 PetscFunctionBegin; 2017 PetscCall(MatSetUnfactored(a->A)); 2018 PetscFunctionReturn(0); 2019 } 2020 2021 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool *flag) 2022 { 2023 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 2024 Mat a,b,c,d; 2025 PetscBool flg; 2026 2027 PetscFunctionBegin; 2028 a = matA->A; b = matA->B; 2029 c = matB->A; d = matB->B; 2030 2031 PetscCall(MatEqual(a,c,&flg)); 2032 if (flg) { 2033 PetscCall(MatEqual(b,d,&flg)); 2034 } 2035 PetscCall(MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A))); 2036 PetscFunctionReturn(0); 2037 } 2038 2039 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 2040 { 2041 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2042 Mat_MPIAIJ *b = (Mat_MPIAIJ*)B->data; 2043 2044 PetscFunctionBegin; 2045 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. 
*/ 2046 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 2047 /* because of the column compression in the off-processor part of the matrix a->B, 2048 the number of columns in a->B and b->B may be different, hence we cannot call 2049 the MatCopy() directly on the two parts. If need be, we can provide a more 2050 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 2051 then copying the submatrices */ 2052 PetscCall(MatCopy_Basic(A,B,str)); 2053 } else { 2054 PetscCall(MatCopy(a->A,b->A,str)); 2055 PetscCall(MatCopy(a->B,b->B,str)); 2056 } 2057 PetscCall(PetscObjectStateIncrease((PetscObject)B)); 2058 PetscFunctionReturn(0); 2059 } 2060 2061 PetscErrorCode MatSetUp_MPIAIJ(Mat A) 2062 { 2063 PetscFunctionBegin; 2064 PetscCall(MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,NULL,PETSC_DEFAULT,NULL)); 2065 PetscFunctionReturn(0); 2066 } 2067 2068 /* 2069 Computes the number of nonzeros per row needed for preallocation when X and Y 2070 have different nonzero structure. 
2071 */ 2072 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz) 2073 { 2074 PetscInt i,j,k,nzx,nzy; 2075 2076 PetscFunctionBegin; 2077 /* Set the number of nonzeros in the new matrix */ 2078 for (i=0; i<m; i++) { 2079 const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i]; 2080 nzx = xi[i+1] - xi[i]; 2081 nzy = yi[i+1] - yi[i]; 2082 nnz[i] = 0; 2083 for (j=0,k=0; j<nzx; j++) { /* Point in X */ 2084 for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */ 2085 if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++; /* Skip duplicate */ 2086 nnz[i]++; 2087 } 2088 for (; k<nzy; k++) nnz[i]++; 2089 } 2090 PetscFunctionReturn(0); 2091 } 2092 2093 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */ 2094 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz) 2095 { 2096 PetscInt m = Y->rmap->N; 2097 Mat_SeqAIJ *x = (Mat_SeqAIJ*)X->data; 2098 Mat_SeqAIJ *y = (Mat_SeqAIJ*)Y->data; 2099 2100 PetscFunctionBegin; 2101 PetscCall(MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz)); 2102 PetscFunctionReturn(0); 2103 } 2104 2105 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2106 { 2107 Mat_MPIAIJ *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data; 2108 2109 PetscFunctionBegin; 2110 if (str == SAME_NONZERO_PATTERN) { 2111 PetscCall(MatAXPY(yy->A,a,xx->A,str)); 2112 PetscCall(MatAXPY(yy->B,a,xx->B,str)); 2113 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 2114 PetscCall(MatAXPY_Basic(Y,a,X,str)); 2115 } else { 2116 Mat B; 2117 PetscInt *nnz_d,*nnz_o; 2118 2119 PetscCall(PetscMalloc1(yy->A->rmap->N,&nnz_d)); 2120 PetscCall(PetscMalloc1(yy->B->rmap->N,&nnz_o)); 2121 PetscCall(MatCreate(PetscObjectComm((PetscObject)Y),&B)); 
2122 PetscCall(PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name)); 2123 PetscCall(MatSetLayouts(B,Y->rmap,Y->cmap)); 2124 PetscCall(MatSetType(B,((PetscObject)Y)->type_name)); 2125 PetscCall(MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d)); 2126 PetscCall(MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o)); 2127 PetscCall(MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o)); 2128 PetscCall(MatAXPY_BasicWithPreallocation(B,Y,a,X,str)); 2129 PetscCall(MatHeaderMerge(Y,&B)); 2130 PetscCall(PetscFree(nnz_d)); 2131 PetscCall(PetscFree(nnz_o)); 2132 } 2133 PetscFunctionReturn(0); 2134 } 2135 2136 PETSC_INTERN PetscErrorCode MatConjugate_SeqAIJ(Mat); 2137 2138 PetscErrorCode MatConjugate_MPIAIJ(Mat mat) 2139 { 2140 PetscFunctionBegin; 2141 if (PetscDefined(USE_COMPLEX)) { 2142 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2143 2144 PetscCall(MatConjugate_SeqAIJ(aij->A)); 2145 PetscCall(MatConjugate_SeqAIJ(aij->B)); 2146 } 2147 PetscFunctionReturn(0); 2148 } 2149 2150 PetscErrorCode MatRealPart_MPIAIJ(Mat A) 2151 { 2152 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2153 2154 PetscFunctionBegin; 2155 PetscCall(MatRealPart(a->A)); 2156 PetscCall(MatRealPart(a->B)); 2157 PetscFunctionReturn(0); 2158 } 2159 2160 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 2161 { 2162 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2163 2164 PetscFunctionBegin; 2165 PetscCall(MatImaginaryPart(a->A)); 2166 PetscCall(MatImaginaryPart(a->B)); 2167 PetscFunctionReturn(0); 2168 } 2169 2170 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A,Vec v,PetscInt idx[]) 2171 { 2172 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2173 PetscInt i,*idxb = NULL,m = A->rmap->n; 2174 PetscScalar *va,*vv; 2175 Vec vB,vA; 2176 const PetscScalar *vb; 2177 2178 PetscFunctionBegin; 2179 PetscCall(VecCreateSeq(PETSC_COMM_SELF,m,&vA)); 2180 PetscCall(MatGetRowMaxAbs(a->A,vA,idx)); 2181 2182 PetscCall(VecGetArrayWrite(vA,&va)); 2183 if (idx) { 2184 for (i=0; i<m; i++) { 2185 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2186 } 
2187 } 2188 2189 PetscCall(VecCreateSeq(PETSC_COMM_SELF,m,&vB)); 2190 PetscCall(PetscMalloc1(m,&idxb)); 2191 PetscCall(MatGetRowMaxAbs(a->B,vB,idxb)); 2192 2193 PetscCall(VecGetArrayWrite(v,&vv)); 2194 PetscCall(VecGetArrayRead(vB,&vb)); 2195 for (i=0; i<m; i++) { 2196 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 2197 vv[i] = vb[i]; 2198 if (idx) idx[i] = a->garray[idxb[i]]; 2199 } else { 2200 vv[i] = va[i]; 2201 if (idx && PetscAbsScalar(va[i]) == PetscAbsScalar(vb[i]) && idxb[i] != -1 && idx[i] > a->garray[idxb[i]]) 2202 idx[i] = a->garray[idxb[i]]; 2203 } 2204 } 2205 PetscCall(VecRestoreArrayWrite(vA,&vv)); 2206 PetscCall(VecRestoreArrayWrite(vA,&va)); 2207 PetscCall(VecRestoreArrayRead(vB,&vb)); 2208 PetscCall(PetscFree(idxb)); 2209 PetscCall(VecDestroy(&vA)); 2210 PetscCall(VecDestroy(&vB)); 2211 PetscFunctionReturn(0); 2212 } 2213 2214 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2215 { 2216 Mat_MPIAIJ *mat = (Mat_MPIAIJ*) A->data; 2217 PetscInt m = A->rmap->n,n = A->cmap->n; 2218 PetscInt cstart = A->cmap->rstart,cend = A->cmap->rend; 2219 PetscInt *cmap = mat->garray; 2220 PetscInt *diagIdx, *offdiagIdx; 2221 Vec diagV, offdiagV; 2222 PetscScalar *a, *diagA, *offdiagA; 2223 const PetscScalar *ba,*bav; 2224 PetscInt r,j,col,ncols,*bi,*bj; 2225 Mat B = mat->B; 2226 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 2227 2228 PetscFunctionBegin; 2229 /* When a process holds entire A and other processes have no entry */ 2230 if (A->cmap->N == n) { 2231 PetscCall(VecGetArrayWrite(v,&diagA)); 2232 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF,1,m,diagA,&diagV)); 2233 PetscCall(MatGetRowMinAbs(mat->A,diagV,idx)); 2234 PetscCall(VecDestroy(&diagV)); 2235 PetscCall(VecRestoreArrayWrite(v,&diagA)); 2236 PetscFunctionReturn(0); 2237 } else if (n == 0) { 2238 if (m) { 2239 PetscCall(VecGetArrayWrite(v,&a)); 2240 for (r = 0; r < m; r++) {a[r] = 0.0; if (idx) idx[r] = -1;} 2241 PetscCall(VecRestoreArrayWrite(v,&a)); 2242 } 2243 PetscFunctionReturn(0); 
2244 } 2245 2246 PetscCall(PetscMalloc2(m,&diagIdx,m,&offdiagIdx)); 2247 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV)); 2248 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV)); 2249 PetscCall(MatGetRowMinAbs(mat->A, diagV, diagIdx)); 2250 2251 /* Get offdiagIdx[] for implicit 0.0 */ 2252 PetscCall(MatSeqAIJGetArrayRead(B,&bav)); 2253 ba = bav; 2254 bi = b->i; 2255 bj = b->j; 2256 PetscCall(VecGetArrayWrite(offdiagV, &offdiagA)); 2257 for (r = 0; r < m; r++) { 2258 ncols = bi[r+1] - bi[r]; 2259 if (ncols == A->cmap->N - n) { /* Brow is dense */ 2260 offdiagA[r] = *ba; offdiagIdx[r] = cmap[0]; 2261 } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */ 2262 offdiagA[r] = 0.0; 2263 2264 /* Find first hole in the cmap */ 2265 for (j=0; j<ncols; j++) { 2266 col = cmap[bj[j]]; /* global column number = cmap[B column number] */ 2267 if (col > j && j < cstart) { 2268 offdiagIdx[r] = j; /* global column number of first implicit 0.0 */ 2269 break; 2270 } else if (col > j + n && j >= cstart) { 2271 offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */ 2272 break; 2273 } 2274 } 2275 if (j == ncols && ncols < A->cmap->N - n) { 2276 /* a hole is outside compressed Bcols */ 2277 if (ncols == 0) { 2278 if (cstart) { 2279 offdiagIdx[r] = 0; 2280 } else offdiagIdx[r] = cend; 2281 } else { /* ncols > 0 */ 2282 offdiagIdx[r] = cmap[ncols-1] + 1; 2283 if (offdiagIdx[r] == cstart) offdiagIdx[r] += n; 2284 } 2285 } 2286 } 2287 2288 for (j=0; j<ncols; j++) { 2289 if (PetscAbsScalar(offdiagA[r]) > PetscAbsScalar(*ba)) {offdiagA[r] = *ba; offdiagIdx[r] = cmap[*bj];} 2290 ba++; bj++; 2291 } 2292 } 2293 2294 PetscCall(VecGetArrayWrite(v, &a)); 2295 PetscCall(VecGetArrayRead(diagV, (const PetscScalar**)&diagA)); 2296 for (r = 0; r < m; ++r) { 2297 if (PetscAbsScalar(diagA[r]) < PetscAbsScalar(offdiagA[r])) { 2298 a[r] = diagA[r]; 2299 if (idx) idx[r] = cstart + diagIdx[r]; 2300 } else if (PetscAbsScalar(diagA[r]) == PetscAbsScalar(offdiagA[r])) { 
2301 a[r] = diagA[r]; 2302 if (idx) { 2303 if (cstart + diagIdx[r] <= offdiagIdx[r]) { 2304 idx[r] = cstart + diagIdx[r]; 2305 } else idx[r] = offdiagIdx[r]; 2306 } 2307 } else { 2308 a[r] = offdiagA[r]; 2309 if (idx) idx[r] = offdiagIdx[r]; 2310 } 2311 } 2312 PetscCall(MatSeqAIJRestoreArrayRead(B,&bav)); 2313 PetscCall(VecRestoreArrayWrite(v, &a)); 2314 PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar**)&diagA)); 2315 PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA)); 2316 PetscCall(VecDestroy(&diagV)); 2317 PetscCall(VecDestroy(&offdiagV)); 2318 PetscCall(PetscFree2(diagIdx, offdiagIdx)); 2319 PetscFunctionReturn(0); 2320 } 2321 2322 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A,Vec v,PetscInt idx[]) 2323 { 2324 Mat_MPIAIJ *mat = (Mat_MPIAIJ*) A->data; 2325 PetscInt m = A->rmap->n,n = A->cmap->n; 2326 PetscInt cstart = A->cmap->rstart,cend = A->cmap->rend; 2327 PetscInt *cmap = mat->garray; 2328 PetscInt *diagIdx, *offdiagIdx; 2329 Vec diagV, offdiagV; 2330 PetscScalar *a, *diagA, *offdiagA; 2331 const PetscScalar *ba,*bav; 2332 PetscInt r,j,col,ncols,*bi,*bj; 2333 Mat B = mat->B; 2334 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 2335 2336 PetscFunctionBegin; 2337 /* When a process holds entire A and other processes have no entry */ 2338 if (A->cmap->N == n) { 2339 PetscCall(VecGetArrayWrite(v,&diagA)); 2340 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF,1,m,diagA,&diagV)); 2341 PetscCall(MatGetRowMin(mat->A,diagV,idx)); 2342 PetscCall(VecDestroy(&diagV)); 2343 PetscCall(VecRestoreArrayWrite(v,&diagA)); 2344 PetscFunctionReturn(0); 2345 } else if (n == 0) { 2346 if (m) { 2347 PetscCall(VecGetArrayWrite(v,&a)); 2348 for (r = 0; r < m; r++) {a[r] = PETSC_MAX_REAL; if (idx) idx[r] = -1;} 2349 PetscCall(VecRestoreArrayWrite(v,&a)); 2350 } 2351 PetscFunctionReturn(0); 2352 } 2353 2354 PetscCall(PetscCalloc2(m,&diagIdx,m,&offdiagIdx)); 2355 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV)); 2356 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV)); 2357 
PetscCall(MatGetRowMin(mat->A, diagV, diagIdx)); 2358 2359 /* Get offdiagIdx[] for implicit 0.0 */ 2360 PetscCall(MatSeqAIJGetArrayRead(B,&bav)); 2361 ba = bav; 2362 bi = b->i; 2363 bj = b->j; 2364 PetscCall(VecGetArrayWrite(offdiagV, &offdiagA)); 2365 for (r = 0; r < m; r++) { 2366 ncols = bi[r+1] - bi[r]; 2367 if (ncols == A->cmap->N - n) { /* Brow is dense */ 2368 offdiagA[r] = *ba; offdiagIdx[r] = cmap[0]; 2369 } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */ 2370 offdiagA[r] = 0.0; 2371 2372 /* Find first hole in the cmap */ 2373 for (j=0; j<ncols; j++) { 2374 col = cmap[bj[j]]; /* global column number = cmap[B column number] */ 2375 if (col > j && j < cstart) { 2376 offdiagIdx[r] = j; /* global column number of first implicit 0.0 */ 2377 break; 2378 } else if (col > j + n && j >= cstart) { 2379 offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */ 2380 break; 2381 } 2382 } 2383 if (j == ncols && ncols < A->cmap->N - n) { 2384 /* a hole is outside compressed Bcols */ 2385 if (ncols == 0) { 2386 if (cstart) { 2387 offdiagIdx[r] = 0; 2388 } else offdiagIdx[r] = cend; 2389 } else { /* ncols > 0 */ 2390 offdiagIdx[r] = cmap[ncols-1] + 1; 2391 if (offdiagIdx[r] == cstart) offdiagIdx[r] += n; 2392 } 2393 } 2394 } 2395 2396 for (j=0; j<ncols; j++) { 2397 if (PetscRealPart(offdiagA[r]) > PetscRealPart(*ba)) {offdiagA[r] = *ba; offdiagIdx[r] = cmap[*bj];} 2398 ba++; bj++; 2399 } 2400 } 2401 2402 PetscCall(VecGetArrayWrite(v, &a)); 2403 PetscCall(VecGetArrayRead(diagV, (const PetscScalar**)&diagA)); 2404 for (r = 0; r < m; ++r) { 2405 if (PetscRealPart(diagA[r]) < PetscRealPart(offdiagA[r])) { 2406 a[r] = diagA[r]; 2407 if (idx) idx[r] = cstart + diagIdx[r]; 2408 } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) { 2409 a[r] = diagA[r]; 2410 if (idx) { 2411 if (cstart + diagIdx[r] <= offdiagIdx[r]) { 2412 idx[r] = cstart + diagIdx[r]; 2413 } else idx[r] = offdiagIdx[r]; 2414 } 2415 } else { 2416 a[r] = offdiagA[r]; 
2417 if (idx) idx[r] = offdiagIdx[r]; 2418 } 2419 } 2420 PetscCall(MatSeqAIJRestoreArrayRead(B,&bav)); 2421 PetscCall(VecRestoreArrayWrite(v, &a)); 2422 PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar**)&diagA)); 2423 PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA)); 2424 PetscCall(VecDestroy(&diagV)); 2425 PetscCall(VecDestroy(&offdiagV)); 2426 PetscCall(PetscFree2(diagIdx, offdiagIdx)); 2427 PetscFunctionReturn(0); 2428 } 2429 2430 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A,Vec v,PetscInt idx[]) 2431 { 2432 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 2433 PetscInt m = A->rmap->n,n = A->cmap->n; 2434 PetscInt cstart = A->cmap->rstart,cend = A->cmap->rend; 2435 PetscInt *cmap = mat->garray; 2436 PetscInt *diagIdx, *offdiagIdx; 2437 Vec diagV, offdiagV; 2438 PetscScalar *a, *diagA, *offdiagA; 2439 const PetscScalar *ba,*bav; 2440 PetscInt r,j,col,ncols,*bi,*bj; 2441 Mat B = mat->B; 2442 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 2443 2444 PetscFunctionBegin; 2445 /* When a process holds entire A and other processes have no entry */ 2446 if (A->cmap->N == n) { 2447 PetscCall(VecGetArrayWrite(v,&diagA)); 2448 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF,1,m,diagA,&diagV)); 2449 PetscCall(MatGetRowMax(mat->A,diagV,idx)); 2450 PetscCall(VecDestroy(&diagV)); 2451 PetscCall(VecRestoreArrayWrite(v,&diagA)); 2452 PetscFunctionReturn(0); 2453 } else if (n == 0) { 2454 if (m) { 2455 PetscCall(VecGetArrayWrite(v,&a)); 2456 for (r = 0; r < m; r++) {a[r] = PETSC_MIN_REAL; if (idx) idx[r] = -1;} 2457 PetscCall(VecRestoreArrayWrite(v,&a)); 2458 } 2459 PetscFunctionReturn(0); 2460 } 2461 2462 PetscCall(PetscMalloc2(m,&diagIdx,m,&offdiagIdx)); 2463 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV)); 2464 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV)); 2465 PetscCall(MatGetRowMax(mat->A, diagV, diagIdx)); 2466 2467 /* Get offdiagIdx[] for implicit 0.0 */ 2468 PetscCall(MatSeqAIJGetArrayRead(B,&bav)); 2469 ba = bav; 2470 bi = b->i; 2471 bj = b->j; 2472 
PetscCall(VecGetArrayWrite(offdiagV, &offdiagA)); 2473 for (r = 0; r < m; r++) { 2474 ncols = bi[r+1] - bi[r]; 2475 if (ncols == A->cmap->N - n) { /* Brow is dense */ 2476 offdiagA[r] = *ba; offdiagIdx[r] = cmap[0]; 2477 } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */ 2478 offdiagA[r] = 0.0; 2479 2480 /* Find first hole in the cmap */ 2481 for (j=0; j<ncols; j++) { 2482 col = cmap[bj[j]]; /* global column number = cmap[B column number] */ 2483 if (col > j && j < cstart) { 2484 offdiagIdx[r] = j; /* global column number of first implicit 0.0 */ 2485 break; 2486 } else if (col > j + n && j >= cstart) { 2487 offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */ 2488 break; 2489 } 2490 } 2491 if (j == ncols && ncols < A->cmap->N - n) { 2492 /* a hole is outside compressed Bcols */ 2493 if (ncols == 0) { 2494 if (cstart) { 2495 offdiagIdx[r] = 0; 2496 } else offdiagIdx[r] = cend; 2497 } else { /* ncols > 0 */ 2498 offdiagIdx[r] = cmap[ncols-1] + 1; 2499 if (offdiagIdx[r] == cstart) offdiagIdx[r] += n; 2500 } 2501 } 2502 } 2503 2504 for (j=0; j<ncols; j++) { 2505 if (PetscRealPart(offdiagA[r]) < PetscRealPart(*ba)) {offdiagA[r] = *ba; offdiagIdx[r] = cmap[*bj];} 2506 ba++; bj++; 2507 } 2508 } 2509 2510 PetscCall(VecGetArrayWrite(v, &a)); 2511 PetscCall(VecGetArrayRead(diagV,(const PetscScalar**)&diagA)); 2512 for (r = 0; r < m; ++r) { 2513 if (PetscRealPart(diagA[r]) > PetscRealPart(offdiagA[r])) { 2514 a[r] = diagA[r]; 2515 if (idx) idx[r] = cstart + diagIdx[r]; 2516 } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) { 2517 a[r] = diagA[r]; 2518 if (idx) { 2519 if (cstart + diagIdx[r] <= offdiagIdx[r]) { 2520 idx[r] = cstart + diagIdx[r]; 2521 } else idx[r] = offdiagIdx[r]; 2522 } 2523 } else { 2524 a[r] = offdiagA[r]; 2525 if (idx) idx[r] = offdiagIdx[r]; 2526 } 2527 } 2528 PetscCall(MatSeqAIJRestoreArrayRead(B,&bav)); 2529 PetscCall(VecRestoreArrayWrite(v, &a)); 2530 PetscCall(VecRestoreArrayRead(diagV, (const 
PetscScalar**)&diagA)); 2531 PetscCall(VecRestoreArrayWrite(offdiagV,&offdiagA)); 2532 PetscCall(VecDestroy(&diagV)); 2533 PetscCall(VecDestroy(&offdiagV)); 2534 PetscCall(PetscFree2(diagIdx, offdiagIdx)); 2535 PetscFunctionReturn(0); 2536 } 2537 2538 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat) 2539 { 2540 Mat *dummy; 2541 2542 PetscFunctionBegin; 2543 PetscCall(MatCreateSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy)); 2544 *newmat = *dummy; 2545 PetscCall(PetscFree(dummy)); 2546 PetscFunctionReturn(0); 2547 } 2548 2549 PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values) 2550 { 2551 Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 2552 2553 PetscFunctionBegin; 2554 PetscCall(MatInvertBlockDiagonal(a->A,values)); 2555 A->factorerrortype = a->A->factorerrortype; 2556 PetscFunctionReturn(0); 2557 } 2558 2559 static PetscErrorCode MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx) 2560 { 2561 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)x->data; 2562 2563 PetscFunctionBegin; 2564 PetscCheck(x->assembled || x->preallocated,PetscObjectComm((PetscObject)x), PETSC_ERR_ARG_WRONGSTATE, "MatSetRandom on an unassembled and unpreallocated MATMPIAIJ is not allowed"); 2565 PetscCall(MatSetRandom(aij->A,rctx)); 2566 if (x->assembled) { 2567 PetscCall(MatSetRandom(aij->B,rctx)); 2568 } else { 2569 PetscCall(MatSetRandomSkipColumnRange_SeqAIJ_Private(aij->B,x->cmap->rstart,x->cmap->rend,rctx)); 2570 } 2571 PetscCall(MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY)); 2572 PetscCall(MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY)); 2573 PetscFunctionReturn(0); 2574 } 2575 2576 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A,PetscBool sc) 2577 { 2578 PetscFunctionBegin; 2579 if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable; 2580 else A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ; 2581 PetscFunctionReturn(0); 2582 } 2583 2584 /*@ 2585 MatMPIAIJSetUseScalableIncreaseOverlap - Determine if the matrix uses a scalable 
algorithm to compute the overlap 2586 2587 Collective on Mat 2588 2589 Input Parameters: 2590 + A - the matrix 2591 - sc - PETSC_TRUE indicates use the scalable algorithm (default is not to use the scalable algorithm) 2592 2593 Level: advanced 2594 2595 @*/ 2596 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A,PetscBool sc) 2597 { 2598 PetscFunctionBegin; 2599 PetscTryMethod(A,"MatMPIAIJSetUseScalableIncreaseOverlap_C",(Mat,PetscBool),(A,sc)); 2600 PetscFunctionReturn(0); 2601 } 2602 2603 PetscErrorCode MatSetFromOptions_MPIAIJ(PetscOptionItems *PetscOptionsObject,Mat A) 2604 { 2605 PetscBool sc = PETSC_FALSE,flg; 2606 2607 PetscFunctionBegin; 2608 PetscOptionsHeadBegin(PetscOptionsObject,"MPIAIJ options"); 2609 if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE; 2610 PetscCall(PetscOptionsBool("-mat_increase_overlap_scalable","Use a scalable algorithm to compute the overlap","MatIncreaseOverlap",sc,&sc,&flg)); 2611 if (flg) PetscCall(MatMPIAIJSetUseScalableIncreaseOverlap(A,sc)); 2612 PetscOptionsHeadEnd(); 2613 PetscFunctionReturn(0); 2614 } 2615 2616 PetscErrorCode MatShift_MPIAIJ(Mat Y,PetscScalar a) 2617 { 2618 Mat_MPIAIJ *maij = (Mat_MPIAIJ*)Y->data; 2619 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)maij->A->data; 2620 2621 PetscFunctionBegin; 2622 if (!Y->preallocated) { 2623 PetscCall(MatMPIAIJSetPreallocation(Y,1,NULL,0,NULL)); 2624 } else if (!aij->nz) { /* It does not matter if diagonals of Y only partially lie in maij->A. We just need an estimated preallocation. 
*/
    PetscInt nonew = aij->nonew;
    PetscCall(MatSeqAIJSetPreallocation(maij->A,1,NULL));
    aij->nonew = nonew;
  }
  PetscCall(MatShift_Basic(Y,a));
  PetscFunctionReturn(0);
}

/*
   MatMissingDiagonal_MPIAIJ - Checks the local diagonal block for a missing diagonal entry.

   Only supported when the local row and column sizes agree. The query is forwarded to the
   diagonal block a->A; when d is requested, the value it returns is shifted by the first
   locally owned row so that it becomes a global row number.
*/
PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A,PetscBool *missing,PetscInt *d)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  PetscCheck(A->rmap->n == A->cmap->n,PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices");
  PetscCall(MatMissingDiagonal(a->A,missing,d));
  if (d) {
    PetscInt rstart;
    PetscCall(MatGetOwnershipRange(A,&rstart,NULL));
    /* local row index -> global row index */
    *d += rstart;

  }
  PetscFunctionReturn(0);
}

/*
   MatInvertVariableBlockDiagonal_MPIAIJ - Forwards the request, with unchanged arguments,
   to the local diagonal block a->A.
*/
PetscErrorCode MatInvertVariableBlockDiagonal_MPIAIJ(Mat A,PetscInt nblocks,const PetscInt *bsizes,PetscScalar *diag)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  PetscCall(MatInvertVariableBlockDiagonal(a->A,nblocks,bsizes,diag));
  PetscFunctionReturn(0);
}

/* -------------------------------------------------------------------*/
/*
   Operations table for this matrix type. The initializer is positional, so the order of the
   entries must not change; the numbered comments mark the slot index of the entry that
   follows them, and NULL marks an operation with no MPIAIJ-specific implementation here.
*/
static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
                                       MatGetRow_MPIAIJ,
                                       MatRestoreRow_MPIAIJ,
                                       MatMult_MPIAIJ,
                                /* 4*/ MatMultAdd_MPIAIJ,
                                       MatMultTranspose_MPIAIJ,
                                       MatMultTransposeAdd_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                /*10*/ NULL,
                                       NULL,
                                       NULL,
                                       MatSOR_MPIAIJ,
                                       MatTranspose_MPIAIJ,
                                /*15*/ MatGetInfo_MPIAIJ,
                                       MatEqual_MPIAIJ,
                                       MatGetDiagonal_MPIAIJ,
                                       MatDiagonalScale_MPIAIJ,
                                       MatNorm_MPIAIJ,
                                /*20*/ MatAssemblyBegin_MPIAIJ,
                                       MatAssemblyEnd_MPIAIJ,
                                       MatSetOption_MPIAIJ,
                                       MatZeroEntries_MPIAIJ,
                                /*24*/ MatZeroRows_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                /*29*/ MatSetUp_MPIAIJ,
                                       NULL,
                                       NULL,
                                       MatGetDiagonalBlock_MPIAIJ,
                                       NULL,
                                /*34*/ MatDuplicate_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                /*39*/ MatAXPY_MPIAIJ,
                                       MatCreateSubMatrices_MPIAIJ,
2700 MatIncreaseOverlap_MPIAIJ, 2701 MatGetValues_MPIAIJ, 2702 MatCopy_MPIAIJ, 2703 /*44*/ MatGetRowMax_MPIAIJ, 2704 MatScale_MPIAIJ, 2705 MatShift_MPIAIJ, 2706 MatDiagonalSet_MPIAIJ, 2707 MatZeroRowsColumns_MPIAIJ, 2708 /*49*/ MatSetRandom_MPIAIJ, 2709 MatGetRowIJ_MPIAIJ, 2710 MatRestoreRowIJ_MPIAIJ, 2711 NULL, 2712 NULL, 2713 /*54*/ MatFDColoringCreate_MPIXAIJ, 2714 NULL, 2715 MatSetUnfactored_MPIAIJ, 2716 MatPermute_MPIAIJ, 2717 NULL, 2718 /*59*/ MatCreateSubMatrix_MPIAIJ, 2719 MatDestroy_MPIAIJ, 2720 MatView_MPIAIJ, 2721 NULL, 2722 NULL, 2723 /*64*/ NULL, 2724 MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ, 2725 NULL, 2726 NULL, 2727 NULL, 2728 /*69*/ MatGetRowMaxAbs_MPIAIJ, 2729 MatGetRowMinAbs_MPIAIJ, 2730 NULL, 2731 NULL, 2732 NULL, 2733 NULL, 2734 /*75*/ MatFDColoringApply_AIJ, 2735 MatSetFromOptions_MPIAIJ, 2736 NULL, 2737 NULL, 2738 MatFindZeroDiagonals_MPIAIJ, 2739 /*80*/ NULL, 2740 NULL, 2741 NULL, 2742 /*83*/ MatLoad_MPIAIJ, 2743 MatIsSymmetric_MPIAIJ, 2744 NULL, 2745 NULL, 2746 NULL, 2747 NULL, 2748 /*89*/ NULL, 2749 NULL, 2750 MatMatMultNumeric_MPIAIJ_MPIAIJ, 2751 NULL, 2752 NULL, 2753 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ, 2754 NULL, 2755 NULL, 2756 NULL, 2757 MatBindToCPU_MPIAIJ, 2758 /*99*/ MatProductSetFromOptions_MPIAIJ, 2759 NULL, 2760 NULL, 2761 MatConjugate_MPIAIJ, 2762 NULL, 2763 /*104*/MatSetValuesRow_MPIAIJ, 2764 MatRealPart_MPIAIJ, 2765 MatImaginaryPart_MPIAIJ, 2766 NULL, 2767 NULL, 2768 /*109*/NULL, 2769 NULL, 2770 MatGetRowMin_MPIAIJ, 2771 NULL, 2772 MatMissingDiagonal_MPIAIJ, 2773 /*114*/MatGetSeqNonzeroStructure_MPIAIJ, 2774 NULL, 2775 MatGetGhosts_MPIAIJ, 2776 NULL, 2777 NULL, 2778 /*119*/MatMultDiagonalBlock_MPIAIJ, 2779 NULL, 2780 NULL, 2781 NULL, 2782 MatGetMultiProcBlock_MPIAIJ, 2783 /*124*/MatFindNonzeroRows_MPIAIJ, 2784 MatGetColumnReductions_MPIAIJ, 2785 MatInvertBlockDiagonal_MPIAIJ, 2786 MatInvertVariableBlockDiagonal_MPIAIJ, 2787 MatCreateSubMatricesMPI_MPIAIJ, 2788 /*129*/NULL, 2789 NULL, 2790 NULL, 2791 
                                       MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
                                       NULL,
                                /*134*/NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                /*139*/MatSetBlockSizes_MPIAIJ,
                                       NULL,
                                       NULL,
                                       MatFDColoringSetUp_MPIXAIJ,
                                       MatFindOffBlockDiagonalEntries_MPIAIJ,
                                       MatCreateMPIMatConcatenateSeqMat_MPIAIJ,
                                /*145*/NULL,
                                       NULL,
                                       NULL,
                                       MatCreateGraph_Simple_AIJ,
                                       MatFilter_AIJ
};

/* ----------------------------------------------------------------------------------------*/

/*
   MatStoreValues_MPIAIJ - Applies MatStoreValues() to both local blocks (aij->A, then aij->B).
*/
PetscErrorCode MatStoreValues_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;

  PetscFunctionBegin;
  PetscCall(MatStoreValues(aij->A));
  PetscCall(MatStoreValues(aij->B));
  PetscFunctionReturn(0);
}

/*
   MatRetrieveValues_MPIAIJ - Applies MatRetrieveValues() to both local blocks (aij->A, then aij->B).
*/
PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;

  PetscFunctionBegin;
  PetscCall(MatRetrieveValues(aij->A));
  PetscCall(MatRetrieveValues(aij->B));
  PetscFunctionReturn(0);
}

PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
{
  Mat_MPIAIJ  *b;
  PetscMPIInt size;

  PetscFunctionBegin;
  PetscCall(PetscLayoutSetUp(B->rmap));
  PetscCall(PetscLayoutSetUp(B->cmap));
  b = (Mat_MPIAIJ*)B->data;

#if defined(PETSC_USE_CTABLE)
  PetscCall(PetscTableDestroy(&b->colmap));
#else
  PetscCall(PetscFree(b->colmap));
#endif
  PetscCall(PetscFree(b->garray));
  PetscCall(VecDestroy(&b->lvec));
  PetscCall(VecScatterDestroy(&b->Mvctx));

  /* Because the B will have been resized we simply destroy it and create a new one each time */
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B),&size));
  PetscCall(MatDestroy(&b->B));
  PetscCall(MatCreate(PETSC_COMM_SELF,&b->B));
  PetscCall(MatSetSizes(b->B,B->rmap->n,size > 1 ? B->cmap->N : 0,B->rmap->n,size > 1 ?
B->cmap->N : 0)); 2857 PetscCall(MatSetBlockSizesFromMats(b->B,B,B)); 2858 PetscCall(MatSetType(b->B,MATSEQAIJ)); 2859 PetscCall(PetscLogObjectParent((PetscObject)B,(PetscObject)b->B)); 2860 2861 if (!B->preallocated) { 2862 PetscCall(MatCreate(PETSC_COMM_SELF,&b->A)); 2863 PetscCall(MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n)); 2864 PetscCall(MatSetBlockSizesFromMats(b->A,B,B)); 2865 PetscCall(MatSetType(b->A,MATSEQAIJ)); 2866 PetscCall(PetscLogObjectParent((PetscObject)B,(PetscObject)b->A)); 2867 } 2868 2869 PetscCall(MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz)); 2870 PetscCall(MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz)); 2871 B->preallocated = PETSC_TRUE; 2872 B->was_assembled = PETSC_FALSE; 2873 B->assembled = PETSC_FALSE; 2874 PetscFunctionReturn(0); 2875 } 2876 2877 PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B) 2878 { 2879 Mat_MPIAIJ *b; 2880 2881 PetscFunctionBegin; 2882 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 2883 PetscCall(PetscLayoutSetUp(B->rmap)); 2884 PetscCall(PetscLayoutSetUp(B->cmap)); 2885 b = (Mat_MPIAIJ*)B->data; 2886 2887 #if defined(PETSC_USE_CTABLE) 2888 PetscCall(PetscTableDestroy(&b->colmap)); 2889 #else 2890 PetscCall(PetscFree(b->colmap)); 2891 #endif 2892 PetscCall(PetscFree(b->garray)); 2893 PetscCall(VecDestroy(&b->lvec)); 2894 PetscCall(VecScatterDestroy(&b->Mvctx)); 2895 2896 PetscCall(MatResetPreallocation(b->A)); 2897 PetscCall(MatResetPreallocation(b->B)); 2898 B->preallocated = PETSC_TRUE; 2899 B->was_assembled = PETSC_FALSE; 2900 B->assembled = PETSC_FALSE; 2901 PetscFunctionReturn(0); 2902 } 2903 2904 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2905 { 2906 Mat mat; 2907 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 2908 2909 PetscFunctionBegin; 2910 *newmat = NULL; 2911 PetscCall(MatCreate(PetscObjectComm((PetscObject)matin),&mat)); 2912 PetscCall(MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N)); 2913 
PetscCall(MatSetBlockSizesFromMats(mat,matin,matin)); 2914 PetscCall(MatSetType(mat,((PetscObject)matin)->type_name)); 2915 a = (Mat_MPIAIJ*)mat->data; 2916 2917 mat->factortype = matin->factortype; 2918 mat->assembled = matin->assembled; 2919 mat->insertmode = NOT_SET_VALUES; 2920 mat->preallocated = matin->preallocated; 2921 2922 a->size = oldmat->size; 2923 a->rank = oldmat->rank; 2924 a->donotstash = oldmat->donotstash; 2925 a->roworiented = oldmat->roworiented; 2926 a->rowindices = NULL; 2927 a->rowvalues = NULL; 2928 a->getrowactive = PETSC_FALSE; 2929 2930 PetscCall(PetscLayoutReference(matin->rmap,&mat->rmap)); 2931 PetscCall(PetscLayoutReference(matin->cmap,&mat->cmap)); 2932 2933 if (oldmat->colmap) { 2934 #if defined(PETSC_USE_CTABLE) 2935 PetscCall(PetscTableCreateCopy(oldmat->colmap,&a->colmap)); 2936 #else 2937 PetscCall(PetscMalloc1(mat->cmap->N,&a->colmap)); 2938 PetscCall(PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt))); 2939 PetscCall(PetscArraycpy(a->colmap,oldmat->colmap,mat->cmap->N)); 2940 #endif 2941 } else a->colmap = NULL; 2942 if (oldmat->garray) { 2943 PetscInt len; 2944 len = oldmat->B->cmap->n; 2945 PetscCall(PetscMalloc1(len+1,&a->garray)); 2946 PetscCall(PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt))); 2947 if (len) PetscCall(PetscArraycpy(a->garray,oldmat->garray,len)); 2948 } else a->garray = NULL; 2949 2950 /* It may happen MatDuplicate is called with a non-assembled matrix 2951 In fact, MatDuplicate only requires the matrix to be preallocated 2952 This may happen inside a DMCreateMatrix_Shell */ 2953 if (oldmat->lvec) { 2954 PetscCall(VecDuplicate(oldmat->lvec,&a->lvec)); 2955 PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec)); 2956 } 2957 if (oldmat->Mvctx) { 2958 PetscCall(VecScatterCopy(oldmat->Mvctx,&a->Mvctx)); 2959 PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx)); 2960 } 2961 PetscCall(MatDuplicate(oldmat->A,cpvalues,&a->A)); 2962 
PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A)); 2963 PetscCall(MatDuplicate(oldmat->B,cpvalues,&a->B)); 2964 PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B)); 2965 PetscCall(PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist)); 2966 *newmat = mat; 2967 PetscFunctionReturn(0); 2968 } 2969 2970 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer) 2971 { 2972 PetscBool isbinary, ishdf5; 2973 2974 PetscFunctionBegin; 2975 PetscValidHeaderSpecific(newMat,MAT_CLASSID,1); 2976 PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2); 2977 /* force binary viewer to load .info file if it has not yet done so */ 2978 PetscCall(PetscViewerSetUp(viewer)); 2979 PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary)); 2980 PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERHDF5, &ishdf5)); 2981 if (isbinary) { 2982 PetscCall(MatLoad_MPIAIJ_Binary(newMat,viewer)); 2983 } else if (ishdf5) { 2984 #if defined(PETSC_HAVE_HDF5) 2985 PetscCall(MatLoad_AIJ_HDF5(newMat,viewer)); 2986 #else 2987 SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"HDF5 not supported in this build.\nPlease reconfigure using --download-hdf5"); 2988 #endif 2989 } else { 2990 SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newMat)->type_name); 2991 } 2992 PetscFunctionReturn(0); 2993 } 2994 2995 PetscErrorCode MatLoad_MPIAIJ_Binary(Mat mat, PetscViewer viewer) 2996 { 2997 PetscInt header[4],M,N,m,nz,rows,cols,sum,i; 2998 PetscInt *rowidxs,*colidxs; 2999 PetscScalar *matvals; 3000 3001 PetscFunctionBegin; 3002 PetscCall(PetscViewerSetUp(viewer)); 3003 3004 /* read in matrix header */ 3005 PetscCall(PetscViewerBinaryRead(viewer,header,4,NULL,PETSC_INT)); 3006 PetscCheck(header[0] == MAT_FILE_CLASSID,PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Not a 
matrix object in file"); 3007 M = header[1]; N = header[2]; nz = header[3]; 3008 PetscCheck(M >= 0,PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix row size (%" PetscInt_FMT ") in file is negative",M); 3009 PetscCheck(N >= 0,PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix column size (%" PetscInt_FMT ") in file is negative",N); 3010 PetscCheck(nz >= 0,PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk, cannot load as MPIAIJ"); 3011 3012 /* set block sizes from the viewer's .info file */ 3013 PetscCall(MatLoad_Binary_BlockSizes(mat,viewer)); 3014 /* set global sizes if not set already */ 3015 if (mat->rmap->N < 0) mat->rmap->N = M; 3016 if (mat->cmap->N < 0) mat->cmap->N = N; 3017 PetscCall(PetscLayoutSetUp(mat->rmap)); 3018 PetscCall(PetscLayoutSetUp(mat->cmap)); 3019 3020 /* check if the matrix sizes are correct */ 3021 PetscCall(MatGetSize(mat,&rows,&cols)); 3022 PetscCheck(M == rows && N == cols,PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%" PetscInt_FMT ", %" PetscInt_FMT ") than the input matrix (%" PetscInt_FMT ", %" PetscInt_FMT ")",M,N,rows,cols); 3023 3024 /* read in row lengths and build row indices */ 3025 PetscCall(MatGetLocalSize(mat,&m,NULL)); 3026 PetscCall(PetscMalloc1(m+1,&rowidxs)); 3027 PetscCall(PetscViewerBinaryReadAll(viewer,rowidxs+1,m,PETSC_DECIDE,M,PETSC_INT)); 3028 rowidxs[0] = 0; for (i=0; i<m; i++) rowidxs[i+1] += rowidxs[i]; 3029 PetscCall(MPIU_Allreduce(&rowidxs[m],&sum,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)viewer))); 3030 PetscCheck(sum == nz,PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Inconsistent matrix data in file: nonzeros = %" PetscInt_FMT ", sum-row-lengths = %" PetscInt_FMT,nz,sum); 3031 /* read in column indices and matrix values */ 3032 PetscCall(PetscMalloc2(rowidxs[m],&colidxs,rowidxs[m],&matvals)); 3033 
PetscCall(PetscViewerBinaryReadAll(viewer,colidxs,rowidxs[m],PETSC_DETERMINE,PETSC_DETERMINE,PETSC_INT)); 3034 PetscCall(PetscViewerBinaryReadAll(viewer,matvals,rowidxs[m],PETSC_DETERMINE,PETSC_DETERMINE,PETSC_SCALAR)); 3035 /* store matrix indices and values */ 3036 PetscCall(MatMPIAIJSetPreallocationCSR(mat,rowidxs,colidxs,matvals)); 3037 PetscCall(PetscFree(rowidxs)); 3038 PetscCall(PetscFree2(colidxs,matvals)); 3039 PetscFunctionReturn(0); 3040 } 3041 3042 /* Not scalable because of ISAllGather() unless getting all columns. */ 3043 PetscErrorCode ISGetSeqIS_Private(Mat mat,IS iscol,IS *isseq) 3044 { 3045 IS iscol_local; 3046 PetscBool isstride; 3047 PetscMPIInt lisstride=0,gisstride; 3048 3049 PetscFunctionBegin; 3050 /* check if we are grabbing all columns*/ 3051 PetscCall(PetscObjectTypeCompare((PetscObject)iscol,ISSTRIDE,&isstride)); 3052 3053 if (isstride) { 3054 PetscInt start,len,mstart,mlen; 3055 PetscCall(ISStrideGetInfo(iscol,&start,NULL)); 3056 PetscCall(ISGetLocalSize(iscol,&len)); 3057 PetscCall(MatGetOwnershipRangeColumn(mat,&mstart,&mlen)); 3058 if (mstart == start && mlen-mstart == len) lisstride = 1; 3059 } 3060 3061 PetscCall(MPIU_Allreduce(&lisstride,&gisstride,1,MPI_INT,MPI_MIN,PetscObjectComm((PetscObject)mat))); 3062 if (gisstride) { 3063 PetscInt N; 3064 PetscCall(MatGetSize(mat,NULL,&N)); 3065 PetscCall(ISCreateStride(PETSC_COMM_SELF,N,0,1,&iscol_local)); 3066 PetscCall(ISSetIdentity(iscol_local)); 3067 PetscCall(PetscInfo(mat,"Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n")); 3068 } else { 3069 PetscInt cbs; 3070 PetscCall(ISGetBlockSize(iscol,&cbs)); 3071 PetscCall(ISAllGather(iscol,&iscol_local)); 3072 PetscCall(ISSetBlockSize(iscol_local,cbs)); 3073 } 3074 3075 *isseq = iscol_local; 3076 PetscFunctionReturn(0); 3077 } 3078 3079 /* 3080 Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and global size of iscol_local 3081 (see MatCreateSubMatrix_MPIAIJ_nonscalable) 3082 3083 Input 
Parameters:
     mat - matrix
     isrow - parallel row index set; its local indices are a subset of local rows of mat,
             i.e., mat->rstart <= isrow[i] < mat->rend
     iscol - parallel column index set; its local indices are a subset of local columns of mat,
             i.e., mat->cstart <= iscol[i] < mat->cend
   Output Parameter:
     isrow_d,iscol_d - sequential row and column index sets for retrieving mat->A
     iscol_o - sequential column index set for retrieving mat->B
     garray - column map; garray[i] indicates global location of iscol_o[i] in iscol

   Notes:
     isrow_d, iscol_d and iscol_o are created here, and garray is allocated here with
     PetscMalloc1(); nothing in this routine releases them.
 */
PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat,IS isrow,IS iscol,IS *isrow_d,IS *iscol_d,IS *iscol_o,const PetscInt *garray[])
{
  Vec            x,cmap;
  const PetscInt *is_idx;
  PetscScalar    *xarray,*cmaparray;
  PetscInt       ncols,isstart,*idx,m,rstart,*cmap1,count;
  Mat_MPIAIJ     *a=(Mat_MPIAIJ*)mat->data;
  Mat            B=a->B;
  Vec            lvec=a->lvec,lcmap;
  PetscInt       i,cstart,cend,Bn=B->cmap->N;
  MPI_Comm       comm;
  VecScatter     Mvctx=a->Mvctx;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)mat,&comm));
  PetscCall(ISGetLocalSize(iscol,&ncols));

  /* (1) iscol is a sub-column vector of mat, pad it with '-1.' to form a full vector x */
  PetscCall(MatCreateVecs(mat,&x,NULL));
  PetscCall(VecSet(x,-1.0));
  PetscCall(VecDuplicate(x,&cmap));
  PetscCall(VecSet(cmap,-1.0));

  /* Get start indices */
  /* inclusive prefix sum of the local sizes, minus the local size = offset of this process in iscol */
  PetscCallMPI(MPI_Scan(&ncols,&isstart,1,MPIU_INT,MPI_SUM,comm));
  isstart -= ncols;
  PetscCall(MatGetOwnershipRangeColumn(mat,&cstart,&cend));

  PetscCall(ISGetIndices(iscol,&is_idx));
  PetscCall(VecGetArray(x,&xarray));
  PetscCall(VecGetArray(cmap,&cmaparray));
  PetscCall(PetscMalloc1(ncols,&idx));
  for (i=0; i<ncols; i++) {
    /* selected columns carry their own global number in x; unselected entries stay -1 */
    xarray[is_idx[i]-cstart]    = (PetscScalar)is_idx[i];
    cmaparray[is_idx[i]-cstart] = i + isstart;      /* global index of iscol[i] */
    idx[i]                      = is_idx[i]-cstart; /* local index of iscol[i]  */
  }
  PetscCall(VecRestoreArray(x,&xarray));
  PetscCall(VecRestoreArray(cmap,&cmaparray));
  PetscCall(ISRestoreIndices(iscol,&is_idx));

  /* Get iscol_d */
  /* PETSC_OWN_POINTER: idx now belongs to the IS, so it is reallocated below rather than reused */
  PetscCall(ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,iscol_d));
  PetscCall(ISGetBlockSize(iscol,&i));
  PetscCall(ISSetBlockSize(*iscol_d,i));

  /* Get isrow_d */
  PetscCall(ISGetLocalSize(isrow,&m));
  rstart = mat->rmap->rstart;
  PetscCall(PetscMalloc1(m,&idx));
  PetscCall(ISGetIndices(isrow,&is_idx));
  for (i=0; i<m; i++) idx[i] = is_idx[i]-rstart;
  PetscCall(ISRestoreIndices(isrow,&is_idx));

  PetscCall(ISCreateGeneral(PETSC_COMM_SELF,m,idx,PETSC_OWN_POINTER,isrow_d));
  PetscCall(ISGetBlockSize(isrow,&i));
  PetscCall(ISSetBlockSize(*isrow_d,i));

  /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */
  /* NOTE(review): this overwrites the matrix's own work vector a->lvec */
  PetscCall(VecScatterBegin(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD));
  PetscCall(VecScatterEnd(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD));

  PetscCall(VecDuplicate(lvec,&lcmap));

  PetscCall(VecScatterBegin(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD));
  PetscCall(VecScatterEnd(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD));

  /* (3) create sequential iscol_o (a subset of iscol) and isgarray */
  /* off-process column indices */
  count = 0;
  PetscCall(PetscMalloc1(Bn,&idx));
  PetscCall(PetscMalloc1(Bn,&cmap1));

  PetscCall(VecGetArray(lvec,&xarray));
  PetscCall(VecGetArray(lcmap,&cmaparray));
  for (i=0; i<Bn; i++) {
    /* an entry above the -1 padding means the owning process selected that column */
    if (PetscRealPart(xarray[i]) > -1.0) {
      idx[count]   = i;                                     /* local column index in off-diagonal part B */
      cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]); /* column index in submat */
      count++;
    }
  }
  PetscCall(VecRestoreArray(lvec,&xarray));
  PetscCall(VecRestoreArray(lcmap,&cmaparray));

  /* PETSC_COPY_VALUES: the IS keeps its own copy, so idx is freed right after */
  PetscCall(ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_COPY_VALUES,iscol_o));
  /* cannot ensure iscol_o has same blocksize as iscol! */

  PetscCall(PetscFree(idx));
  /* only the first count entries of cmap1 were written */
  *garray = cmap1;

  PetscCall(VecDestroy(&x));
  PetscCall(VecDestroy(&cmap));
  PetscCall(VecDestroy(&lcmap));
  PetscFunctionReturn(0);
}

/* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */
PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *submat)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)mat->data,*asub;
  Mat        M = NULL;
  MPI_Comm   comm;
  IS         iscol_d,isrow_d,iscol_o;
  Mat        Asub = NULL,Bsub = NULL;
  PetscInt   n;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)mat,&comm));

  if (call == MAT_REUSE_MATRIX) {
    /* Retrieve isrow_d, iscol_d and iscol_o from submat */
    PetscCall(PetscObjectQuery((PetscObject)*submat,"isrow_d",(PetscObject*)&isrow_d));
    PetscCheck(isrow_d,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"isrow_d passed in was not used before, cannot reuse");

PetscCall(PetscObjectQuery((PetscObject)*submat,"iscol_d",(PetscObject*)&iscol_d)); 3210 PetscCheck(iscol_d,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_d passed in was not used before, cannot reuse"); 3211 3212 PetscCall(PetscObjectQuery((PetscObject)*submat,"iscol_o",(PetscObject*)&iscol_o)); 3213 PetscCheck(iscol_o,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_o passed in was not used before, cannot reuse"); 3214 3215 /* Update diagonal and off-diagonal portions of submat */ 3216 asub = (Mat_MPIAIJ*)(*submat)->data; 3217 PetscCall(MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->A)); 3218 PetscCall(ISGetLocalSize(iscol_o,&n)); 3219 if (n) { 3220 PetscCall(MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->B)); 3221 } 3222 PetscCall(MatAssemblyBegin(*submat,MAT_FINAL_ASSEMBLY)); 3223 PetscCall(MatAssemblyEnd(*submat,MAT_FINAL_ASSEMBLY)); 3224 3225 } else { /* call == MAT_INITIAL_MATRIX) */ 3226 const PetscInt *garray; 3227 PetscInt BsubN; 3228 3229 /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) 
*/ 3230 PetscCall(ISGetSeqIS_SameColDist_Private(mat,isrow,iscol,&isrow_d,&iscol_d,&iscol_o,&garray)); 3231 3232 /* Create local submatrices Asub and Bsub */ 3233 PetscCall(MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Asub)); 3234 PetscCall(MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Bsub)); 3235 3236 /* Create submatrix M */ 3237 PetscCall(MatCreateMPIAIJWithSeqAIJ(comm,Asub,Bsub,garray,&M)); 3238 3239 /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */ 3240 asub = (Mat_MPIAIJ*)M->data; 3241 3242 PetscCall(ISGetLocalSize(iscol_o,&BsubN)); 3243 n = asub->B->cmap->N; 3244 if (BsubN > n) { 3245 /* This case can be tested using ~petsc/src/tao/bound/tutorials/runplate2_3 */ 3246 const PetscInt *idx; 3247 PetscInt i,j,*idx_new,*subgarray = asub->garray; 3248 PetscCall(PetscInfo(M,"submatrix Bn %" PetscInt_FMT " != BsubN %" PetscInt_FMT ", update iscol_o\n",n,BsubN)); 3249 3250 PetscCall(PetscMalloc1(n,&idx_new)); 3251 j = 0; 3252 PetscCall(ISGetIndices(iscol_o,&idx)); 3253 for (i=0; i<n; i++) { 3254 if (j >= BsubN) break; 3255 while (subgarray[i] > garray[j]) j++; 3256 3257 if (subgarray[i] == garray[j]) { 3258 idx_new[i] = idx[j++]; 3259 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"subgarray[%" PetscInt_FMT "]=%" PetscInt_FMT " cannot < garray[%" PetscInt_FMT "]=%" PetscInt_FMT,i,subgarray[i],j,garray[j]); 3260 } 3261 PetscCall(ISRestoreIndices(iscol_o,&idx)); 3262 3263 PetscCall(ISDestroy(&iscol_o)); 3264 PetscCall(ISCreateGeneral(PETSC_COMM_SELF,n,idx_new,PETSC_OWN_POINTER,&iscol_o)); 3265 3266 } else if (BsubN < n) { 3267 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Columns of Bsub (%" PetscInt_FMT ") cannot be smaller than B's (%" PetscInt_FMT ")",BsubN,asub->B->cmap->N); 3268 } 3269 3270 PetscCall(PetscFree(garray)); 3271 *submat = M; 3272 3273 /* Save isrow_d, iscol_d and iscol_o used in processor for next request 
*/
    PetscCall(PetscObjectCompose((PetscObject)M,"isrow_d",(PetscObject)isrow_d));
    PetscCall(ISDestroy(&isrow_d));

    PetscCall(PetscObjectCompose((PetscObject)M,"iscol_d",(PetscObject)iscol_d));
    PetscCall(ISDestroy(&iscol_d));

    PetscCall(PetscObjectCompose((PetscObject)M,"iscol_o",(PetscObject)iscol_o));
    PetscCall(ISDestroy(&iscol_o));
  }
  PetscFunctionReturn(0);
}

/*
   MatCreateSubMatrix_MPIAIJ - Extracts the submatrix mat(isrow,iscol), picking one of three
   implementations.

   Selection:
     1. isrow and iscol both lie inside the local ownership ranges on every process
        -> MatCreateSubMatrix_MPIAIJ_SameRowColDist()
     2. only isrow does, and the gathered column IS is sorted
        -> MatCreateSubMatrix_MPIAIJ_SameRowDist()
     3. otherwise -> MatCreateSubMatrix_MPIAIJ_nonscalable() with the gathered column IS

   With MAT_REUSE_MATRIX the path is not recomputed; it is recognised from the objects
   composed on *newmat ("isrow_d", "SubIScol" or "ISAllGather").
*/
PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
{
  IS        iscol_local=NULL,isrow_d;
  PetscInt  csize;
  PetscInt  n,i,j,start,end;
  PetscBool sameRowDist=PETSC_FALSE,sameDist[2],tsameDist[2];
  MPI_Comm  comm;

  PetscFunctionBegin;
  /* If isrow has same processor distribution as mat,
     call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */
  if (call == MAT_REUSE_MATRIX) {
    PetscCall(PetscObjectQuery((PetscObject)*newmat,"isrow_d",(PetscObject*)&isrow_d));
    if (isrow_d) {
      sameRowDist  = PETSC_TRUE;
      tsameDist[1] = PETSC_TRUE; /* sameColDist */
    } else {
      PetscCall(PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_local));
      if (iscol_local) {
        sameRowDist  = PETSC_TRUE;
        tsameDist[1] = PETSC_FALSE; /* !sameColDist */
      }
    }
    /* if neither object was found, tsameDist[1] stays unset; it is only read when sameRowDist is true */
  } else {
    /* Check if isrow has same processor distribution as mat */
    sameDist[0] = PETSC_FALSE;
    PetscCall(ISGetLocalSize(isrow,&n));
    if (!n) {
      sameDist[0] = PETSC_TRUE;
    } else {
      PetscCall(ISGetMinMax(isrow,&i,&j));
      PetscCall(MatGetOwnershipRange(mat,&start,&end));
      if (i >= start && j < end) {
        sameDist[0] = PETSC_TRUE;
      }
    }

    /* Check if iscol has same processor distribution as mat */
    sameDist[1] = PETSC_FALSE;
    PetscCall(ISGetLocalSize(iscol,&n));
    if (!n) {
      sameDist[1] = PETSC_TRUE;
    } else {
      PetscCall(ISGetMinMax(iscol,&i,&j));
      PetscCall(MatGetOwnershipRangeColumn(mat,&start,&end));
      if (i >= start && j < end) sameDist[1] = PETSC_TRUE;
    }

    /* logical AND across processes: a distribution counts as "same" only if it is on every process */
    PetscCall(PetscObjectGetComm((PetscObject)mat,&comm));
    PetscCall(MPIU_Allreduce(&sameDist,&tsameDist,2,MPIU_BOOL,MPI_LAND,comm));
    sameRowDist = tsameDist[0];
  }

  if (sameRowDist) {
    if (tsameDist[1]) { /* sameRowDist & sameColDist */
      /* isrow and iscol have same processor distribution as mat */
      PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat,isrow,iscol,call,newmat));
      PetscFunctionReturn(0);
    } else { /* sameRowDist */
      /* isrow has same processor distribution as mat */
      if (call == MAT_INITIAL_MATRIX) {
        PetscBool sorted;
        PetscCall(ISGetSeqIS_Private(mat,iscol,&iscol_local));
        PetscCall(ISGetLocalSize(iscol_local,&n)); /* local size of iscol_local = global columns of newmat */
        PetscCall(ISGetSize(iscol,&i));
        PetscCheck(n == i,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"n %" PetscInt_FMT " != size of iscol %" PetscInt_FMT,n,i);

        PetscCall(ISSorted(iscol_local,&sorted));
        if (sorted) {
          /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local be sorted; it can have duplicate indices */
          PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,iscol_local,MAT_INITIAL_MATRIX,newmat));
          PetscFunctionReturn(0);
        }
        /* not sorted: fall through to the general case, which reuses the iscol_local built above */
      } else { /* call == MAT_REUSE_MATRIX */
        IS iscol_sub;
        PetscCall(PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub));
        if (iscol_sub) {
          PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,NULL,call,newmat));
          PetscFunctionReturn(0);
        }
      }
    }
  }

  /* General case: iscol -> iscol_local which has global size of iscol */
  if (call == MAT_REUSE_MATRIX) {
    PetscCall(PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local));
    PetscCheck(iscol_local,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
  } else {
    if (!iscol_local) {
      PetscCall(ISGetSeqIS_Private(mat,iscol,&iscol_local));
    }
  }

  PetscCall(ISGetLocalSize(iscol,&csize));
  PetscCall(MatCreateSubMatrix_MPIAIJ_nonscalable(mat,isrow,iscol_local,csize,call,newmat));

  if (call == MAT_INITIAL_MATRIX) {
    /* keep the gathered IS on the result for a later MAT_REUSE_MATRIX call, then drop the local reference */
    PetscCall(PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local));
    PetscCall(ISDestroy(&iscol_local));
  }
  PetscFunctionReturn(0);
}

/*@C
      MatCreateMPIAIJWithSeqAIJ - creates a MPIAIJ matrix using SeqAIJ matrices that contain the "diagonal"
         and "off-diagonal" part of the matrix in CSR format.

   Collective

   Input Parameters:
+  comm - MPI communicator
.  A - "diagonal" portion of matrix
.  B - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine
-  garray - global index of B columns

   Output Parameter:
.   mat - the matrix, with input A as its local diagonal matrix
   Level: advanced

   Notes:
       See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix.
       A becomes part of output mat, B is destroyed by this routine. The user cannot use A and B anymore.
.seealso: `MatCreateMPIAIJWithSplitArrays()`
@*/
PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm,Mat A,Mat B,const PetscInt garray[],Mat *mat)
{
  Mat_MPIAIJ        *maij;
  Mat_SeqAIJ        *b=(Mat_SeqAIJ*)B->data,*bnew;
  PetscInt          *oi=b->i,*oj=b->j,i,nz,col;
  const PetscScalar *oa;
  Mat               Bnew;
  PetscInt          m,n,N;

  PetscFunctionBegin;
  PetscCall(MatCreate(comm,mat));
  PetscCall(MatGetSize(A,&m,&n));
  /* A and B must agree on the number of rows and on the row block size */
  PetscCheck(m == B->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Am %" PetscInt_FMT " != Bm %" PetscInt_FMT,m,B->rmap->N);
  PetscCheck(A->rmap->bs == B->rmap->bs,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A row bs %" PetscInt_FMT " != B row bs %" PetscInt_FMT,A->rmap->bs,B->rmap->bs);
  /* remove check below; When B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be same as A */
  /* PetscCheck(A->cmap->bs == B->cmap->bs,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %" PetscInt_FMT " != B column bs %" PetscInt_FMT,A->cmap->bs,B->cmap->bs); */

  /* Get global columns of mat: N is the sum over comm of the column counts of the A blocks */
  PetscCall(MPIU_Allreduce(&n,&N,1,MPIU_INT,MPI_SUM,comm));

  PetscCall(MatSetSizes(*mat,m,n,PETSC_DECIDE,N));
  PetscCall(MatSetType(*mat,MATMPIAIJ));
  PetscCall(MatSetBlockSizes(*mat,A->rmap->bs,A->cmap->bs));
  maij = (Mat_MPIAIJ*)(*mat)->data;

  /* the storage comes from A and B, so mark *mat as preallocated by hand */
  (*mat)->preallocated = PETSC_TRUE;

  PetscCall(PetscLayoutSetUp((*mat)->rmap));
  PetscCall(PetscLayoutSetUp((*mat)->cmap));

  /* Set A as diagonal portion of *mat */
  maij->A = A;

  /* Rewrite, in place, every column index stored in B through garray[];
     oi[m] is the last row-pointer entry, i.e. the number of stored entries of B */
  nz = oi[m];
  for (i=0; i<nz; i++) {
    col   = oj[i];
    oj[i] = garray[col];
  }

  /* Set Bnew as off-diagonal portion of *mat; Bnew is built directly on B's i/j/value arrays (m rows, N columns) */
  PetscCall(MatSeqAIJGetArrayRead(B,&oa));
  PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,N,oi,oj,(PetscScalar*)oa,&Bnew));
  PetscCall(MatSeqAIJRestoreArrayRead(B,&oa));
  bnew        = (Mat_SeqAIJ*)Bnew->data;
  bnew->maxnz = b->maxnz; /* allocated nonzeros of B */
  maij->B     = Bnew;

  PetscCheck(B->rmap->N == Bnew->rmap->N,PETSC_COMM_SELF,PETSC_ERR_PLIB,"BN %" PetscInt_FMT " != BnewN %" PetscInt_FMT,B->rmap->N,Bnew->rmap->N);

  /* Hand the arrays over: clear B's ownership flags BEFORE destroying it so the shared arrays survive */
  b->singlemalloc = PETSC_FALSE; /* B arrays are shared by Bnew */
  b->free_a       = PETSC_FALSE;
  b->free_ij      = PETSC_FALSE;
  PetscCall(MatDestroy(&B));

  bnew->singlemalloc = PETSC_TRUE; /* arrays will be freed by MatDestroy(&Bnew) */
  bnew->free_a       = PETSC_TRUE;
  bnew->free_ij      = PETSC_TRUE;

  /* condense columns of maij->B */
  /* MAT_NO_OFF_PROC_ENTRIES is switched on only for the duration of this assembly and then switched off again */
  PetscCall(MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE));
  PetscCall(MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY));
  PetscCall(MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE));
  PetscCall(MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE));
  PetscFunctionReturn(0);
}

extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool,Mat*);

/*
   MatCreateSubMatrix_MPIAIJ_SameRowDist - builds (MAT_INITIAL_MATRIX) or refills (MAT_REUSE_MATRIX)
   the parallel submatrix *newmat of mat selected by isrow/iscol.

   Steps, as numbered in the body:
   (1)-(2) reduce iscol_local to iscol_sub, the requested columns that lie in this process's
           diagonal column range [cstart,cend) or appear in garray, and record in iscmap the
           position each kept column has in iscol_local;
   (3)     extract the sequential matrix Msub with MatCreateSubMatrices_MPIAIJ_SingleIS_Local();
   (4)     create and preallocate the parallel result M (initial call) or zero the existing one (reuse);
   (5)     copy the rows of Msub into M, translating column indices through iscmap, and assemble.

   On the initial call Msub, iscol_sub, iscmap and (when non-NULL) iscol_local are composed with
   *newmat under the names "SubMatrix", "SubIScol", "Subcmap" and "ISAllGather"; the reuse path
   queries the first three back and errors out if any of them is missing.
*/
PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat,IS isrow,IS iscol,IS iscol_local,MatReuse call,Mat *newmat)
{
  PetscInt       i,m,n,rstart,row,rend,nz,j,bs,cbs;
  PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
  Mat_MPIAIJ     *a=(Mat_MPIAIJ*)mat->data;
  Mat            M,Msub,B=a->B;
  MatScalar      *aa;
  Mat_SeqAIJ     *aij;
  PetscInt       *garray = a->garray,*colsub,Ncols;
  PetscInt       count,Bn=B->cmap->N,cstart=mat->cmap->rstart,cend=mat->cmap->rend;
  IS             iscol_sub,iscmap;
  const PetscInt *is_idx,*cmap;
  PetscBool      allcolumns=PETSC_FALSE;
  MPI_Comm       comm;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)mat,&comm));
  if (call == MAT_REUSE_MATRIX) {
    /* recover the helper objects composed with *newmat by the initial call */
    PetscCall(PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub));
    PetscCheck(iscol_sub,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"SubIScol passed in was not used before, cannot reuse");
    PetscCall(ISGetLocalSize(iscol_sub,&count));

    PetscCall(PetscObjectQuery((PetscObject)*newmat,"Subcmap",(PetscObject*)&iscmap));
    PetscCheck(iscmap,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Subcmap passed in was not used before, cannot reuse");

    PetscCall(PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Msub));
    PetscCheck(Msub,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");

    PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_REUSE_MATRIX,PETSC_FALSE,&Msub));

  } else { /* call == MAT_INITIAL_MATRIX) */
    PetscBool flg;

    PetscCall(ISGetLocalSize(iscol,&n));
    PetscCall(ISGetSize(iscol,&Ncols));

    /* (1) iscol -> nonscalable iscol_local */
    /* Check for special case: each processor gets entire matrix columns */
    PetscCall(ISIdentity(iscol_local,&flg));
    if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE;
    /* logical AND across the communicator: the shortcut is taken only if every process qualifies */
    PetscCall(MPIU_Allreduce(MPI_IN_PLACE,&allcolumns,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)mat)));
    if (allcolumns) {
      /* iscol_sub aliases iscol_local (extra reference taken); the column map is the stride 0..n-1 */
      iscol_sub = iscol_local;
      PetscCall(PetscObjectReference((PetscObject)iscol_local));
      PetscCall(ISCreateStride(PETSC_COMM_SELF,n,0,1,&iscmap));

    } else {
      /* (2) iscol_local -> iscol_sub and iscmap.
         Implementation below requires iscol_local be sorted, it can have duplicate indices */
      PetscInt *idx,*cmap1,k;
      PetscCall(PetscMalloc1(Ncols,&idx));
      PetscCall(PetscMalloc1(Ncols,&cmap1));
      PetscCall(ISGetIndices(iscol_local,&is_idx));
      count = 0;
      k     = 0; /* cursor into garray[]; it only moves forward */
      for (i=0; i<Ncols; i++) {
        j = is_idx[i];
        if (j >= cstart && j < cend) {
          /* diagonal part of mat */
          idx[count]     = j;
          cmap1[count++] = i; /* column index in submat */
        } else if (Bn) {
          /* off-diagonal part of mat */
          if (j == garray[k]) {
            idx[count]     = j;
            cmap1[count++] = i;  /* column index in submat */
          } else if (j > garray[k]) {
            /* advance k until garray[k] >= j or the last entry of garray is reached */
            while (j > garray[k] && k < Bn-1) k++;
            if (j == garray[k]) {
              idx[count]     = j;
              cmap1[count++] = i; /* column index in submat */
            }
          }
        }
      }
      PetscCall(ISRestoreIndices(iscol_local,&is_idx));

      /* both index sets take ownership of their arrays (PETSC_OWN_POINTER) */
      PetscCall(ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_OWN_POINTER,&iscol_sub));
      PetscCall(ISGetBlockSize(iscol,&cbs));
      PetscCall(ISSetBlockSize(iscol_sub,cbs));

      PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local),count,cmap1,PETSC_OWN_POINTER,&iscmap));
    }

    /* (3) Create sequential Msub */
    PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_INITIAL_MATRIX,allcolumns,&Msub));
  }

  PetscCall(ISGetLocalSize(iscol_sub,&count));
  aij  = (Mat_SeqAIJ*)(Msub)->data;
  ii   = aij->i;
  PetscCall(ISGetIndices(iscmap,&cmap));

  /*
      m - number of local rows
      Ncols - number of columns (same on all processors)
      rstart - first row in new global matrix generated
  */
  PetscCall(MatGetSize(Msub,&m,NULL));

  if (call == MAT_INITIAL_MATRIX) {
    /* (4) Create parallel newmat */
    PetscMPIInt    rank,size;
    PetscInt       csize;

    PetscCallMPI(MPI_Comm_size(comm,&size));
    PetscCallMPI(MPI_Comm_rank(comm,&rank));

    /*
        Determine the number of non-zeros in the diagonal and off-diagonal
        portions of the matrix in order to do correct preallocation
    */

    /* first get start and end of "diagonal" columns */
    PetscCall(ISGetLocalSize(iscol,&csize));
    if (csize == PETSC_DECIDE) {
      PetscCall(ISGetSize(isrow,&mglobal));
      if (mglobal == Ncols) { /* square matrix */
        nlocal = m;
      } else {
        /* even split of Ncols; the first (Ncols % size) ranks receive one extra column */
        nlocal = Ncols/size + ((Ncols % size) > rank);
      }
    } else {
      nlocal = csize;
    }
    /* inclusive prefix sum: rend is one past this rank's last column, rstart its first.
       Note rstart/rend denote a COLUMN range here; rstart is reassigned to the row start in step (5) */
    PetscCallMPI(MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm));
    rstart = rend - nlocal;
    PetscCheck(rank != size - 1 || rend == Ncols,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT,rend,Ncols);

    /* next, compute all the lengths */
    jj    = aij->j;
    PetscCall(PetscMalloc1(2*m+1,&dlens));
    olens = dlens + m; /* olens shares the dlens allocation */
    for (i=0; i<m; i++) {
      jend = ii[i+1] - ii[i];
      olen = 0;
      dlen = 0;
      for (j=0; j<jend; j++) {
        /* classify by the column's position in the submatrix (via cmap), not by its index in Msub */
        if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++;
        else dlen++;
        jj++;
      }
      olens[i] = olen;
      dlens[i] = dlen;
    }

    PetscCall(ISGetBlockSize(isrow,&bs));
    PetscCall(ISGetBlockSize(iscol,&cbs));

    PetscCall(MatCreate(comm,&M));
    PetscCall(MatSetSizes(M,m,nlocal,PETSC_DECIDE,Ncols));
    PetscCall(MatSetBlockSizes(M,bs,cbs));
    PetscCall(MatSetType(M,((PetscObject)mat)->type_name));
    PetscCall(MatMPIAIJSetPreallocation(M,0,dlens,0,olens));
    PetscCall(PetscFree(dlens));

  } else { /* call == MAT_REUSE_MATRIX */
    M    = *newmat;
    PetscCall(MatGetLocalSize(M,&i,NULL));
    PetscCheck(i == m,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
    PetscCall(MatZeroEntries(M));
    /*
         The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
       rather than the slower MatSetValues().
    */
    M->was_assembled = PETSC_TRUE;
    M->assembled     = PETSC_FALSE;
  }

  /* (5) Set values of Msub to *newmat */
  /* colsub is scratch for one row's translated column indices; it is sized by count, the local size of iscol_sub */
  PetscCall(PetscMalloc1(count,&colsub));
  PetscCall(MatGetOwnershipRange(M,&rstart,NULL));

  jj   = aij->j;
  PetscCall(MatSeqAIJGetArrayRead(Msub,(const PetscScalar**)&aa));
  for (i=0; i<m; i++) {
    row = rstart + i;
    nz  = ii[i+1] - ii[i];
    for (j=0; j<nz; j++) colsub[j] = cmap[jj[j]];
    PetscCall(MatSetValues_MPIAIJ(M,1,&row,nz,colsub,aa,INSERT_VALUES));
    jj += nz; aa += nz;
  }
  PetscCall(MatSeqAIJRestoreArrayRead(Msub,(const PetscScalar**)&aa));
  PetscCall(ISRestoreIndices(iscmap,&cmap));

  PetscCall(MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY));

  PetscCall(PetscFree(colsub));

  /* save Msub, iscol_sub and iscmap used in processor for next request */
  /* each object is composed with *newmat and the local reference is then dropped */
  if (call == MAT_INITIAL_MATRIX) {
    *newmat = M;
    PetscCall(PetscObjectCompose((PetscObject)(*newmat),"SubMatrix",(PetscObject)Msub));
    PetscCall(MatDestroy(&Msub));

    PetscCall(PetscObjectCompose((PetscObject)(*newmat),"SubIScol",(PetscObject)iscol_sub));
    PetscCall(ISDestroy(&iscol_sub));

    PetscCall(PetscObjectCompose((PetscObject)(*newmat),"Subcmap",(PetscObject)iscmap));
    PetscCall(ISDestroy(&iscmap));

    if (iscol_local) {
      PetscCall(PetscObjectCompose((PetscObject)(*newmat),"ISAllGather",(PetscObject)iscol_local));
      PetscCall(ISDestroy(&iscol_local));
    }
  }
  PetscFunctionReturn(0);
}

/*
    Not great since it makes two copies of the submatrix, first an SeqAIJ
  in local and then by concatenating the local matrices the end result.
  Writing it directly would be much like MatCreateSubMatrices_MPIAIJ()

  Note: This requires a sequential iscol with all indices.
*/
PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
{
  PetscMPIInt    rank,size;
  PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs;
  PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
  Mat            M,Mreuse;
  MatScalar      *aa,*vwork;
  MPI_Comm       comm;
  Mat_SeqAIJ     *aij;
  PetscBool      colflag,allcolumns=PETSC_FALSE;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)mat,&comm));
  PetscCallMPI(MPI_Comm_rank(comm,&rank));
  PetscCallMPI(MPI_Comm_size(comm,&size));

  /* Check for special case: each processor gets entire matrix columns */
  PetscCall(ISIdentity(iscol,&colflag));
  PetscCall(ISGetLocalSize(iscol,&n));
  if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE;
  /* logical AND across the communicator: the shortcut is taken only if every process qualifies */
  PetscCall(MPIU_Allreduce(MPI_IN_PLACE,&allcolumns,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)mat)));

  /* Step 1: obtain the sequential submatrix Mreuse; on reuse it is the one composed with *newmat as "SubMatrix" */
  if (call == MAT_REUSE_MATRIX) {
    PetscCall(PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse));
    PetscCheck(Mreuse,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
    PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,allcolumns,&Mreuse));
  } else {
    PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,allcolumns,&Mreuse));
  }

  /*
      m - number of local rows
      n - number of columns (same on all processors)
      rstart - first row in new global matrix generated
  */
  PetscCall(MatGetSize(Mreuse,&m,&n));
  PetscCall(MatGetBlockSizes(Mreuse,&bs,&cbs));
  if (call == MAT_INITIAL_MATRIX) {
    aij = (Mat_SeqAIJ*)(Mreuse)->data;
    ii  = aij->i;
    jj  = aij->j;

    /*
        Determine the number of non-zeros in the diagonal and off-diagonal
        portions of the matrix in order to do correct preallocation
    */

    /* first get start and end of "diagonal" columns */
    if (csize == PETSC_DECIDE) {
      PetscCall(ISGetSize(isrow,&mglobal));
      if (mglobal == n) { /* square matrix */
        nlocal = m;
      } else {
        /* even split of n; the first (n % size) ranks receive one extra column */
        nlocal = n/size + ((n % size) > rank);
      }
    } else {
      nlocal = csize;
    }
    /* inclusive prefix sum: rend is one past this rank's last column, rstart its first.
       Note rstart/rend denote a COLUMN range here; both are overwritten with the row range further down */
    PetscCallMPI(MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm));
    rstart = rend - nlocal;
    PetscCheck(rank != size - 1 || rend == n,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT,rend,n);

    /* next, compute all the lengths */
    PetscCall(PetscMalloc1(2*m+1,&dlens));
    olens = dlens + m; /* olens shares the dlens allocation */
    for (i=0; i<m; i++) {
      jend = ii[i+1] - ii[i];
      olen = 0;
      dlen = 0;
      for (j=0; j<jend; j++) {
        if (*jj < rstart || *jj >= rend) olen++;
        else dlen++;
        jj++;
      }
      olens[i] = olen;
      dlens[i] = dlen;
    }
    PetscCall(MatCreate(comm,&M));
    PetscCall(MatSetSizes(M,m,nlocal,PETSC_DECIDE,n));
    PetscCall(MatSetBlockSizes(M,bs,cbs));
    PetscCall(MatSetType(M,((PetscObject)mat)->type_name));
    PetscCall(MatMPIAIJSetPreallocation(M,0,dlens,0,olens));
    PetscCall(PetscFree(dlens));
  } else {
    PetscInt ml,nl;

    M    = *newmat;
    /* only the local row count ml is checked; nl is fetched but not used */
    PetscCall(MatGetLocalSize(M,&ml,&nl));
    PetscCheck(ml == m,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
    PetscCall(MatZeroEntries(M));
    /*
         The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
       rather than the slower MatSetValues().
    */
    M->was_assembled = PETSC_TRUE;
    M->assembled     = PETSC_FALSE;
  }
  PetscCall(MatGetOwnershipRange(M,&rstart,&rend));
  aij  = (Mat_SeqAIJ*)(Mreuse)->data;
  ii   = aij->i;
  jj   = aij->j;

  /* trigger copy to CPU if needed */
  PetscCall(MatSeqAIJGetArrayRead(Mreuse,(const PetscScalar**)&aa));
  /* insert the rows of Mreuse one at a time; cwork/vwork point at the current row's columns and values */
  for (i=0; i<m; i++) {
    row   = rstart + i;
    nz    = ii[i+1] - ii[i];
    cwork = jj; jj += nz;
    vwork = aa; aa += nz;
    PetscCall(MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES));
  }
  PetscCall(MatSeqAIJRestoreArrayRead(Mreuse,(const PetscScalar**)&aa));

  PetscCall(MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY));
  *newmat = M;

  /* save submatrix used in processor for next request */
  if (call == MAT_INITIAL_MATRIX) {
    PetscCall(PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse));
    PetscCall(MatDestroy(&Mreuse));
  }
  PetscFunctionReturn(0);
}

/*
   MatMPIAIJSetPreallocationCSR_MPIAIJ - preallocates B and inserts its local rows from CSR arrays.

   Ii - row pointers for the local rows; Ii[0] must be 0
   J  - global column indices
   v  - values, or NULL (MatSetValues_MPIAIJ() is then called with a NULL value array)

   Per local row, the columns inside [cstart,cend) are counted as diagonal-block entries and the
   rest as off-diagonal entries; the rows are then inserted and the matrix is assembled with
   nooffprocentries temporarily forced to PETSC_TRUE.
*/
PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
{
  PetscInt       m,cstart, cend,j,nnz,i,d;
  PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
  const PetscInt *JJ;
  PetscBool      nooffprocentries;

  PetscFunctionBegin;
  PetscCheck(Ii[0] == 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %" PetscInt_FMT,Ii[0]);

  PetscCall(PetscLayoutSetUp(B->rmap));
  PetscCall(PetscLayoutSetUp(B->cmap));
  m      = B->rmap->n;
  cstart = B->cmap->rstart;
  cend   = B->cmap->rend;
  rstart = B->rmap->rstart;

  PetscCall(PetscCalloc2(m,&d_nnz,m,&o_nnz));

  /* debug-only sanity checks; only the first and last column of each row are range-checked */
  if (PetscDefined(USE_DEBUG)) {
    for (i=0; i<m; i++) {
      nnz = Ii[i+1]- Ii[i];
      JJ  = J + Ii[i];
      PetscCheck(nnz >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %" PetscInt_FMT " has a negative %" PetscInt_FMT " number of columns",i,nnz);
      PetscCheck(!nnz || !(JJ[0] < 0),PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %" PetscInt_FMT " starts with negative column index %" PetscInt_FMT,i,JJ[0]);
      PetscCheck(!nnz || !(JJ[nnz-1] >= B->cmap->N),PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %" PetscInt_FMT " ends with too large a column index %" PetscInt_FMT " (max allowed %" PetscInt_FMT ")",i,JJ[nnz-1],B->cmap->N);
    }
  }

  /* count diagonal-block (d) and off-diagonal (nnz - d) entries per row.
     NOTE(review): nnz_max is accumulated here but never read afterwards in this function */
  for (i=0; i<m; i++) {
    nnz     = Ii[i+1]- Ii[i];
    JJ      = J + Ii[i];
    nnz_max = PetscMax(nnz_max,nnz);
    d       = 0;
    for (j=0; j<nnz; j++) {
      if (cstart <= JJ[j] && JJ[j] < cend) d++;
    }
    d_nnz[i] = d;
    o_nnz[i] = nnz - d;
  }
  PetscCall(MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz));
  PetscCall(PetscFree2(d_nnz,o_nnz));

  /* insert every local row; ii is the global row number */
  for (i=0; i<m; i++) {
    ii   = i + rstart;
    PetscCall(MatSetValues_MPIAIJ(B,1,&ii,Ii[i+1] - Ii[i],J+Ii[i], v ? v + Ii[i] : NULL,INSERT_VALUES));
  }
  /* only locally owned rows were set above, so assemble with nooffprocentries on and then restore the caller's setting */
  nooffprocentries    = B->nooffprocentries;
  B->nooffprocentries = PETSC_TRUE;
  PetscCall(MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY));
  B->nooffprocentries = nooffprocentries;

  PetscCall(MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE));
  PetscFunctionReturn(0);
}

/*@
   MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
   (the default parallel PETSc format).

   Collective

   Input Parameters:
+  B - the matrix
.  i - the indices into j for the start of each local row (starts with zero)
.  j - the column indices for each local row (starts with zero)
-  v - optional values in the matrix

   Level: developer

   Notes:
       The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
     thus you CANNOT change the matrix entries by changing the values of v[] after you have
     called this routine.
Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.

       The i and j indices are 0 based, and the i indices are offsets into the local j array.

       The format used for the sparse matrix input is equivalent to a
    row-major ordering, i.e., for the following matrix, the input data expected is
    as shown

$        1 0 0
$        2 0 3     P0
$       -------
$        4 5 6     P1
$
$     Process0 [P0]: rows_owned=[0,1]
$        i =  {0,1,3}  [size = nrow+1  = 2+1]
$        j =  {0,0,2}  [size = 3]
$        v =  {1,2,3}  [size = 3]
$
$     Process1 [P1]: rows_owned=[2]
$        i =  {0,3}    [size = nrow+1  = 1+1]
$        j =  {0,1,2}  [size = 3]
$        v =  {4,5,6}  [size = 3]

.seealso: `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatCreateAIJ()`, `MATMPIAIJ`,
          `MatCreateSeqAIJWithArrays()`, `MatCreateMPIAIJWithSplitArrays()`
@*/
PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
{
  PetscFunctionBegin;
  /* Dispatch by name to the method registered on B as "MatMPIAIJSetPreallocationCSR_C", passing (B,i,j,v).
     NOTE(review): PetscTryMethod presumably does nothing when B has no such method - confirm against the macro definition */
  PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));
  PetscFunctionReturn(0);
}

/*@C
   MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
   (the default parallel PETSc format).  For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective

   Input Parameters:
+  B - the matrix
.  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
           (same value is used for all local rows)
.
d_nnz - array containing the number of nonzeros in the various rows of the
           DIAGONAL portion of the local submatrix (possibly different for each row)
           or NULL (PETSC_NULL_INTEGER in Fortran), if d_nz is used to specify the nonzero structure.
           The size of this array is equal to the number of local rows, i.e. 'm'.
           For matrices that will be factored, you must leave room for (and set)
           the diagonal entry even if it is zero.
.  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
           submatrix (same value is used for all local rows).
-  o_nnz - array containing the number of nonzeros in the various rows of the
           OFF-DIAGONAL portion of the local submatrix (possibly different for
           each row) or NULL (PETSC_NULL_INTEGER in Fortran), if o_nz is used to specify the nonzero
           structure. The size of this array is equal to the number
           of local rows, i.e. 'm'.

   If the *_nnz parameter is given then the *_nz parameter is ignored

   The AIJ format (also called the Yale sparse matrix format or
   compressed row storage (CSR)), is fully compatible with standard Fortran 77
   storage.  The stored row and column indices begin with zero.
   See Users-Manual: ch_mat for details.

   The parallel matrix is partitioned such that the first m0 rows belong to
   process 0, the next m1 rows belong to process 1, the next m2 rows belong
   to process 2 etc., where m0,m1,m2... are the input parameter 'm'.

   The DIAGONAL portion of the local submatrix of a processor can be defined
   as the submatrix which is obtained by extracting the part corresponding to
   the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
   first row that belongs to the processor, r2 is the last row belonging to
   this processor, and c1-c2 is the range of indices of the local part of a
   vector suitable for applying the matrix to.  This is an mxn matrix.
In the
   common case of a square matrix, the row and column ranges are the same and
   the DIAGONAL part is also square. The remaining portion of the local
   submatrix (mxN) constitutes the OFF-DIAGONAL portion.

   If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.

   You can call MatGetInfo() to get information on how effective the preallocation was;
   for example the fields mallocs,nz_allocated,nz_used,nz_unneeded.
   You can also run with the option -info and look for messages with the string
   malloc in them to see if additional memory allocation was needed.

   Example usage:

   Consider the following 8x8 matrix with 34 non-zero values, that is
   assembled across 3 processors. Let's assume that proc0 owns 3 rows,
   proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
   as follows:

.vb
            1  2  0  |  0  3  0  |  0  4
    Proc0   0  5  6  |  7  0  0  |  8  0
            9  0 10  | 11  0  0  | 12  0
    -------------------------------------
           13  0 14  | 15 16 17  |  0  0
    Proc1   0 18  0  | 19 20 21  |  0  0
            0  0  0  | 22 23  0  | 24  0
    -------------------------------------
    Proc2  25 26 27  |  0  0 28  | 29  0
           30  0  0  | 31 32 33  |  0 34
.ve

   This can be represented as a collection of submatrices as:

.vb
      A B C
      D E F
      G H I
.ve

   Where the submatrices A,B,C are owned by proc0, D,E,F are
   owned by proc1, G,H,I are owned by proc2.

   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'M','N' parameters are 8,8, and have the same values on all procs.

   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
   part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.

   When d_nz, o_nz parameters are specified, d_nz storage elements are
   allocated for every row of the local diagonal submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submat.
   One way to choose d_nz and o_nz is to use the max nonzeros per local
   rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
   In this case, the values of d_nz,o_nz are:
.vb
     proc0 : d_nz = 2, o_nz = 2
     proc1 : d_nz = 3, o_nz = 2
     proc2 : d_nz = 1, o_nz = 4
.ve
   We are allocating m*(d_nz+o_nz) storage locations for every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc2, i.e. we are using 12+15+10=37 storage locations to store
   34 values.

   When d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
   In the above case the values for d_nnz,o_nnz are:
.vb
     proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2: d_nnz = [1,1]   and o_nnz = [4,4]
.ve
   Here the space allocated is the sum of all the above values, i.e. 34, and
   hence pre-allocation is perfect.
Level: intermediate

.seealso: `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatCreateAIJ()`, `MatMPIAIJSetPreallocationCSR()`,
          `MATMPIAIJ`, `MatGetInfo()`, `PetscSplitOwnership()`
@*/
PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
{
  PetscFunctionBegin;
  /* argument 1 must be a Mat object with its type set */
  PetscValidHeaderSpecific(B,MAT_CLASSID,1);
  PetscValidType(B,1);
  /* Dispatch by name to the method registered on B as "MatMPIAIJSetPreallocation_C".
     NOTE(review): PetscTryMethod presumably does nothing when B has no such method - confirm against the macro definition */
  PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));
  PetscFunctionReturn(0);
}

/*@
     MatCreateMPIAIJWithArrays - creates an MPI AIJ matrix using arrays that contain the local rows
         in standard CSR format.

   Collective

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (Cannot be PETSC_DECIDE)
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
       calculated if N is given) For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.   i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
.   j - column indices
-   a - matrix values

   Output Parameter:
.   mat - the matrix

   Level: intermediate

   Notes:
       The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
     thus you CANNOT change the matrix entries by changing the values of a[] after you have
     called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.

       The i and j indices are 0 based, and the i indices are offsets into the local j array.
The format used for the sparse matrix input is equivalent to a
    row-major ordering, i.e., for the following matrix, the input data expected is
    as shown below.

       Once you have created the matrix you can update it with new numerical values using MatUpdateMPIAIJWithArrays()

$        1 0 0
$        2 0 3     P0
$       -------
$        4 5 6     P1
$
$     Process0 [P0]: rows_owned=[0,1]
$        i =  {0,1,3}  [size = nrow+1  = 2+1]
$        j =  {0,0,2}  [size = 3]
$        v =  {1,2,3}  [size = 3]
$
$     Process1 [P1]: rows_owned=[2]
$        i =  {0,3}    [size = nrow+1  = 1+1]
$        j =  {0,1,2}  [size = 3]
$        v =  {4,5,6}  [size = 3]

.seealso: `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
          `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArrays()`
@*/
PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
{
  PetscFunctionBegin;
  /* a NULL i passes this check; a non-NULL i must start at 0 */
  PetscCheck(!i || !i[0],PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
  PetscCheck(m >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
  PetscCall(MatCreate(comm,mat));
  PetscCall(MatSetSizes(*mat,m,n,M,N));
  /* PetscCall(MatSetBlockSizes(M,bs,cbs)); */
  PetscCall(MatSetType(*mat,MATMPIAIJ));
  /* preallocation, insertion of the values and assembly are all delegated to the CSR preallocation routine */
  PetscCall(MatMPIAIJSetPreallocationCSR(*mat,i,j,a));
  PetscFunctionReturn(0);
}

/*@
     MatUpdateMPIAIJWithArrays - updates an MPI AIJ matrix using arrays that contain the local rows
         in standard CSR format. Only the numerical values are updated; the other arrays must be identical

   Collective

   Input Parameters:
+  mat - the matrix
.  m - number of local rows (Cannot be PETSC_DECIDE)
.
n - This value should be the same as the local size used in creating the 4144 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4145 calculated if N is given) For square matrices n is almost always m. 4146 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4147 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4148 . Ii - row indices; that is Ii[0] = 0, Ii[row] = Ii[row-1] + number of elements in that row of the matrix 4149 . J - column indices 4150 - v - matrix values 4151 4152 Level: intermediate 4153 4154 .seealso: `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 4155 `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArrays()` 4156 @*/ 4157 PetscErrorCode MatUpdateMPIAIJWithArrays(Mat mat,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[]) 4158 { 4159 PetscInt cstart,nnz,i,j; 4160 PetscInt *ld; 4161 PetscBool nooffprocentries; 4162 Mat_MPIAIJ *Aij = (Mat_MPIAIJ*)mat->data; 4163 Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)Aij->A->data; 4164 PetscScalar *ad,*ao; 4165 const PetscInt *Adi = Ad->i; 4166 PetscInt ldi,Iii,md; 4167 4168 PetscFunctionBegin; 4169 PetscCheck(Ii[0] == 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 4170 PetscCheck(m >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4171 PetscCheck(m == mat->rmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Local number of rows cannot change from call to MatUpdateMPIAIJWithArrays()"); 4172 PetscCheck(n == mat->cmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Local number of columns cannot change from call to MatUpdateMPIAIJWithArrays()"); 4173 4174 PetscCall(MatSeqAIJGetArrayWrite(Aij->A,&ad)); 4175 PetscCall(MatSeqAIJGetArrayWrite(Aij->B,&ao)); 4176 cstart = mat->cmap->rstart; 4177 if 
(!Aij->ld) { 4178 /* count number of entries below block diagonal */ 4179 PetscCall(PetscCalloc1(m,&ld)); 4180 Aij->ld = ld; 4181 for (i=0; i<m; i++) { 4182 nnz = Ii[i+1]- Ii[i]; 4183 j = 0; 4184 while (J[j] < cstart && j < nnz) {j++;} 4185 J += nnz; 4186 ld[i] = j; 4187 } 4188 } else { 4189 ld = Aij->ld; 4190 } 4191 4192 for (i=0; i<m; i++) { 4193 nnz = Ii[i+1]- Ii[i]; 4194 Iii = Ii[i]; 4195 ldi = ld[i]; 4196 md = Adi[i+1]-Adi[i]; 4197 PetscCall(PetscArraycpy(ao,v + Iii,ldi)); 4198 PetscCall(PetscArraycpy(ad,v + Iii + ldi,md)); 4199 PetscCall(PetscArraycpy(ao + ldi,v + Iii + ldi + md,nnz - ldi - md)); 4200 ad += md; 4201 ao += nnz - md; 4202 } 4203 nooffprocentries = mat->nooffprocentries; 4204 mat->nooffprocentries = PETSC_TRUE; 4205 PetscCall(MatSeqAIJRestoreArrayWrite(Aij->A,&ad)); 4206 PetscCall(MatSeqAIJRestoreArrayWrite(Aij->B,&ao)); 4207 PetscCall(PetscObjectStateIncrease((PetscObject)Aij->A)); 4208 PetscCall(PetscObjectStateIncrease((PetscObject)Aij->B)); 4209 PetscCall(PetscObjectStateIncrease((PetscObject)mat)); 4210 PetscCall(MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY)); 4211 PetscCall(MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY)); 4212 mat->nooffprocentries = nooffprocentries; 4213 PetscFunctionReturn(0); 4214 } 4215 4216 /*@C 4217 MatCreateAIJ - Creates a sparse parallel matrix in AIJ format 4218 (the default parallel PETSc format). For good matrix assembly performance 4219 the user should preallocate the matrix storage by setting the parameters 4220 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 4221 performance can be increased by more than a factor of 50. 4222 4223 Collective 4224 4225 Input Parameters: 4226 + comm - MPI communicator 4227 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 4228 This value should be the same as the local size used in creating the 4229 y vector for the matrix-vector product y = Ax. 4230 . 
n - This value should be the same as the local size used in creating the 4231 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4232 calculated if N is given) For square matrices n is almost always m. 4233 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4234 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4235 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 4236 (same value is used for all local rows) 4237 . d_nnz - array containing the number of nonzeros in the various rows of the 4238 DIAGONAL portion of the local submatrix (possibly different for each row) 4239 or NULL, if d_nz is used to specify the nonzero structure. 4240 The size of this array is equal to the number of local rows, i.e 'm'. 4241 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 4242 submatrix (same value is used for all local rows). 4243 - o_nnz - array containing the number of nonzeros in the various rows of the 4244 OFF-DIAGONAL portion of the local submatrix (possibly different for 4245 each row) or NULL, if o_nz is used to specify the nonzero 4246 structure. The size of this array is equal to the number 4247 of local rows, i.e 'm'. 4248 4249 Output Parameter: 4250 . A - the matrix 4251 4252 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 4253 MatXXXXSetPreallocation() paradigm instead of this routine directly. 4254 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 4255 4256 Notes: 4257 If the *_nnz parameter is given then the *_nz parameter is ignored 4258 4259 m,n,M,N parameters specify the size of the matrix, and its partitioning across 4260 processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 4261 storage requirements for this matrix. 

   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
   processor then it must be used on all processors that share the object for
   that argument.

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   The parallel matrix is partitioned across processors such that the
   first m0 rows belong to process 0, the next m1 rows belong to
   process 1, the next m2 rows belong to process 2, etc., where
   m0,m1,m2,... are the input parameter 'm', i.e. each processor stores
   values corresponding to an [m x N] submatrix.

   The columns are logically partitioned with the n0 columns belonging
   to the 0th partition, the next n1 columns belonging to the next
   partition, etc., where n0,n1,n2,... are the input parameter 'n'.

   The DIAGONAL portion of the local submatrix on any given processor
   is the submatrix corresponding to the rows and columns m,n
   corresponding to the given processor, i.e. the diagonal matrix on
   process 0 is [m0 x n0], the diagonal matrix on process 1 is [m1 x n1],
   etc. The remaining portion of the local submatrix [m x (N-n)]
   constitutes the OFF-DIAGONAL portion. The example below better
   illustrates this concept.

   For a square global matrix we define each processor's diagonal portion
   to be its local rows and the corresponding columns (a square submatrix);
   each processor's off-diagonal portion encompasses the remainder of the
   local matrix (a rectangular submatrix).

   If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.

   When calling this routine with a single process communicator, a matrix of
   type SEQAIJ is returned.
If a matrix of type MPIAIJ is desired for this
   type of communicator, use the construction mechanism
.vb
     MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
.ve

$     MatCreate(...,&A);
$     MatSetType(A,MATMPIAIJ);
$     MatSetSizes(A, m,n,M,N);
$     MatMPIAIJSetPreallocation(A,...);

   By default, this format uses inodes (identical nodes) when possible.
   We search for consecutive rows with the same nonzero structure, thereby
   reusing matrix information to achieve increased efficiency.

   Options Database Keys:
+  -mat_no_inode  - Do not use inodes
.  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
-  -matmult_vecscatter_view <viewer> - View the vecscatter (i.e., communication pattern) used in MatMult() of sparse parallel matrices.
        See viewer types in manual of MatView(). Of them, ascii_matlab, draw or binary cause the vecscatter to be viewed as a matrix.
        Entry (i,j) is the size of message (in bytes) rank i sends to rank j in one MatMult() call.

   Example usage:

   Consider the following 8x8 matrix with 34 non-zero values, that is
   assembled across 3 processors. Let's assume that proc0 owns 3 rows,
   proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
   as follows

.vb
            1  2  0  |  0  3  0  |  0  4
    Proc0   0  5  6  |  7  0  0  |  8  0
            9  0 10  | 11  0  0  | 12  0
    -------------------------------------
           13  0 14  | 15 16 17  |  0  0
    Proc1   0 18  0  | 19 20 21  |  0  0
            0  0  0  | 22 23  0  | 24  0
    -------------------------------------
    Proc2  25 26 27  |  0  0 28  | 29  0
           30  0  0  | 31 32 33  |  0 34
.ve

   This can be represented as a collection of submatrices as

.vb
      A B C
      D E F
      G H I
.ve

   Where the submatrices A,B,C are owned by proc0, D,E,F are
   owned by proc1, G,H,I are owned by proc2.

   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'M','N' parameters are 8,8, and have the same values on all procs.

   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
   Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
   part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.

   When d_nz, o_nz parameters are specified, d_nz storage elements are
   allocated for every row of the local diagonal submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submat.
   One way to choose d_nz and o_nz is to use the max nonzeros per local
   row for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
   In this case, the values of d_nz,o_nz are
.vb
     proc0 : d_nz = 2, o_nz = 2
     proc1 : d_nz = 3, o_nz = 2
     proc2 : d_nz = 1, o_nz = 4
.ve
   We are allocating m*(d_nz+o_nz) storage locations for every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc2, i.e. we are using 12+15+10=37 storage locations to store
   34 values.

   When d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
   In the above case the values for d_nnz,o_nnz are
.vb
     proc0: d_nnz = [2,2,2] and  o_nnz = [2,2,2]
     proc1: d_nnz = [3,3,2] and  o_nnz = [2,1,1]
     proc2: d_nnz = [1,1]   and  o_nnz = [4,4]
.ve
   Here the space allocated is the sum of all the above values, i.e. 34, and
   hence pre-allocation is perfect.
4386 4387 Level: intermediate 4388 4389 .seealso: `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 4390 `MATMPIAIJ`, `MatCreateMPIAIJWithArrays()` 4391 @*/ 4392 PetscErrorCode MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 4393 { 4394 PetscMPIInt size; 4395 4396 PetscFunctionBegin; 4397 PetscCall(MatCreate(comm,A)); 4398 PetscCall(MatSetSizes(*A,m,n,M,N)); 4399 PetscCallMPI(MPI_Comm_size(comm,&size)); 4400 if (size > 1) { 4401 PetscCall(MatSetType(*A,MATMPIAIJ)); 4402 PetscCall(MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz)); 4403 } else { 4404 PetscCall(MatSetType(*A,MATSEQAIJ)); 4405 PetscCall(MatSeqAIJSetPreallocation(*A,d_nz,d_nnz)); 4406 } 4407 PetscFunctionReturn(0); 4408 } 4409 4410 /*@C 4411 MatMPIAIJGetSeqAIJ - Returns the local piece of this distributed matrix 4412 4413 Not collective 4414 4415 Input Parameter: 4416 . A - The MPIAIJ matrix 4417 4418 Output Parameters: 4419 + Ad - The local diagonal block as a SeqAIJ matrix 4420 . Ao - The local off-diagonal block as a SeqAIJ matrix 4421 - colmap - An array mapping local column numbers of Ao to global column numbers of the parallel matrix 4422 4423 Note: The rows in Ad and Ao are in [0, Nr), where Nr is the number of local rows on this process. The columns 4424 in Ad are in [0, Nc) where Nc is the number of local columns. The columns are Ao are in [0, Nco), where Nco is 4425 the number of nonzero columns in the local off-diagonal piece of the matrix A. The array colmap maps these 4426 local column numbers to global column numbers in the original matrix. 
4427 4428 Level: intermediate 4429 4430 .seealso: `MatMPIAIJGetLocalMat()`, `MatMPIAIJGetLocalMatCondensed()`, `MatCreateAIJ()`, `MATMPIAIJ`, `MATSEQAIJ` 4431 @*/ 4432 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[]) 4433 { 4434 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4435 PetscBool flg; 4436 4437 PetscFunctionBegin; 4438 PetscCall(PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&flg)); 4439 PetscCheck(flg,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MATMPIAIJ matrix as input"); 4440 if (Ad) *Ad = a->A; 4441 if (Ao) *Ao = a->B; 4442 if (colmap) *colmap = a->garray; 4443 PetscFunctionReturn(0); 4444 } 4445 4446 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 4447 { 4448 PetscInt m,N,i,rstart,nnz,Ii; 4449 PetscInt *indx; 4450 PetscScalar *values; 4451 MatType rootType; 4452 4453 PetscFunctionBegin; 4454 PetscCall(MatGetSize(inmat,&m,&N)); 4455 if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */ 4456 PetscInt *dnz,*onz,sum,bs,cbs; 4457 4458 if (n == PETSC_DECIDE) { 4459 PetscCall(PetscSplitOwnership(comm,&n,&N)); 4460 } 4461 /* Check sum(n) = N */ 4462 PetscCall(MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm)); 4463 PetscCheck(sum == N,PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %" PetscInt_FMT " != global columns %" PetscInt_FMT,sum,N); 4464 4465 PetscCallMPI(MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm)); 4466 rstart -= m; 4467 4468 MatPreallocateBegin(comm,m,n,dnz,onz); 4469 for (i=0; i<m; i++) { 4470 PetscCall(MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL)); 4471 PetscCall(MatPreallocateSet(i+rstart,nnz,indx,dnz,onz)); 4472 PetscCall(MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL)); 4473 } 4474 4475 PetscCall(MatCreate(comm,outmat)); 4476 PetscCall(MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE)); 4477 PetscCall(MatGetBlockSizes(inmat,&bs,&cbs)); 4478 PetscCall(MatSetBlockSizes(*outmat,bs,cbs)); 4479 
PetscCall(MatGetRootType_Private(inmat,&rootType)); 4480 PetscCall(MatSetType(*outmat,rootType)); 4481 PetscCall(MatSeqAIJSetPreallocation(*outmat,0,dnz)); 4482 PetscCall(MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz)); 4483 MatPreallocateEnd(dnz,onz); 4484 PetscCall(MatSetOption(*outmat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE)); 4485 } 4486 4487 /* numeric phase */ 4488 PetscCall(MatGetOwnershipRange(*outmat,&rstart,NULL)); 4489 for (i=0; i<m; i++) { 4490 PetscCall(MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values)); 4491 Ii = i + rstart; 4492 PetscCall(MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES)); 4493 PetscCall(MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values)); 4494 } 4495 PetscCall(MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY)); 4496 PetscCall(MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY)); 4497 PetscFunctionReturn(0); 4498 } 4499 4500 PetscErrorCode MatFileSplit(Mat A,char *outfile) 4501 { 4502 PetscMPIInt rank; 4503 PetscInt m,N,i,rstart,nnz; 4504 size_t len; 4505 const PetscInt *indx; 4506 PetscViewer out; 4507 char *name; 4508 Mat B; 4509 const PetscScalar *values; 4510 4511 PetscFunctionBegin; 4512 PetscCall(MatGetLocalSize(A,&m,NULL)); 4513 PetscCall(MatGetSize(A,NULL,&N)); 4514 /* Should this be the type of the diagonal block of A? 
*/ 4515 PetscCall(MatCreate(PETSC_COMM_SELF,&B)); 4516 PetscCall(MatSetSizes(B,m,N,m,N)); 4517 PetscCall(MatSetBlockSizesFromMats(B,A,A)); 4518 PetscCall(MatSetType(B,MATSEQAIJ)); 4519 PetscCall(MatSeqAIJSetPreallocation(B,0,NULL)); 4520 PetscCall(MatGetOwnershipRange(A,&rstart,NULL)); 4521 for (i=0; i<m; i++) { 4522 PetscCall(MatGetRow(A,i+rstart,&nnz,&indx,&values)); 4523 PetscCall(MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES)); 4524 PetscCall(MatRestoreRow(A,i+rstart,&nnz,&indx,&values)); 4525 } 4526 PetscCall(MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY)); 4527 PetscCall(MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY)); 4528 4529 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank)); 4530 PetscCall(PetscStrlen(outfile,&len)); 4531 PetscCall(PetscMalloc1(len+6,&name)); 4532 PetscCall(PetscSNPrintf(name,len+6,"%s.%d",outfile,rank)); 4533 PetscCall(PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out)); 4534 PetscCall(PetscFree(name)); 4535 PetscCall(MatView(B,out)); 4536 PetscCall(PetscViewerDestroy(&out)); 4537 PetscCall(MatDestroy(&B)); 4538 PetscFunctionReturn(0); 4539 } 4540 4541 static PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(void *data) 4542 { 4543 Mat_Merge_SeqsToMPI *merge = (Mat_Merge_SeqsToMPI *)data; 4544 4545 PetscFunctionBegin; 4546 if (!merge) PetscFunctionReturn(0); 4547 PetscCall(PetscFree(merge->id_r)); 4548 PetscCall(PetscFree(merge->len_s)); 4549 PetscCall(PetscFree(merge->len_r)); 4550 PetscCall(PetscFree(merge->bi)); 4551 PetscCall(PetscFree(merge->bj)); 4552 PetscCall(PetscFree(merge->buf_ri[0])); 4553 PetscCall(PetscFree(merge->buf_ri)); 4554 PetscCall(PetscFree(merge->buf_rj[0])); 4555 PetscCall(PetscFree(merge->buf_rj)); 4556 PetscCall(PetscFree(merge->coi)); 4557 PetscCall(PetscFree(merge->coj)); 4558 PetscCall(PetscFree(merge->owners_co)); 4559 PetscCall(PetscLayoutDestroy(&merge->rowmap)); 4560 PetscCall(PetscFree(merge)); 4561 PetscFunctionReturn(0); 4562 } 4563 4564 #include <../src/mat/utils/freespace.h> 4565 #include 
<petscbt.h> 4566 4567 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat) 4568 { 4569 MPI_Comm comm; 4570 Mat_SeqAIJ *a =(Mat_SeqAIJ*)seqmat->data; 4571 PetscMPIInt size,rank,taga,*len_s; 4572 PetscInt N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj; 4573 PetscInt proc,m; 4574 PetscInt **buf_ri,**buf_rj; 4575 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 4576 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 4577 MPI_Request *s_waits,*r_waits; 4578 MPI_Status *status; 4579 const MatScalar *aa,*a_a; 4580 MatScalar **abuf_r,*ba_i; 4581 Mat_Merge_SeqsToMPI *merge; 4582 PetscContainer container; 4583 4584 PetscFunctionBegin; 4585 PetscCall(PetscObjectGetComm((PetscObject)mpimat,&comm)); 4586 PetscCall(PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0)); 4587 4588 PetscCallMPI(MPI_Comm_size(comm,&size)); 4589 PetscCallMPI(MPI_Comm_rank(comm,&rank)); 4590 4591 PetscCall(PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container)); 4592 PetscCheck(container,PetscObjectComm((PetscObject)mpimat),PETSC_ERR_PLIB,"Mat not created from MatCreateMPIAIJSumSeqAIJSymbolic"); 4593 PetscCall(PetscContainerGetPointer(container,(void**)&merge)); 4594 PetscCall(MatSeqAIJGetArrayRead(seqmat,&a_a)); 4595 aa = a_a; 4596 4597 bi = merge->bi; 4598 bj = merge->bj; 4599 buf_ri = merge->buf_ri; 4600 buf_rj = merge->buf_rj; 4601 4602 PetscCall(PetscMalloc1(size,&status)); 4603 owners = merge->rowmap->range; 4604 len_s = merge->len_s; 4605 4606 /* send and recv matrix values */ 4607 /*-----------------------------*/ 4608 PetscCall(PetscObjectGetNewTag((PetscObject)mpimat,&taga)); 4609 PetscCall(PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits)); 4610 4611 PetscCall(PetscMalloc1(merge->nsend+1,&s_waits)); 4612 for (proc=0,k=0; proc<size; proc++) { 4613 if (!len_s[proc]) continue; 4614 i = owners[proc]; 4615 PetscCallMPI(MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k)); 4616 k++; 4617 } 4618 4619 if 
(merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv,r_waits,status)); 4620 if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend,s_waits,status)); 4621 PetscCall(PetscFree(status)); 4622 4623 PetscCall(PetscFree(s_waits)); 4624 PetscCall(PetscFree(r_waits)); 4625 4626 /* insert mat values of mpimat */ 4627 /*----------------------------*/ 4628 PetscCall(PetscMalloc1(N,&ba_i)); 4629 PetscCall(PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai)); 4630 4631 for (k=0; k<merge->nrecv; k++) { 4632 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4633 nrows = *(buf_ri_k[k]); 4634 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */ 4635 nextai[k] = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure */ 4636 } 4637 4638 /* set values of ba */ 4639 m = merge->rowmap->n; 4640 for (i=0; i<m; i++) { 4641 arow = owners[rank] + i; 4642 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 4643 bnzi = bi[i+1] - bi[i]; 4644 PetscCall(PetscArrayzero(ba_i,bnzi)); 4645 4646 /* add local non-zero vals of this proc's seqmat into ba */ 4647 anzi = ai[arow+1] - ai[arow]; 4648 aj = a->j + ai[arow]; 4649 aa = a_a + ai[arow]; 4650 nextaj = 0; 4651 for (j=0; nextaj<anzi; j++) { 4652 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4653 ba_i[j] += aa[nextaj++]; 4654 } 4655 } 4656 4657 /* add received vals into ba */ 4658 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 4659 /* i-th row */ 4660 if (i == *nextrow[k]) { 4661 anzi = *(nextai[k]+1) - *nextai[k]; 4662 aj = buf_rj[k] + *(nextai[k]); 4663 aa = abuf_r[k] + *(nextai[k]); 4664 nextaj = 0; 4665 for (j=0; nextaj<anzi; j++) { 4666 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4667 ba_i[j] += aa[nextaj++]; 4668 } 4669 } 4670 nextrow[k]++; nextai[k]++; 4671 } 4672 } 4673 PetscCall(MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES)); 4674 } 4675 PetscCall(MatSeqAIJRestoreArrayRead(seqmat,&a_a)); 4676 
PetscCall(MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY)); 4677 PetscCall(MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY)); 4678 4679 PetscCall(PetscFree(abuf_r[0])); 4680 PetscCall(PetscFree(abuf_r)); 4681 PetscCall(PetscFree(ba_i)); 4682 PetscCall(PetscFree3(buf_ri_k,nextrow,nextai)); 4683 PetscCall(PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0)); 4684 PetscFunctionReturn(0); 4685 } 4686 4687 PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 4688 { 4689 Mat B_mpi; 4690 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4691 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 4692 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 4693 PetscInt M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j; 4694 PetscInt len,proc,*dnz,*onz,bs,cbs; 4695 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 4696 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 4697 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 4698 MPI_Status *status; 4699 PetscFreeSpaceList free_space=NULL,current_space=NULL; 4700 PetscBT lnkbt; 4701 Mat_Merge_SeqsToMPI *merge; 4702 PetscContainer container; 4703 4704 PetscFunctionBegin; 4705 PetscCall(PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0)); 4706 4707 /* make sure it is a PETSc comm */ 4708 PetscCall(PetscCommDuplicate(comm,&comm,NULL)); 4709 PetscCallMPI(MPI_Comm_size(comm,&size)); 4710 PetscCallMPI(MPI_Comm_rank(comm,&rank)); 4711 4712 PetscCall(PetscNew(&merge)); 4713 PetscCall(PetscMalloc1(size,&status)); 4714 4715 /* determine row ownership */ 4716 /*---------------------------------------------------------*/ 4717 PetscCall(PetscLayoutCreate(comm,&merge->rowmap)); 4718 PetscCall(PetscLayoutSetLocalSize(merge->rowmap,m)); 4719 PetscCall(PetscLayoutSetSize(merge->rowmap,M)); 4720 PetscCall(PetscLayoutSetBlockSize(merge->rowmap,1)); 4721 PetscCall(PetscLayoutSetUp(merge->rowmap)); 4722 PetscCall(PetscMalloc1(size,&len_si)); 4723 PetscCall(PetscMalloc1(size,&merge->len_s)); 4724 
4725 m = merge->rowmap->n; 4726 owners = merge->rowmap->range; 4727 4728 /* determine the number of messages to send, their lengths */ 4729 /*---------------------------------------------------------*/ 4730 len_s = merge->len_s; 4731 4732 len = 0; /* length of buf_si[] */ 4733 merge->nsend = 0; 4734 for (proc=0; proc<size; proc++) { 4735 len_si[proc] = 0; 4736 if (proc == rank) { 4737 len_s[proc] = 0; 4738 } else { 4739 len_si[proc] = owners[proc+1] - owners[proc] + 1; 4740 len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 4741 } 4742 if (len_s[proc]) { 4743 merge->nsend++; 4744 nrows = 0; 4745 for (i=owners[proc]; i<owners[proc+1]; i++) { 4746 if (ai[i+1] > ai[i]) nrows++; 4747 } 4748 len_si[proc] = 2*(nrows+1); 4749 len += len_si[proc]; 4750 } 4751 } 4752 4753 /* determine the number and length of messages to receive for ij-structure */ 4754 /*-------------------------------------------------------------------------*/ 4755 PetscCall(PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv)); 4756 PetscCall(PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri)); 4757 4758 /* post the Irecv of j-structure */ 4759 /*-------------------------------*/ 4760 PetscCall(PetscCommGetNewTag(comm,&tagj)); 4761 PetscCall(PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits)); 4762 4763 /* post the Isend of j-structure */ 4764 /*--------------------------------*/ 4765 PetscCall(PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits)); 4766 4767 for (proc=0, k=0; proc<size; proc++) { 4768 if (!len_s[proc]) continue; 4769 i = owners[proc]; 4770 PetscCallMPI(MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k)); 4771 k++; 4772 } 4773 4774 /* receives and sends of j-structure are complete */ 4775 /*------------------------------------------------*/ 4776 if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv,rj_waits,status)); 4777 if 
(merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend,sj_waits,status)); 4778 4779 /* send and recv i-structure */ 4780 /*---------------------------*/ 4781 PetscCall(PetscCommGetNewTag(comm,&tagi)); 4782 PetscCall(PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits)); 4783 4784 PetscCall(PetscMalloc1(len+1,&buf_s)); 4785 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4786 for (proc=0,k=0; proc<size; proc++) { 4787 if (!len_s[proc]) continue; 4788 /* form outgoing message for i-structure: 4789 buf_si[0]: nrows to be sent 4790 [1:nrows]: row index (global) 4791 [nrows+1:2*nrows+1]: i-structure index 4792 */ 4793 /*-------------------------------------------*/ 4794 nrows = len_si[proc]/2 - 1; 4795 buf_si_i = buf_si + nrows+1; 4796 buf_si[0] = nrows; 4797 buf_si_i[0] = 0; 4798 nrows = 0; 4799 for (i=owners[proc]; i<owners[proc+1]; i++) { 4800 anzi = ai[i+1] - ai[i]; 4801 if (anzi) { 4802 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 4803 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 4804 nrows++; 4805 } 4806 } 4807 PetscCallMPI(MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k)); 4808 k++; 4809 buf_si += len_si[proc]; 4810 } 4811 4812 if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv,ri_waits,status)); 4813 if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend,si_waits,status)); 4814 4815 PetscCall(PetscInfo(seqmat,"nsend: %d, nrecv: %d\n",merge->nsend,merge->nrecv)); 4816 for (i=0; i<merge->nrecv; i++) { 4817 PetscCall(PetscInfo(seqmat,"recv len_ri=%d, len_rj=%d from [%d]\n",len_ri[i],merge->len_r[i],merge->id_r[i])); 4818 } 4819 4820 PetscCall(PetscFree(len_si)); 4821 PetscCall(PetscFree(len_ri)); 4822 PetscCall(PetscFree(rj_waits)); 4823 PetscCall(PetscFree2(si_waits,sj_waits)); 4824 PetscCall(PetscFree(ri_waits)); 4825 PetscCall(PetscFree(buf_s)); 4826 PetscCall(PetscFree(status)); 4827 4828 /* compute a local seq matrix in each processor */ 4829 
/*----------------------------------------------*/ 4830 /* allocate bi array and free space for accumulating nonzero column info */ 4831 PetscCall(PetscMalloc1(m+1,&bi)); 4832 bi[0] = 0; 4833 4834 /* create and initialize a linked list */ 4835 nlnk = N+1; 4836 PetscCall(PetscLLCreate(N,N,nlnk,lnk,lnkbt)); 4837 4838 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 4839 len = ai[owners[rank+1]] - ai[owners[rank]]; 4840 PetscCall(PetscFreeSpaceGet(PetscIntMultTruncate(2,len)+1,&free_space)); 4841 4842 current_space = free_space; 4843 4844 /* determine symbolic info for each local row */ 4845 PetscCall(PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai)); 4846 4847 for (k=0; k<merge->nrecv; k++) { 4848 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4849 nrows = *buf_ri_k[k]; 4850 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 4851 nextai[k] = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure */ 4852 } 4853 4854 MatPreallocateBegin(comm,m,n,dnz,onz); 4855 len = 0; 4856 for (i=0; i<m; i++) { 4857 bnzi = 0; 4858 /* add local non-zero cols of this proc's seqmat into lnk */ 4859 arow = owners[rank] + i; 4860 anzi = ai[arow+1] - ai[arow]; 4861 aj = a->j + ai[arow]; 4862 PetscCall(PetscLLAddSorted(anzi,aj,N,&nlnk,lnk,lnkbt)); 4863 bnzi += nlnk; 4864 /* add received col data into lnk */ 4865 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 4866 if (i == *nextrow[k]) { /* i-th row */ 4867 anzi = *(nextai[k]+1) - *nextai[k]; 4868 aj = buf_rj[k] + *nextai[k]; 4869 PetscCall(PetscLLAddSorted(anzi,aj,N,&nlnk,lnk,lnkbt)); 4870 bnzi += nlnk; 4871 nextrow[k]++; nextai[k]++; 4872 } 4873 } 4874 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 4875 4876 /* if free space is not available, make more free space */ 4877 if (current_space->local_remaining<bnzi) { 4878 
PetscCall(PetscFreeSpaceGet(PetscIntSumTruncate(bnzi,current_space->total_array_size),¤t_space)); 4879 nspacedouble++; 4880 } 4881 /* copy data into free space, then initialize lnk */ 4882 PetscCall(PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt)); 4883 PetscCall(MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz)); 4884 4885 current_space->array += bnzi; 4886 current_space->local_used += bnzi; 4887 current_space->local_remaining -= bnzi; 4888 4889 bi[i+1] = bi[i] + bnzi; 4890 } 4891 4892 PetscCall(PetscFree3(buf_ri_k,nextrow,nextai)); 4893 4894 PetscCall(PetscMalloc1(bi[m]+1,&bj)); 4895 PetscCall(PetscFreeSpaceContiguous(&free_space,bj)); 4896 PetscCall(PetscLLDestroy(lnk,lnkbt)); 4897 4898 /* create symbolic parallel matrix B_mpi */ 4899 /*---------------------------------------*/ 4900 PetscCall(MatGetBlockSizes(seqmat,&bs,&cbs)); 4901 PetscCall(MatCreate(comm,&B_mpi)); 4902 if (n==PETSC_DECIDE) { 4903 PetscCall(MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N)); 4904 } else { 4905 PetscCall(MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE)); 4906 } 4907 PetscCall(MatSetBlockSizes(B_mpi,bs,cbs)); 4908 PetscCall(MatSetType(B_mpi,MATMPIAIJ)); 4909 PetscCall(MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz)); 4910 MatPreallocateEnd(dnz,onz); 4911 PetscCall(MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE)); 4912 4913 /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */ 4914 B_mpi->assembled = PETSC_FALSE; 4915 merge->bi = bi; 4916 merge->bj = bj; 4917 merge->buf_ri = buf_ri; 4918 merge->buf_rj = buf_rj; 4919 merge->coi = NULL; 4920 merge->coj = NULL; 4921 merge->owners_co = NULL; 4922 4923 PetscCall(PetscCommDestroy(&comm)); 4924 4925 /* attach the supporting struct to B_mpi for reuse */ 4926 PetscCall(PetscContainerCreate(PETSC_COMM_SELF,&container)); 4927 PetscCall(PetscContainerSetPointer(container,merge)); 4928 PetscCall(PetscContainerSetUserDestroy(container,MatDestroy_MPIAIJ_SeqsToMPI)); 4929 
PetscCall(PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container)); 4930 PetscCall(PetscContainerDestroy(&container)); 4931 *mpimat = B_mpi; 4932 4933 PetscCall(PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0)); 4934 PetscFunctionReturn(0); 4935 } 4936 4937 /*@C 4938 MatCreateMPIAIJSumSeqAIJ - Creates a MATMPIAIJ matrix by adding sequential 4939 matrices from each processor 4940 4941 Collective 4942 4943 Input Parameters: 4944 + comm - the communicators the parallel matrix will live on 4945 . seqmat - the input sequential matrices 4946 . m - number of local rows (or PETSC_DECIDE) 4947 . n - number of local columns (or PETSC_DECIDE) 4948 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4949 4950 Output Parameter: 4951 . mpimat - the parallel matrix generated 4952 4953 Level: advanced 4954 4955 Notes: 4956 The dimensions of the sequential matrix in each processor MUST be the same. 4957 The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be 4958 destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat. 
4959 @*/ 4960 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat) 4961 { 4962 PetscMPIInt size; 4963 4964 PetscFunctionBegin; 4965 PetscCallMPI(MPI_Comm_size(comm,&size)); 4966 if (size == 1) { 4967 PetscCall(PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0)); 4968 if (scall == MAT_INITIAL_MATRIX) { 4969 PetscCall(MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat)); 4970 } else { 4971 PetscCall(MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN)); 4972 } 4973 PetscCall(PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0)); 4974 PetscFunctionReturn(0); 4975 } 4976 PetscCall(PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0)); 4977 if (scall == MAT_INITIAL_MATRIX) { 4978 PetscCall(MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat)); 4979 } 4980 PetscCall(MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat)); 4981 PetscCall(PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0)); 4982 PetscFunctionReturn(0); 4983 } 4984 4985 /*@ 4986 MatAIJGetLocalMat - Creates a SeqAIJ from a MATAIJ matrix by taking all its local rows and putting them into a sequential matrix with 4987 mlocal rows and n columns. Where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained 4988 with MatGetSize() 4989 4990 Not Collective 4991 4992 Input Parameters: 4993 + A - the matrix 4994 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4995 4996 Output Parameter: 4997 . A_loc - the local sequential matrix generated 4998 4999 Level: developer 5000 5001 Notes: 5002 In other words combines the two parts of a parallel MPIAIJ matrix on each process to a single matrix. 
5003 5004 Destroy the matrix with MatDestroy() 5005 5006 .seealso: MatMPIAIJGetLocalMat() 5007 5008 @*/ 5009 PetscErrorCode MatAIJGetLocalMat(Mat A,Mat *A_loc) 5010 { 5011 PetscBool mpi; 5012 5013 PetscFunctionBegin; 5014 PetscCall(PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&mpi)); 5015 if (mpi) { 5016 PetscCall(MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,A_loc)); 5017 } else { 5018 *A_loc = A; 5019 PetscCall(PetscObjectReference((PetscObject)*A_loc)); 5020 } 5021 PetscFunctionReturn(0); 5022 } 5023 5024 /*@ 5025 MatMPIAIJGetLocalMat - Creates a SeqAIJ from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with 5026 mlocal rows and n columns. Where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained 5027 with MatGetSize() 5028 5029 Not Collective 5030 5031 Input Parameters: 5032 + A - the matrix 5033 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5034 5035 Output Parameter: 5036 . A_loc - the local sequential matrix generated 5037 5038 Level: developer 5039 5040 Notes: 5041 In other words combines the two parts of a parallel MPIAIJ matrix on each process to a single matrix. 5042 5043 When the communicator associated with A has size 1 and MAT_INITIAL_MATRIX is requested, the matrix returned is the diagonal part of A. 5044 If MAT_REUSE_MATRIX is requested with comm size 1, MatCopy(Adiag,*A_loc,SAME_NONZERO_PATTERN) is called. 5045 This means that one can preallocate the proper sequential matrix first and then call this routine with MAT_REUSE_MATRIX to safely 5046 modify the values of the returned A_loc. 
5047 5048 .seealso: `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMatCondensed()`, `MatMPIAIJGetLocalMatMerge()` 5049 @*/ 5050 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc) 5051 { 5052 Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data; 5053 Mat_SeqAIJ *mat,*a,*b; 5054 PetscInt *ai,*aj,*bi,*bj,*cmap=mpimat->garray; 5055 const PetscScalar *aa,*ba,*aav,*bav; 5056 PetscScalar *ca,*cam; 5057 PetscMPIInt size; 5058 PetscInt am=A->rmap->n,i,j,k,cstart=A->cmap->rstart; 5059 PetscInt *ci,*cj,col,ncols_d,ncols_o,jo; 5060 PetscBool match; 5061 5062 PetscFunctionBegin; 5063 PetscCall(PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&match)); 5064 PetscCheck(match,PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input"); 5065 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A),&size)); 5066 if (size == 1) { 5067 if (scall == MAT_INITIAL_MATRIX) { 5068 PetscCall(PetscObjectReference((PetscObject)mpimat->A)); 5069 *A_loc = mpimat->A; 5070 } else if (scall == MAT_REUSE_MATRIX) { 5071 PetscCall(MatCopy(mpimat->A,*A_loc,SAME_NONZERO_PATTERN)); 5072 } 5073 PetscFunctionReturn(0); 5074 } 5075 5076 PetscCall(PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0)); 5077 a = (Mat_SeqAIJ*)(mpimat->A)->data; 5078 b = (Mat_SeqAIJ*)(mpimat->B)->data; 5079 ai = a->i; aj = a->j; bi = b->i; bj = b->j; 5080 PetscCall(MatSeqAIJGetArrayRead(mpimat->A,&aav)); 5081 PetscCall(MatSeqAIJGetArrayRead(mpimat->B,&bav)); 5082 aa = aav; 5083 ba = bav; 5084 if (scall == MAT_INITIAL_MATRIX) { 5085 PetscCall(PetscMalloc1(1+am,&ci)); 5086 ci[0] = 0; 5087 for (i=0; i<am; i++) { 5088 ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]); 5089 } 5090 PetscCall(PetscMalloc1(1+ci[am],&cj)); 5091 PetscCall(PetscMalloc1(1+ci[am],&ca)); 5092 k = 0; 5093 for (i=0; i<am; i++) { 5094 ncols_o = bi[i+1] - bi[i]; 5095 ncols_d = ai[i+1] - ai[i]; 5096 /* off-diagonal portion of A */ 5097 for (jo=0; jo<ncols_o; jo++) { 5098 col = cmap[*bj]; 5099 if (col >= cstart) break; 5100 cj[k] = 
col; bj++; 5101 ca[k++] = *ba++; 5102 } 5103 /* diagonal portion of A */ 5104 for (j=0; j<ncols_d; j++) { 5105 cj[k] = cstart + *aj++; 5106 ca[k++] = *aa++; 5107 } 5108 /* off-diagonal portion of A */ 5109 for (j=jo; j<ncols_o; j++) { 5110 cj[k] = cmap[*bj++]; 5111 ca[k++] = *ba++; 5112 } 5113 } 5114 /* put together the new matrix */ 5115 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc)); 5116 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5117 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5118 mat = (Mat_SeqAIJ*)(*A_loc)->data; 5119 mat->free_a = PETSC_TRUE; 5120 mat->free_ij = PETSC_TRUE; 5121 mat->nonew = 0; 5122 } else if (scall == MAT_REUSE_MATRIX) { 5123 mat =(Mat_SeqAIJ*)(*A_loc)->data; 5124 ci = mat->i; 5125 cj = mat->j; 5126 PetscCall(MatSeqAIJGetArrayWrite(*A_loc,&cam)); 5127 for (i=0; i<am; i++) { 5128 /* off-diagonal portion of A */ 5129 ncols_o = bi[i+1] - bi[i]; 5130 for (jo=0; jo<ncols_o; jo++) { 5131 col = cmap[*bj]; 5132 if (col >= cstart) break; 5133 *cam++ = *ba++; bj++; 5134 } 5135 /* diagonal portion of A */ 5136 ncols_d = ai[i+1] - ai[i]; 5137 for (j=0; j<ncols_d; j++) *cam++ = *aa++; 5138 /* off-diagonal portion of A */ 5139 for (j=jo; j<ncols_o; j++) { 5140 *cam++ = *ba++; bj++; 5141 } 5142 } 5143 PetscCall(MatSeqAIJRestoreArrayWrite(*A_loc,&cam)); 5144 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 5145 PetscCall(MatSeqAIJRestoreArrayRead(mpimat->A,&aav)); 5146 PetscCall(MatSeqAIJRestoreArrayRead(mpimat->B,&bav)); 5147 PetscCall(PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0)); 5148 PetscFunctionReturn(0); 5149 } 5150 5151 /*@ 5152 MatMPIAIJGetLocalMatMerge - Creates a SeqAIJ from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with 5153 mlocal rows and n columns. 
Where n is the sum of the number of columns of the diagonal and offdiagonal part 5154 5155 Not Collective 5156 5157 Input Parameters: 5158 + A - the matrix 5159 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5160 5161 Output Parameters: 5162 + glob - sequential IS with global indices associated with the columns of the local sequential matrix generated (can be NULL) 5163 - A_loc - the local sequential matrix generated 5164 5165 Level: developer 5166 5167 Notes: 5168 This is different from MatMPIAIJGetLocalMat() since the first columns in the returning matrix are those associated with the diagonal part, then those associated with the offdiagonal part (in its local ordering) 5169 5170 .seealso: `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMat()`, `MatMPIAIJGetLocalMatCondensed()` 5171 5172 @*/ 5173 PetscErrorCode MatMPIAIJGetLocalMatMerge(Mat A,MatReuse scall,IS *glob,Mat *A_loc) 5174 { 5175 Mat Ao,Ad; 5176 const PetscInt *cmap; 5177 PetscMPIInt size; 5178 PetscErrorCode (*f)(Mat,MatReuse,IS*,Mat*); 5179 5180 PetscFunctionBegin; 5181 PetscCall(MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&cmap)); 5182 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A),&size)); 5183 if (size == 1) { 5184 if (scall == MAT_INITIAL_MATRIX) { 5185 PetscCall(PetscObjectReference((PetscObject)Ad)); 5186 *A_loc = Ad; 5187 } else if (scall == MAT_REUSE_MATRIX) { 5188 PetscCall(MatCopy(Ad,*A_loc,SAME_NONZERO_PATTERN)); 5189 } 5190 if (glob) PetscCall(ISCreateStride(PetscObjectComm((PetscObject)Ad),Ad->cmap->n,Ad->cmap->rstart,1,glob)); 5191 PetscFunctionReturn(0); 5192 } 5193 PetscCall(PetscObjectQueryFunction((PetscObject)A,"MatMPIAIJGetLocalMatMerge_C",&f)); 5194 PetscCall(PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0)); 5195 if (f) { 5196 PetscCall((*f)(A,scall,glob,A_loc)); 5197 } else { 5198 Mat_SeqAIJ *a = (Mat_SeqAIJ*)Ad->data; 5199 Mat_SeqAIJ *b = (Mat_SeqAIJ*)Ao->data; 5200 Mat_SeqAIJ *c; 5201 PetscInt *ai = a->i, *aj = a->j; 5202 PetscInt *bi = b->i, *bj = b->j; 5203 PetscInt *ci,*cj; 
5204 const PetscScalar *aa,*ba; 5205 PetscScalar *ca; 5206 PetscInt i,j,am,dn,on; 5207 5208 PetscCall(MatGetLocalSize(Ad,&am,&dn)); 5209 PetscCall(MatGetLocalSize(Ao,NULL,&on)); 5210 PetscCall(MatSeqAIJGetArrayRead(Ad,&aa)); 5211 PetscCall(MatSeqAIJGetArrayRead(Ao,&ba)); 5212 if (scall == MAT_INITIAL_MATRIX) { 5213 PetscInt k; 5214 PetscCall(PetscMalloc1(1+am,&ci)); 5215 PetscCall(PetscMalloc1(ai[am]+bi[am],&cj)); 5216 PetscCall(PetscMalloc1(ai[am]+bi[am],&ca)); 5217 ci[0] = 0; 5218 for (i=0,k=0; i<am; i++) { 5219 const PetscInt ncols_o = bi[i+1] - bi[i]; 5220 const PetscInt ncols_d = ai[i+1] - ai[i]; 5221 ci[i+1] = ci[i] + ncols_o + ncols_d; 5222 /* diagonal portion of A */ 5223 for (j=0; j<ncols_d; j++,k++) { 5224 cj[k] = *aj++; 5225 ca[k] = *aa++; 5226 } 5227 /* off-diagonal portion of A */ 5228 for (j=0; j<ncols_o; j++,k++) { 5229 cj[k] = dn + *bj++; 5230 ca[k] = *ba++; 5231 } 5232 } 5233 /* put together the new matrix */ 5234 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,dn+on,ci,cj,ca,A_loc)); 5235 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5236 /* Since these are PETSc arrays, change flags to free them as necessary. 
*/ 5237 c = (Mat_SeqAIJ*)(*A_loc)->data; 5238 c->free_a = PETSC_TRUE; 5239 c->free_ij = PETSC_TRUE; 5240 c->nonew = 0; 5241 PetscCall(MatSetType(*A_loc,((PetscObject)Ad)->type_name)); 5242 } else if (scall == MAT_REUSE_MATRIX) { 5243 PetscCall(MatSeqAIJGetArrayWrite(*A_loc,&ca)); 5244 for (i=0; i<am; i++) { 5245 const PetscInt ncols_d = ai[i+1] - ai[i]; 5246 const PetscInt ncols_o = bi[i+1] - bi[i]; 5247 /* diagonal portion of A */ 5248 for (j=0; j<ncols_d; j++) *ca++ = *aa++; 5249 /* off-diagonal portion of A */ 5250 for (j=0; j<ncols_o; j++) *ca++ = *ba++; 5251 } 5252 PetscCall(MatSeqAIJRestoreArrayWrite(*A_loc,&ca)); 5253 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 5254 PetscCall(MatSeqAIJRestoreArrayRead(Ad,&aa)); 5255 PetscCall(MatSeqAIJRestoreArrayRead(Ao,&aa)); 5256 if (glob) { 5257 PetscInt cst, *gidx; 5258 5259 PetscCall(MatGetOwnershipRangeColumn(A,&cst,NULL)); 5260 PetscCall(PetscMalloc1(dn+on,&gidx)); 5261 for (i=0; i<dn; i++) gidx[i] = cst + i; 5262 for (i=0; i<on; i++) gidx[i+dn] = cmap[i]; 5263 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)Ad),dn+on,gidx,PETSC_OWN_POINTER,glob)); 5264 } 5265 } 5266 PetscCall(PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0)); 5267 PetscFunctionReturn(0); 5268 } 5269 5270 /*@C 5271 MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MATMPIAIJ matrix by taking all its local rows and NON-ZERO columns 5272 5273 Not Collective 5274 5275 Input Parameters: 5276 + A - the matrix 5277 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5278 - row, col - index sets of rows and columns to extract (or NULL) 5279 5280 Output Parameter: 5281 . 
A_loc - the local sequential matrix generated 5282 5283 Level: developer 5284 5285 .seealso: `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMat()` 5286 5287 @*/ 5288 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc) 5289 { 5290 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5291 PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx; 5292 IS isrowa,iscola; 5293 Mat *aloc; 5294 PetscBool match; 5295 5296 PetscFunctionBegin; 5297 PetscCall(PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match)); 5298 PetscCheck(match,PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input"); 5299 PetscCall(PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0)); 5300 if (!row) { 5301 start = A->rmap->rstart; end = A->rmap->rend; 5302 PetscCall(ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa)); 5303 } else { 5304 isrowa = *row; 5305 } 5306 if (!col) { 5307 start = A->cmap->rstart; 5308 cmap = a->garray; 5309 nzA = a->A->cmap->n; 5310 nzB = a->B->cmap->n; 5311 PetscCall(PetscMalloc1(nzA+nzB, &idx)); 5312 ncols = 0; 5313 for (i=0; i<nzB; i++) { 5314 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5315 else break; 5316 } 5317 imark = i; 5318 for (i=0; i<nzA; i++) idx[ncols++] = start + i; 5319 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; 5320 PetscCall(ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola)); 5321 } else { 5322 iscola = *col; 5323 } 5324 if (scall != MAT_INITIAL_MATRIX) { 5325 PetscCall(PetscMalloc1(1,&aloc)); 5326 aloc[0] = *A_loc; 5327 } 5328 PetscCall(MatCreateSubMatrices(A,1,&isrowa,&iscola,scall,&aloc)); 5329 if (!col) { /* attach global id of condensed columns */ 5330 PetscCall(PetscObjectCompose((PetscObject)aloc[0],"_petsc_GetLocalMatCondensed_iscol",(PetscObject)iscola)); 5331 } 5332 *A_loc = aloc[0]; 5333 PetscCall(PetscFree(aloc)); 5334 if (!row) { 5335 PetscCall(ISDestroy(&isrowa)); 5336 } 5337 if (!col) { 5338 PetscCall(ISDestroy(&iscola)); 5339 } 5340 
PetscCall(PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0)); 5341 PetscFunctionReturn(0); 5342 } 5343 5344 /* 5345 * Create a sequential AIJ matrix based on row indices. a whole column is extracted once a row is matched. 5346 * Row could be local or remote.The routine is designed to be scalable in memory so that nothing is based 5347 * on a global size. 5348 * */ 5349 PetscErrorCode MatCreateSeqSubMatrixWithRows_Private(Mat P,IS rows,Mat *P_oth) 5350 { 5351 Mat_MPIAIJ *p=(Mat_MPIAIJ*)P->data; 5352 Mat_SeqAIJ *pd=(Mat_SeqAIJ*)(p->A)->data,*po=(Mat_SeqAIJ*)(p->B)->data,*p_oth; 5353 PetscInt plocalsize,nrows,*ilocal,*oilocal,i,lidx,*nrcols,*nlcols,ncol; 5354 PetscMPIInt owner; 5355 PetscSFNode *iremote,*oiremote; 5356 const PetscInt *lrowindices; 5357 PetscSF sf,osf; 5358 PetscInt pcstart,*roffsets,*loffsets,*pnnz,j; 5359 PetscInt ontotalcols,dntotalcols,ntotalcols,nout; 5360 MPI_Comm comm; 5361 ISLocalToGlobalMapping mapping; 5362 const PetscScalar *pd_a,*po_a; 5363 5364 PetscFunctionBegin; 5365 PetscCall(PetscObjectGetComm((PetscObject)P,&comm)); 5366 /* plocalsize is the number of roots 5367 * nrows is the number of leaves 5368 * */ 5369 PetscCall(MatGetLocalSize(P,&plocalsize,NULL)); 5370 PetscCall(ISGetLocalSize(rows,&nrows)); 5371 PetscCall(PetscCalloc1(nrows,&iremote)); 5372 PetscCall(ISGetIndices(rows,&lrowindices)); 5373 for (i=0;i<nrows;i++) { 5374 /* Find a remote index and an owner for a row 5375 * The row could be local or remote 5376 * */ 5377 owner = 0; 5378 lidx = 0; 5379 PetscCall(PetscLayoutFindOwnerIndex(P->rmap,lrowindices[i],&owner,&lidx)); 5380 iremote[i].index = lidx; 5381 iremote[i].rank = owner; 5382 } 5383 /* Create SF to communicate how many nonzero columns for each row */ 5384 PetscCall(PetscSFCreate(comm,&sf)); 5385 /* SF will figure out the number of nonzero colunms for each row, and their 5386 * offsets 5387 * */ 5388 PetscCall(PetscSFSetGraph(sf,plocalsize,nrows,NULL,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER)); 5389 
PetscCall(PetscSFSetFromOptions(sf)); 5390 PetscCall(PetscSFSetUp(sf)); 5391 5392 PetscCall(PetscCalloc1(2*(plocalsize+1),&roffsets)); 5393 PetscCall(PetscCalloc1(2*plocalsize,&nrcols)); 5394 PetscCall(PetscCalloc1(nrows,&pnnz)); 5395 roffsets[0] = 0; 5396 roffsets[1] = 0; 5397 for (i=0;i<plocalsize;i++) { 5398 /* diag */ 5399 nrcols[i*2+0] = pd->i[i+1] - pd->i[i]; 5400 /* off diag */ 5401 nrcols[i*2+1] = po->i[i+1] - po->i[i]; 5402 /* compute offsets so that we relative location for each row */ 5403 roffsets[(i+1)*2+0] = roffsets[i*2+0] + nrcols[i*2+0]; 5404 roffsets[(i+1)*2+1] = roffsets[i*2+1] + nrcols[i*2+1]; 5405 } 5406 PetscCall(PetscCalloc1(2*nrows,&nlcols)); 5407 PetscCall(PetscCalloc1(2*nrows,&loffsets)); 5408 /* 'r' means root, and 'l' means leaf */ 5409 PetscCall(PetscSFBcastBegin(sf,MPIU_2INT,nrcols,nlcols,MPI_REPLACE)); 5410 PetscCall(PetscSFBcastBegin(sf,MPIU_2INT,roffsets,loffsets,MPI_REPLACE)); 5411 PetscCall(PetscSFBcastEnd(sf,MPIU_2INT,nrcols,nlcols,MPI_REPLACE)); 5412 PetscCall(PetscSFBcastEnd(sf,MPIU_2INT,roffsets,loffsets,MPI_REPLACE)); 5413 PetscCall(PetscSFDestroy(&sf)); 5414 PetscCall(PetscFree(roffsets)); 5415 PetscCall(PetscFree(nrcols)); 5416 dntotalcols = 0; 5417 ontotalcols = 0; 5418 ncol = 0; 5419 for (i=0;i<nrows;i++) { 5420 pnnz[i] = nlcols[i*2+0] + nlcols[i*2+1]; 5421 ncol = PetscMax(pnnz[i],ncol); 5422 /* diag */ 5423 dntotalcols += nlcols[i*2+0]; 5424 /* off diag */ 5425 ontotalcols += nlcols[i*2+1]; 5426 } 5427 /* We do not need to figure the right number of columns 5428 * since all the calculations will be done by going through the raw data 5429 * */ 5430 PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF,nrows,ncol,0,pnnz,P_oth)); 5431 PetscCall(MatSetUp(*P_oth)); 5432 PetscCall(PetscFree(pnnz)); 5433 p_oth = (Mat_SeqAIJ*) (*P_oth)->data; 5434 /* diag */ 5435 PetscCall(PetscCalloc1(dntotalcols,&iremote)); 5436 /* off diag */ 5437 PetscCall(PetscCalloc1(ontotalcols,&oiremote)); 5438 /* diag */ 5439 
PetscCall(PetscCalloc1(dntotalcols,&ilocal)); 5440 /* off diag */ 5441 PetscCall(PetscCalloc1(ontotalcols,&oilocal)); 5442 dntotalcols = 0; 5443 ontotalcols = 0; 5444 ntotalcols = 0; 5445 for (i=0;i<nrows;i++) { 5446 owner = 0; 5447 PetscCall(PetscLayoutFindOwnerIndex(P->rmap,lrowindices[i],&owner,NULL)); 5448 /* Set iremote for diag matrix */ 5449 for (j=0;j<nlcols[i*2+0];j++) { 5450 iremote[dntotalcols].index = loffsets[i*2+0] + j; 5451 iremote[dntotalcols].rank = owner; 5452 /* P_oth is seqAIJ so that ilocal need to point to the first part of memory */ 5453 ilocal[dntotalcols++] = ntotalcols++; 5454 } 5455 /* off diag */ 5456 for (j=0;j<nlcols[i*2+1];j++) { 5457 oiremote[ontotalcols].index = loffsets[i*2+1] + j; 5458 oiremote[ontotalcols].rank = owner; 5459 oilocal[ontotalcols++] = ntotalcols++; 5460 } 5461 } 5462 PetscCall(ISRestoreIndices(rows,&lrowindices)); 5463 PetscCall(PetscFree(loffsets)); 5464 PetscCall(PetscFree(nlcols)); 5465 PetscCall(PetscSFCreate(comm,&sf)); 5466 /* P serves as roots and P_oth is leaves 5467 * Diag matrix 5468 * */ 5469 PetscCall(PetscSFSetGraph(sf,pd->i[plocalsize],dntotalcols,ilocal,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER)); 5470 PetscCall(PetscSFSetFromOptions(sf)); 5471 PetscCall(PetscSFSetUp(sf)); 5472 5473 PetscCall(PetscSFCreate(comm,&osf)); 5474 /* Off diag */ 5475 PetscCall(PetscSFSetGraph(osf,po->i[plocalsize],ontotalcols,oilocal,PETSC_OWN_POINTER,oiremote,PETSC_OWN_POINTER)); 5476 PetscCall(PetscSFSetFromOptions(osf)); 5477 PetscCall(PetscSFSetUp(osf)); 5478 PetscCall(MatSeqAIJGetArrayRead(p->A,&pd_a)); 5479 PetscCall(MatSeqAIJGetArrayRead(p->B,&po_a)); 5480 /* We operate on the matrix internal data for saving memory */ 5481 PetscCall(PetscSFBcastBegin(sf,MPIU_SCALAR,pd_a,p_oth->a,MPI_REPLACE)); 5482 PetscCall(PetscSFBcastBegin(osf,MPIU_SCALAR,po_a,p_oth->a,MPI_REPLACE)); 5483 PetscCall(MatGetOwnershipRangeColumn(P,&pcstart,NULL)); 5484 /* Convert to global indices for diag matrix */ 5485 for 
(i=0;i<pd->i[plocalsize];i++) pd->j[i] += pcstart; 5486 PetscCall(PetscSFBcastBegin(sf,MPIU_INT,pd->j,p_oth->j,MPI_REPLACE)); 5487 /* We want P_oth store global indices */ 5488 PetscCall(ISLocalToGlobalMappingCreate(comm,1,p->B->cmap->n,p->garray,PETSC_COPY_VALUES,&mapping)); 5489 /* Use memory scalable approach */ 5490 PetscCall(ISLocalToGlobalMappingSetType(mapping,ISLOCALTOGLOBALMAPPINGHASH)); 5491 PetscCall(ISLocalToGlobalMappingApply(mapping,po->i[plocalsize],po->j,po->j)); 5492 PetscCall(PetscSFBcastBegin(osf,MPIU_INT,po->j,p_oth->j,MPI_REPLACE)); 5493 PetscCall(PetscSFBcastEnd(sf,MPIU_INT,pd->j,p_oth->j,MPI_REPLACE)); 5494 /* Convert back to local indices */ 5495 for (i=0;i<pd->i[plocalsize];i++) pd->j[i] -= pcstart; 5496 PetscCall(PetscSFBcastEnd(osf,MPIU_INT,po->j,p_oth->j,MPI_REPLACE)); 5497 nout = 0; 5498 PetscCall(ISGlobalToLocalMappingApply(mapping,IS_GTOLM_DROP,po->i[plocalsize],po->j,&nout,po->j)); 5499 PetscCheck(nout == po->i[plocalsize],comm,PETSC_ERR_ARG_INCOMP,"n %" PetscInt_FMT " does not equal to nout %" PetscInt_FMT " ",po->i[plocalsize],nout); 5500 PetscCall(ISLocalToGlobalMappingDestroy(&mapping)); 5501 /* Exchange values */ 5502 PetscCall(PetscSFBcastEnd(sf,MPIU_SCALAR,pd_a,p_oth->a,MPI_REPLACE)); 5503 PetscCall(PetscSFBcastEnd(osf,MPIU_SCALAR,po_a,p_oth->a,MPI_REPLACE)); 5504 PetscCall(MatSeqAIJRestoreArrayRead(p->A,&pd_a)); 5505 PetscCall(MatSeqAIJRestoreArrayRead(p->B,&po_a)); 5506 /* Stop PETSc from shrinking memory */ 5507 for (i=0;i<nrows;i++) p_oth->ilen[i] = p_oth->imax[i]; 5508 PetscCall(MatAssemblyBegin(*P_oth,MAT_FINAL_ASSEMBLY)); 5509 PetscCall(MatAssemblyEnd(*P_oth,MAT_FINAL_ASSEMBLY)); 5510 /* Attach PetscSF objects to P_oth so that we can reuse it later */ 5511 PetscCall(PetscObjectCompose((PetscObject)*P_oth,"diagsf",(PetscObject)sf)); 5512 PetscCall(PetscObjectCompose((PetscObject)*P_oth,"offdiagsf",(PetscObject)osf)); 5513 PetscCall(PetscSFDestroy(&sf)); 5514 PetscCall(PetscSFDestroy(&osf)); 5515 PetscFunctionReturn(0); 
5516 } 5517 5518 /* 5519 * Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 5520 * This supports MPIAIJ and MAIJ 5521 * */ 5522 PetscErrorCode MatGetBrowsOfAcols_MPIXAIJ(Mat A,Mat P,PetscInt dof,MatReuse reuse,Mat *P_oth) 5523 { 5524 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data,*p=(Mat_MPIAIJ*)P->data; 5525 Mat_SeqAIJ *p_oth; 5526 IS rows,map; 5527 PetscHMapI hamp; 5528 PetscInt i,htsize,*rowindices,off,*mapping,key,count; 5529 MPI_Comm comm; 5530 PetscSF sf,osf; 5531 PetscBool has; 5532 5533 PetscFunctionBegin; 5534 PetscCall(PetscObjectGetComm((PetscObject)A,&comm)); 5535 PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAocols,A,P,0,0)); 5536 /* If it is the first time, create an index set of off-diag nonzero columns of A, 5537 * and then create a submatrix (that often is an overlapping matrix) 5538 * */ 5539 if (reuse == MAT_INITIAL_MATRIX) { 5540 /* Use a hash table to figure out unique keys */ 5541 PetscCall(PetscHMapICreate(&hamp)); 5542 PetscCall(PetscHMapIResize(hamp,a->B->cmap->n)); 5543 PetscCall(PetscCalloc1(a->B->cmap->n,&mapping)); 5544 count = 0; 5545 /* Assume that a->g is sorted, otherwise the following does not make sense */ 5546 for (i=0;i<a->B->cmap->n;i++) { 5547 key = a->garray[i]/dof; 5548 PetscCall(PetscHMapIHas(hamp,key,&has)); 5549 if (!has) { 5550 mapping[i] = count; 5551 PetscCall(PetscHMapISet(hamp,key,count++)); 5552 } else { 5553 /* Current 'i' has the same value the previous step */ 5554 mapping[i] = count-1; 5555 } 5556 } 5557 PetscCall(ISCreateGeneral(comm,a->B->cmap->n,mapping,PETSC_OWN_POINTER,&map)); 5558 PetscCall(PetscHMapIGetSize(hamp,&htsize)); 5559 PetscCheck(htsize==count,comm,PETSC_ERR_ARG_INCOMP," Size of hash map %" PetscInt_FMT " is inconsistent with count %" PetscInt_FMT " ",htsize,count); 5560 PetscCall(PetscCalloc1(htsize,&rowindices)); 5561 off = 0; 5562 PetscCall(PetscHMapIGetKeys(hamp,&off,rowindices)); 5563 PetscCall(PetscHMapIDestroy(&hamp)); 5564 PetscCall(PetscSortInt(htsize,rowindices)); 
5565 PetscCall(ISCreateGeneral(comm,htsize,rowindices,PETSC_OWN_POINTER,&rows)); 5566 /* In case, the matrix was already created but users want to recreate the matrix */ 5567 PetscCall(MatDestroy(P_oth)); 5568 PetscCall(MatCreateSeqSubMatrixWithRows_Private(P,rows,P_oth)); 5569 PetscCall(PetscObjectCompose((PetscObject)*P_oth,"aoffdiagtopothmapping",(PetscObject)map)); 5570 PetscCall(ISDestroy(&map)); 5571 PetscCall(ISDestroy(&rows)); 5572 } else if (reuse == MAT_REUSE_MATRIX) { 5573 /* If matrix was already created, we simply update values using SF objects 5574 * that as attached to the matrix ealier. 5575 */ 5576 const PetscScalar *pd_a,*po_a; 5577 5578 PetscCall(PetscObjectQuery((PetscObject)*P_oth,"diagsf",(PetscObject*)&sf)); 5579 PetscCall(PetscObjectQuery((PetscObject)*P_oth,"offdiagsf",(PetscObject*)&osf)); 5580 PetscCheck(sf && osf,comm,PETSC_ERR_ARG_NULL,"Matrix is not initialized yet"); 5581 p_oth = (Mat_SeqAIJ*) (*P_oth)->data; 5582 /* Update values in place */ 5583 PetscCall(MatSeqAIJGetArrayRead(p->A,&pd_a)); 5584 PetscCall(MatSeqAIJGetArrayRead(p->B,&po_a)); 5585 PetscCall(PetscSFBcastBegin(sf,MPIU_SCALAR,pd_a,p_oth->a,MPI_REPLACE)); 5586 PetscCall(PetscSFBcastBegin(osf,MPIU_SCALAR,po_a,p_oth->a,MPI_REPLACE)); 5587 PetscCall(PetscSFBcastEnd(sf,MPIU_SCALAR,pd_a,p_oth->a,MPI_REPLACE)); 5588 PetscCall(PetscSFBcastEnd(osf,MPIU_SCALAR,po_a,p_oth->a,MPI_REPLACE)); 5589 PetscCall(MatSeqAIJRestoreArrayRead(p->A,&pd_a)); 5590 PetscCall(MatSeqAIJRestoreArrayRead(p->B,&po_a)); 5591 } else SETERRQ(comm,PETSC_ERR_ARG_UNKNOWN_TYPE,"Unknown reuse type"); 5592 PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAocols,A,P,0,0)); 5593 PetscFunctionReturn(0); 5594 } 5595 5596 /*@C 5597 MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 5598 5599 Collective on Mat 5600 5601 Input Parameters: 5602 + A - the first matrix in mpiaij format 5603 . 
B - the second matrix in mpiaij format
- scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX

  Output Parameters:
+ rowb - On input index sets of rows of B to extract (or NULL), modified on output
. colb - On input index sets of columns of B to extract (or NULL), modified on output
- B_seq - the sequential matrix generated

  Level: developer

@*/
PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
  PetscInt   *idx,i,start,ncols,nzA,nzB,*cmap,imark;
  IS         isrowb,iscolb;
  Mat        *bseq = NULL;

  PetscFunctionBegin;
  /* the column layout of A must coincide with the row layout of B */
  PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
  PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0));

  if (scall != MAT_INITIAL_MATRIX) {
    /* reuse: the index sets from the initial call are required, and the old matrix is wrapped in an array */
    PetscCheck(rowb && colb,PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
    isrowb = *rowb;
    iscolb = *colb;
    PetscCall(PetscMalloc1(1,&bseq));
    bseq[0] = *B_seq;
  } else {
    /* rows of B to fetch: garray entries below the owned range, the owned range, the remaining garray entries */
    start = A->cmap->rstart;
    cmap  = a->garray;
    nzA   = a->A->cmap->n;
    nzB   = a->B->cmap->n;
    PetscCall(PetscMalloc1(nzA+nzB, &idx));
    ncols = 0;
    for (imark=0; imark<nzB && cmap[imark] < start; imark++) idx[ncols++] = cmap[imark]; /* row < local row index */
    for (i=0; i<nzA; i++) idx[ncols++] = start + i;                                      /* local rows */
    for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];                                    /* row > local row index */
    PetscCall(ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb));
    /* all columns of B */
    PetscCall(ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb));
  }

  PetscCall(MatCreateSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq));
  *B_seq = bseq[0];
  PetscCall(PetscFree(bseq));

  /* hand the index sets to the caller when asked for, otherwise release them */
  if (rowb) {
    *rowb = isrowb;
  } else {
    PetscCall(ISDestroy(&isrowb));
  }
  if (colb) {
    *colb = iscolb;
  } else {
    PetscCall(ISDestroy(&iscolb));
  }
  PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0));
  PetscFunctionReturn(0);
}

/*
    MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns
    of the OFF-DIAGONAL portion of local A

    Collective on Mat

   Input Parameters:
+    A,B - the matrices in mpiaij format
-    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX

   Output Parameters:
+    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
.    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
.    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
-    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N

    Developer Notes: This directly accesses information inside the VecScatter associated with the matrix-vector product
     for this matrix. This is not desirable..
5684 5685 Level: developer 5686 5687 */ 5688 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth) 5689 { 5690 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5691 Mat_SeqAIJ *b_oth; 5692 VecScatter ctx; 5693 MPI_Comm comm; 5694 const PetscMPIInt *rprocs,*sprocs; 5695 const PetscInt *srow,*rstarts,*sstarts; 5696 PetscInt *rowlen,*bufj,*bufJ,ncols = 0,aBn=a->B->cmap->n,row,*b_othi,*b_othj,*rvalues=NULL,*svalues=NULL,*cols,sbs,rbs; 5697 PetscInt i,j,k=0,l,ll,nrecvs,nsends,nrows,*rstartsj = NULL,*sstartsj,len; 5698 PetscScalar *b_otha,*bufa,*bufA,*vals = NULL; 5699 MPI_Request *reqs = NULL,*rwaits = NULL,*swaits = NULL; 5700 PetscMPIInt size,tag,rank,nreqs; 5701 5702 PetscFunctionBegin; 5703 PetscCall(PetscObjectGetComm((PetscObject)A,&comm)); 5704 PetscCallMPI(MPI_Comm_size(comm,&size)); 5705 5706 if (PetscUnlikely(A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend)) { 5707 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5708 } 5709 PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0)); 5710 PetscCallMPI(MPI_Comm_rank(comm,&rank)); 5711 5712 if (size == 1) { 5713 startsj_s = NULL; 5714 bufa_ptr = NULL; 5715 *B_oth = NULL; 5716 PetscFunctionReturn(0); 5717 } 5718 5719 ctx = a->Mvctx; 5720 tag = ((PetscObject)ctx)->tag; 5721 5722 PetscCall(VecScatterGetRemote_Private(ctx,PETSC_TRUE/*send*/,&nsends,&sstarts,&srow,&sprocs,&sbs)); 5723 /* rprocs[] must be ordered so that indices received from them are ordered in rvalues[], which is key to algorithms used in this subroutine */ 5724 PetscCall(VecScatterGetRemoteOrdered_Private(ctx,PETSC_FALSE/*recv*/,&nrecvs,&rstarts,NULL/*indices not needed*/,&rprocs,&rbs)); 5725 PetscCall(PetscMPIIntCast(nsends+nrecvs,&nreqs)); 5726 
PetscCall(PetscMalloc1(nreqs,&reqs)); 5727 rwaits = reqs; 5728 swaits = reqs + nrecvs; 5729 5730 if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 5731 if (scall == MAT_INITIAL_MATRIX) { 5732 /* i-array */ 5733 /*---------*/ 5734 /* post receives */ 5735 if (nrecvs) PetscCall(PetscMalloc1(rbs*(rstarts[nrecvs] - rstarts[0]),&rvalues)); /* rstarts can be NULL when nrecvs=0 */ 5736 for (i=0; i<nrecvs; i++) { 5737 rowlen = rvalues + rstarts[i]*rbs; 5738 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */ 5739 PetscCallMPI(MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i)); 5740 } 5741 5742 /* pack the outgoing message */ 5743 PetscCall(PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj)); 5744 5745 sstartsj[0] = 0; 5746 rstartsj[0] = 0; 5747 len = 0; /* total length of j or a array to be sent */ 5748 if (nsends) { 5749 k = sstarts[0]; /* ATTENTION: sstarts[0] and rstarts[0] are not necessarily zero */ 5750 PetscCall(PetscMalloc1(sbs*(sstarts[nsends]-sstarts[0]),&svalues)); 5751 } 5752 for (i=0; i<nsends; i++) { 5753 rowlen = svalues + (sstarts[i]-sstarts[0])*sbs; 5754 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5755 for (j=0; j<nrows; j++) { 5756 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 5757 for (l=0; l<sbs; l++) { 5758 PetscCall(MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL)); /* rowlength */ 5759 5760 rowlen[j*sbs+l] = ncols; 5761 5762 len += ncols; 5763 PetscCall(MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL)); 5764 } 5765 k++; 5766 } 5767 PetscCallMPI(MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i)); 5768 5769 sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */ 5770 } 5771 /* recvs and sends of i-array are completed */ 5772 if (nreqs) PetscCallMPI(MPI_Waitall(nreqs,reqs,MPI_STATUSES_IGNORE)); 5773 PetscCall(PetscFree(svalues)); 5774 5775 /* allocate buffers for sending j and a arrays */ 5776 PetscCall(PetscMalloc1(len+1,&bufj)); 5777 
PetscCall(PetscMalloc1(len+1,&bufa)); 5778 5779 /* create i-array of B_oth */ 5780 PetscCall(PetscMalloc1(aBn+2,&b_othi)); 5781 5782 b_othi[0] = 0; 5783 len = 0; /* total length of j or a array to be received */ 5784 k = 0; 5785 for (i=0; i<nrecvs; i++) { 5786 rowlen = rvalues + (rstarts[i]-rstarts[0])*rbs; 5787 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of rows to be received */ 5788 for (j=0; j<nrows; j++) { 5789 b_othi[k+1] = b_othi[k] + rowlen[j]; 5790 PetscCall(PetscIntSumError(rowlen[j],len,&len)); 5791 k++; 5792 } 5793 rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 5794 } 5795 PetscCall(PetscFree(rvalues)); 5796 5797 /* allocate space for j and a arrays of B_oth */ 5798 PetscCall(PetscMalloc1(b_othi[aBn]+1,&b_othj)); 5799 PetscCall(PetscMalloc1(b_othi[aBn]+1,&b_otha)); 5800 5801 /* j-array */ 5802 /*---------*/ 5803 /* post receives of j-array */ 5804 for (i=0; i<nrecvs; i++) { 5805 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5806 PetscCallMPI(MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i)); 5807 } 5808 5809 /* pack the outgoing message j-array */ 5810 if (nsends) k = sstarts[0]; 5811 for (i=0; i<nsends; i++) { 5812 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5813 bufJ = bufj+sstartsj[i]; 5814 for (j=0; j<nrows; j++) { 5815 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5816 for (ll=0; ll<sbs; ll++) { 5817 PetscCall(MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL)); 5818 for (l=0; l<ncols; l++) { 5819 *bufJ++ = cols[l]; 5820 } 5821 PetscCall(MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL)); 5822 } 5823 } 5824 PetscCallMPI(MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i)); 5825 } 5826 5827 /* recvs and sends of j-array are completed */ 5828 if (nreqs) PetscCallMPI(MPI_Waitall(nreqs,reqs,MPI_STATUSES_IGNORE)); 5829 } else if (scall == MAT_REUSE_MATRIX) { 5830 sstartsj = *startsj_s; 5831 rstartsj = 
*startsj_r; 5832 bufa = *bufa_ptr; 5833 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5834 PetscCall(MatSeqAIJGetArrayWrite(*B_oth,&b_otha)); 5835 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not possess an object container"); 5836 5837 /* a-array */ 5838 /*---------*/ 5839 /* post receives of a-array */ 5840 for (i=0; i<nrecvs; i++) { 5841 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5842 PetscCallMPI(MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i)); 5843 } 5844 5845 /* pack the outgoing message a-array */ 5846 if (nsends) k = sstarts[0]; 5847 for (i=0; i<nsends; i++) { 5848 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5849 bufA = bufa+sstartsj[i]; 5850 for (j=0; j<nrows; j++) { 5851 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5852 for (ll=0; ll<sbs; ll++) { 5853 PetscCall(MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals)); 5854 for (l=0; l<ncols; l++) { 5855 *bufA++ = vals[l]; 5856 } 5857 PetscCall(MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals)); 5858 } 5859 } 5860 PetscCallMPI(MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i)); 5861 } 5862 /* recvs and sends of a-array are completed */ 5863 if (nreqs) PetscCallMPI(MPI_Waitall(nreqs,reqs,MPI_STATUSES_IGNORE)); 5864 PetscCall(PetscFree(reqs)); 5865 5866 if (scall == MAT_INITIAL_MATRIX) { 5867 /* put together the new matrix */ 5868 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth)); 5869 5870 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5871 /* Since these are PETSc arrays, change flags to free them as necessary. 
*/ 5872 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5873 b_oth->free_a = PETSC_TRUE; 5874 b_oth->free_ij = PETSC_TRUE; 5875 b_oth->nonew = 0; 5876 5877 PetscCall(PetscFree(bufj)); 5878 if (!startsj_s || !bufa_ptr) { 5879 PetscCall(PetscFree2(sstartsj,rstartsj)); 5880 PetscCall(PetscFree(bufa_ptr)); 5881 } else { 5882 *startsj_s = sstartsj; 5883 *startsj_r = rstartsj; 5884 *bufa_ptr = bufa; 5885 } 5886 } else if (scall == MAT_REUSE_MATRIX) { 5887 PetscCall(MatSeqAIJRestoreArrayWrite(*B_oth,&b_otha)); 5888 } 5889 5890 PetscCall(VecScatterRestoreRemote_Private(ctx,PETSC_TRUE,&nsends,&sstarts,&srow,&sprocs,&sbs)); 5891 PetscCall(VecScatterRestoreRemoteOrdered_Private(ctx,PETSC_FALSE,&nrecvs,&rstarts,NULL,&rprocs,&rbs)); 5892 PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0)); 5893 PetscFunctionReturn(0); 5894 } 5895 5896 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*); 5897 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*); 5898 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJSELL(Mat,MatType,MatReuse,Mat*); 5899 #if defined(PETSC_HAVE_MKL_SPARSE) 5900 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat,MatType,MatReuse,Mat*); 5901 #endif 5902 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIBAIJ(Mat,MatType,MatReuse,Mat*); 5903 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*); 5904 #if defined(PETSC_HAVE_ELEMENTAL) 5905 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat,MatType,MatReuse,Mat*); 5906 #endif 5907 #if defined(PETSC_HAVE_SCALAPACK) 5908 PETSC_INTERN PetscErrorCode MatConvert_AIJ_ScaLAPACK(Mat,MatType,MatReuse,Mat*); 5909 #endif 5910 #if defined(PETSC_HAVE_HYPRE) 5911 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*); 5912 #endif 5913 #if defined(PETSC_HAVE_CUDA) 5914 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCUSPARSE(Mat,MatType,MatReuse,Mat*); 5915 #endif 5916 #if defined(PETSC_HAVE_KOKKOS_KERNELS) 
5917 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJKokkos(Mat,MatType,MatReuse,Mat*); 5918 #endif 5919 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat,MatType,MatReuse,Mat*); 5920 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat,MatType,MatReuse,Mat*); 5921 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_IS_XAIJ(Mat); 5922 5923 /* 5924 Computes (B'*A')' since computing B*A directly is untenable 5925 5926 n p p 5927 [ ] [ ] [ ] 5928 m [ A ] * n [ B ] = m [ C ] 5929 [ ] [ ] [ ] 5930 5931 */ 5932 static PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C) 5933 { 5934 Mat At,Bt,Ct; 5935 5936 PetscFunctionBegin; 5937 PetscCall(MatTranspose(A,MAT_INITIAL_MATRIX,&At)); 5938 PetscCall(MatTranspose(B,MAT_INITIAL_MATRIX,&Bt)); 5939 PetscCall(MatMatMult(Bt,At,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&Ct)); 5940 PetscCall(MatDestroy(&At)); 5941 PetscCall(MatDestroy(&Bt)); 5942 PetscCall(MatTranspose(Ct,MAT_REUSE_MATRIX,&C)); 5943 PetscCall(MatDestroy(&Ct)); 5944 PetscFunctionReturn(0); 5945 } 5946 5947 static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat C) 5948 { 5949 PetscBool cisdense; 5950 5951 PetscFunctionBegin; 5952 PetscCheck(A->cmap->n == B->rmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %" PetscInt_FMT " != B->rmap->n %" PetscInt_FMT,A->cmap->n,B->rmap->n); 5953 PetscCall(MatSetSizes(C,A->rmap->n,B->cmap->n,A->rmap->N,B->cmap->N)); 5954 PetscCall(MatSetBlockSizesFromMats(C,A,B)); 5955 PetscCall(PetscObjectTypeCompareAny((PetscObject)C,&cisdense,MATMPIDENSE,MATMPIDENSECUDA,"")); 5956 if (!cisdense) { 5957 PetscCall(MatSetType(C,((PetscObject)A)->type_name)); 5958 } 5959 PetscCall(MatSetUp(C)); 5960 5961 C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ; 5962 PetscFunctionReturn(0); 5963 } 5964 5965 /* ----------------------------------------------------------------*/ 5966 static PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ_AB(Mat C) 5967 { 5968 Mat_Product *product = 
C->product; 5969 Mat A = product->A,B=product->B; 5970 5971 PetscFunctionBegin; 5972 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) 5973 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5974 5975 C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIAIJ; 5976 C->ops->productsymbolic = MatProductSymbolic_AB; 5977 PetscFunctionReturn(0); 5978 } 5979 5980 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ(Mat C) 5981 { 5982 Mat_Product *product = C->product; 5983 5984 PetscFunctionBegin; 5985 if (product->type == MATPRODUCT_AB) { 5986 PetscCall(MatProductSetFromOptions_MPIDense_MPIAIJ_AB(C)); 5987 } 5988 PetscFunctionReturn(0); 5989 } 5990 5991 /* Merge two sets of sorted nonzeros and return a CSR for the merged (sequential) matrix 5992 5993 Input Parameters: 5994 5995 j1,rowBegin1,rowEnd1,perm1,jmap1: describe the first set of nonzeros (Set1) 5996 j2,rowBegin2,rowEnd2,perm2,jmap2: describe the second set of nonzeros (Set2) 5997 5998 mat: both sets' nonzeros are on m rows, where m is the number of local rows of the matrix mat 5999 6000 For Set1, j1[] contains column indices of the nonzeros. 6001 For the k-th row (0<=k<m), [rowBegin1[k],rowEnd1[k]) index into j1[] and point to the begin/end nonzero in row k 6002 respectively (note rowEnd1[k] is not necessarily equal to rwoBegin1[k+1]). Indices in this range of j1[] are sorted, 6003 but might have repeats. jmap1[t+1] - jmap1[t] is the number of repeats for the t-th unique nonzero in Set1. 6004 6005 Similar for Set2. 6006 6007 This routine merges the two sets of nonzeros row by row and removes repeats. 6008 6009 Output Parameters: (memory is allocated by the caller) 6010 6011 i[],j[]: the CSR of the merged matrix, which has m rows. 6012 imap1[]: the k-th unique nonzero in Set1 (k=0,1,...) 
corresponds to imap1[k]-th unique nonzero in the merged matrix. 6013 imap2[]: similar to imap1[], but for Set2. 6014 Note we order nonzeros row-by-row and from left to right. 6015 */ 6016 static PetscErrorCode MatMergeEntries_Internal(Mat mat,const PetscInt j1[],const PetscInt j2[],const PetscCount rowBegin1[],const PetscCount rowEnd1[], 6017 const PetscCount rowBegin2[],const PetscCount rowEnd2[],const PetscCount jmap1[],const PetscCount jmap2[], 6018 PetscCount imap1[],PetscCount imap2[],PetscInt i[],PetscInt j[]) 6019 { 6020 PetscInt r,m; /* Row index of mat */ 6021 PetscCount t,t1,t2,b1,e1,b2,e2; 6022 6023 PetscFunctionBegin; 6024 PetscCall(MatGetLocalSize(mat,&m,NULL)); 6025 t1 = t2 = t = 0; /* Count unique nonzeros of in Set1, Set1 and the merged respectively */ 6026 i[0] = 0; 6027 for (r=0; r<m; r++) { /* Do row by row merging */ 6028 b1 = rowBegin1[r]; 6029 e1 = rowEnd1[r]; 6030 b2 = rowBegin2[r]; 6031 e2 = rowEnd2[r]; 6032 while (b1 < e1 && b2 < e2) { 6033 if (j1[b1] == j2[b2]) { /* Same column index and hence same nonzero */ 6034 j[t] = j1[b1]; 6035 imap1[t1] = t; 6036 imap2[t2] = t; 6037 b1 += jmap1[t1+1] - jmap1[t1]; /* Jump to next unique local nonzero */ 6038 b2 += jmap2[t2+1] - jmap2[t2]; /* Jump to next unique remote nonzero */ 6039 t1++; t2++; t++; 6040 } else if (j1[b1] < j2[b2]) { 6041 j[t] = j1[b1]; 6042 imap1[t1] = t; 6043 b1 += jmap1[t1+1] - jmap1[t1]; 6044 t1++; t++; 6045 } else { 6046 j[t] = j2[b2]; 6047 imap2[t2] = t; 6048 b2 += jmap2[t2+1] - jmap2[t2]; 6049 t2++; t++; 6050 } 6051 } 6052 /* Merge the remaining in either j1[] or j2[] */ 6053 while (b1 < e1) { 6054 j[t] = j1[b1]; 6055 imap1[t1] = t; 6056 b1 += jmap1[t1+1] - jmap1[t1]; 6057 t1++; t++; 6058 } 6059 while (b2 < e2) { 6060 j[t] = j2[b2]; 6061 imap2[t2] = t; 6062 b2 += jmap2[t2+1] - jmap2[t2]; 6063 t2++; t++; 6064 } 6065 i[r+1] = t; 6066 } 6067 PetscFunctionReturn(0); 6068 } 6069 6070 /* Split nonzeros in a block of local rows into two subsets: those in the diagonal block and those 
in the off-diagonal block 6071 6072 Input Parameters: 6073 mat: an MPI matrix that provides row and column layout information for splitting. Let's say its number of local rows is m. 6074 n,i[],j[],perm[]: there are n input entries, belonging to m rows. Row/col indices of the entries are stored in i[] and j[] 6075 respectively, along with a permutation array perm[]. Length of the i[],j[],perm[] arrays is n. 6076 6077 i[] is already sorted, but within a row, j[] is not sorted and might have repeats. 6078 i[] might contain negative indices at the beginning, which means the corresponding entries should be ignored in the splitting. 6079 6080 Output Parameters: 6081 j[],perm[]: the routine needs to sort j[] within each row along with perm[]. 6082 rowBegin[],rowMid[],rowEnd[]: of length m, and the memory is preallocated and zeroed by the caller. 6083 They contain indices pointing to j[]. For 0<=r<m, [rowBegin[r],rowMid[r]) point to begin/end entries of row r of the diagonal block, 6084 and [rowMid[r],rowEnd[r]) point to begin/end entries of row r of the off-diagonal block. 6085 6086 Aperm[],Ajmap[],Atot,Annz: Arrays are allocated by this routine. 6087 Atot: number of entries belonging to the diagonal block. 6088 Annz: number of unique nonzeros belonging to the diagonal block. 6089 Aperm[Atot] stores values from perm[] for entries belonging to the diagonal block. Length of Aperm[] is Atot, though it may also count 6090 repeats (i.e., same 'i,j' pair). 6091 Ajmap[Annz+1] stores the number of repeats of each unique entry belonging to the diagonal block. More precisely, Ajmap[t+1] - Ajmap[t] 6092 is the number of repeats for the t-th unique entry in the diagonal block. Ajmap[0] is always 0. 6093 6094 Atot: number of entries belonging to the diagonal block 6095 Annz: number of unique nonzeros belonging to the diagonal block. 6096 6097 Bperm[], Bjmap[], Btot, Bnnz are similar but for the off-diagonal block. 
6098 6099 Aperm[],Bperm[],Ajmap[] and Bjmap[] are allocated separately by this routine with PetscMalloc1(). 6100 */ 6101 static PetscErrorCode MatSplitEntries_Internal(Mat mat,PetscCount n,const PetscInt i[],PetscInt j[], 6102 PetscCount perm[],PetscCount rowBegin[],PetscCount rowMid[],PetscCount rowEnd[], 6103 PetscCount *Atot_,PetscCount **Aperm_,PetscCount *Annz_,PetscCount **Ajmap_, 6104 PetscCount *Btot_,PetscCount **Bperm_,PetscCount *Bnnz_,PetscCount **Bjmap_) 6105 { 6106 PetscInt cstart,cend,rstart,rend,row,col; 6107 PetscCount Atot=0,Btot=0; /* Total number of nonzeros in the diagonal and off-diagonal blocks */ 6108 PetscCount Annz=0,Bnnz=0; /* Number of unique nonzeros in the diagonal and off-diagonal blocks */ 6109 PetscCount k,m,p,q,r,s,mid; 6110 PetscCount *Aperm,*Bperm,*Ajmap,*Bjmap; 6111 6112 PetscFunctionBegin; 6113 PetscCall(PetscLayoutGetRange(mat->rmap,&rstart,&rend)); 6114 PetscCall(PetscLayoutGetRange(mat->cmap,&cstart,&cend)); 6115 m = rend - rstart; 6116 6117 for (k=0; k<n; k++) {if (i[k]>=0) break;} /* Skip negative rows */ 6118 6119 /* Process [k,n): sort and partition each local row into diag and offdiag portions, 6120 fill rowBegin[], rowMid[], rowEnd[], and count Atot, Btot, Annz, Bnnz. 6121 */ 6122 while (k<n) { 6123 row = i[k]; 6124 /* Entries in [k,s) are in one row. 
Shift diagonal block col indices so that diag is ahead of offdiag after sorting the row */ 6125 for (s=k; s<n; s++) if (i[s] != row) break; 6126 for (p=k; p<s; p++) { 6127 if (j[p] >= cstart && j[p] < cend) j[p] -= PETSC_MAX_INT; /* Shift diag columns to range of [-PETSC_MAX_INT, -1] */ 6128 else PetscAssert((j[p] >= 0) && (j[p] <= mat->cmap->N),PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column index %" PetscInt_FMT " is out of range",j[p]); 6129 } 6130 PetscCall(PetscSortIntWithCountArray(s-k,j+k,perm+k)); 6131 PetscCall(PetscSortedIntUpperBound(j,k,s,-1,&mid)); /* Separate [k,s) into [k,mid) for diag and [mid,s) for offdiag */ 6132 rowBegin[row-rstart] = k; 6133 rowMid[row-rstart] = mid; 6134 rowEnd[row-rstart] = s; 6135 6136 /* Count nonzeros of this diag/offdiag row, which might have repeats */ 6137 Atot += mid - k; 6138 Btot += s - mid; 6139 6140 /* Count unique nonzeros of this diag/offdiag row */ 6141 for (p=k; p<mid;) { 6142 col = j[p]; 6143 do {j[p] += PETSC_MAX_INT; p++;} while (p<mid && j[p] == col); /* Revert the modified diagonal indices */ 6144 Annz++; 6145 } 6146 6147 for (p=mid; p<s;) { 6148 col = j[p]; 6149 do {p++;} while (p<s && j[p] == col); 6150 Bnnz++; 6151 } 6152 k = s; 6153 } 6154 6155 /* Allocation according to Atot, Btot, Annz, Bnnz */ 6156 PetscCall(PetscMalloc1(Atot,&Aperm)); 6157 PetscCall(PetscMalloc1(Btot,&Bperm)); 6158 PetscCall(PetscMalloc1(Annz+1,&Ajmap)); 6159 PetscCall(PetscMalloc1(Bnnz+1,&Bjmap)); 6160 6161 /* Re-scan indices and copy diag/offdiag permutation indices to Aperm, Bperm and also fill Ajmap and Bjmap */ 6162 Ajmap[0] = Bjmap[0] = Atot = Btot = Annz = Bnnz = 0; 6163 for (r=0; r<m; r++) { 6164 k = rowBegin[r]; 6165 mid = rowMid[r]; 6166 s = rowEnd[r]; 6167 PetscCall(PetscArraycpy(Aperm+Atot,perm+k, mid-k)); 6168 PetscCall(PetscArraycpy(Bperm+Btot,perm+mid,s-mid)); 6169 Atot += mid - k; 6170 Btot += s - mid; 6171 6172 /* Scan column indices in this row and find out how many repeats each unique nonzero has */ 6173 for 
(p=k; p<mid;) { 6174 col = j[p]; 6175 q = p; 6176 do {p++;} while (p<mid && j[p] == col); 6177 Ajmap[Annz+1] = Ajmap[Annz] + (p - q); 6178 Annz++; 6179 } 6180 6181 for (p=mid; p<s;) { 6182 col = j[p]; 6183 q = p; 6184 do {p++;} while (p<s && j[p] == col); 6185 Bjmap[Bnnz+1] = Bjmap[Bnnz] + (p - q); 6186 Bnnz++; 6187 } 6188 } 6189 /* Output */ 6190 *Aperm_ = Aperm; 6191 *Annz_ = Annz; 6192 *Atot_ = Atot; 6193 *Ajmap_ = Ajmap; 6194 *Bperm_ = Bperm; 6195 *Bnnz_ = Bnnz; 6196 *Btot_ = Btot; 6197 *Bjmap_ = Bjmap; 6198 PetscFunctionReturn(0); 6199 } 6200 6201 /* Expand the jmap[] array to make a new one in view of nonzeros in the merged matrix 6202 6203 Input Parameters: 6204 nnz1: number of unique nonzeros in a set that was used to produce imap[], jmap[] 6205 nnz: number of unique nonzeros in the merged matrix 6206 imap[nnz1]: i-th nonzero in the set is the imap[i]-th nonzero in the merged matrix 6207 jmap[nnz1+1]: i-th nonzeron in the set has jmap[i+1] - jmap[i] repeats in the set 6208 6209 Output Parameter: (memory is allocated by the caller) 6210 jmap_new[nnz+1]: i-th nonzero in the merged matrix has jmap_new[i+1] - jmap_new[i] repeats in the set 6211 6212 Example: 6213 nnz1 = 4 6214 nnz = 6 6215 imap = [1,3,4,5] 6216 jmap = [0,3,5,6,7] 6217 then, 6218 jmap_new = [0,0,3,3,5,6,7] 6219 */ 6220 static PetscErrorCode ExpandJmap_Internal(PetscCount nnz1,PetscCount nnz,const PetscCount imap[],const PetscCount jmap[],PetscCount jmap_new[]) 6221 { 6222 PetscCount k,p; 6223 6224 PetscFunctionBegin; 6225 jmap_new[0] = 0; 6226 p = nnz; /* p loops over jmap_new[] backwards */ 6227 for (k=nnz1-1; k>=0; k--) { /* k loops over imap[] */ 6228 for (; p > imap[k]; p--) jmap_new[p] = jmap[k+1]; 6229 } 6230 for (; p >= 0; p--) jmap_new[p] = jmap[0]; 6231 PetscFunctionReturn(0); 6232 } 6233 6234 PetscErrorCode MatSetPreallocationCOO_MPIAIJ(Mat mat, PetscCount coo_n, const PetscInt coo_i[], const PetscInt coo_j[]) 6235 { 6236 MPI_Comm comm; 6237 PetscMPIInt rank,size; 6238 PetscInt 
m,n,M,N,rstart,rend,cstart,cend; /* Sizes, indices of row/col, therefore with type PetscInt */ 6239 PetscCount k,p,q,rem; /* Loop variables over coo arrays */ 6240 Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ*)mat->data; 6241 6242 PetscFunctionBegin; 6243 PetscCall(PetscFree(mpiaij->garray)); 6244 PetscCall(VecDestroy(&mpiaij->lvec)); 6245 #if defined(PETSC_USE_CTABLE) 6246 PetscCall(PetscTableDestroy(&mpiaij->colmap)); 6247 #else 6248 PetscCall(PetscFree(mpiaij->colmap)); 6249 #endif 6250 PetscCall(VecScatterDestroy(&mpiaij->Mvctx)); 6251 mat->assembled = PETSC_FALSE; 6252 mat->was_assembled = PETSC_FALSE; 6253 PetscCall(MatResetPreallocationCOO_MPIAIJ(mat)); 6254 6255 PetscCall(PetscObjectGetComm((PetscObject)mat,&comm)); 6256 PetscCallMPI(MPI_Comm_size(comm,&size)); 6257 PetscCallMPI(MPI_Comm_rank(comm,&rank)); 6258 PetscCall(PetscLayoutSetUp(mat->rmap)); 6259 PetscCall(PetscLayoutSetUp(mat->cmap)); 6260 PetscCall(PetscLayoutGetRange(mat->rmap,&rstart,&rend)); 6261 PetscCall(PetscLayoutGetRange(mat->cmap,&cstart,&cend)); 6262 PetscCall(MatGetLocalSize(mat,&m,&n)); 6263 PetscCall(MatGetSize(mat,&M,&N)); 6264 6265 /* ---------------------------------------------------------------------------*/ 6266 /* Sort (i,j) by row along with a permutation array, so that the to-be-ignored */ 6267 /* entries come first, then local rows, then remote rows. 
*/ 6268 /* ---------------------------------------------------------------------------*/ 6269 PetscCount n1 = coo_n,*perm1; 6270 PetscInt *i1,*j1; /* Copies of input COOs along with a permutation array */ 6271 PetscCall(PetscMalloc3(n1,&i1,n1,&j1,n1,&perm1)); 6272 PetscCall(PetscArraycpy(i1,coo_i,n1)); /* Make a copy since we'll modify it */ 6273 PetscCall(PetscArraycpy(j1,coo_j,n1)); 6274 for (k=0; k<n1; k++) perm1[k] = k; 6275 6276 /* Manipulate indices so that entries with negative row or col indices will have smallest 6277 row indices, local entries will have greater but negative row indices, and remote entries 6278 will have positive row indices. 6279 */ 6280 for (k=0; k<n1; k++) { 6281 if (i1[k] < 0 || j1[k] < 0) i1[k] = PETSC_MIN_INT; /* e.g., -2^31, minimal to move them ahead */ 6282 else if (i1[k] >= rstart && i1[k] < rend) i1[k] -= PETSC_MAX_INT; /* e.g., minus 2^31-1 to shift local rows to range of [-PETSC_MAX_INT, -1] */ 6283 else { 6284 PetscCheck(!mat->nooffprocentries,PETSC_COMM_SELF,PETSC_ERR_USER_INPUT,"MAT_NO_OFF_PROC_ENTRIES is set but insert to remote rows"); 6285 if (mpiaij->donotstash) i1[k] = PETSC_MIN_INT; /* Ignore offproc entries as if they had negative indices */ 6286 } 6287 } 6288 6289 /* Sort by row; after that, [0,k) have ignored entires, [k,rem) have local rows and [rem,n1) have remote rows */ 6290 PetscCall(PetscSortIntWithIntCountArrayPair(n1,i1,j1,perm1)); 6291 for (k=0; k<n1; k++) {if (i1[k] > PETSC_MIN_INT) break;} /* Advance k to the first entry we need to take care of */ 6292 PetscCall(PetscSortedIntUpperBound(i1,k,n1,rend-1-PETSC_MAX_INT,&rem)); /* rem is upper bound of the last local row */ 6293 for (; k<rem; k++) i1[k] += PETSC_MAX_INT; /* Revert row indices of local rows*/ 6294 6295 /* ---------------------------------------------------------------------------*/ 6296 /* Split local rows into diag/offdiag portions */ 6297 /* ---------------------------------------------------------------------------*/ 6298 PetscCount 
*rowBegin1,*rowMid1,*rowEnd1; 6299 PetscCount *Ajmap1,*Aperm1,*Bjmap1,*Bperm1,*Cperm1; 6300 PetscCount Annz1,Bnnz1,Atot1,Btot1; 6301 6302 PetscCall(PetscCalloc3(m,&rowBegin1,m,&rowMid1,m,&rowEnd1)); 6303 PetscCall(PetscMalloc1(n1-rem,&Cperm1)); 6304 PetscCall(MatSplitEntries_Internal(mat,rem,i1,j1,perm1,rowBegin1,rowMid1,rowEnd1,&Atot1,&Aperm1,&Annz1,&Ajmap1,&Btot1,&Bperm1,&Bnnz1,&Bjmap1)); 6305 6306 /* ---------------------------------------------------------------------------*/ 6307 /* Send remote rows to their owner */ 6308 /* ---------------------------------------------------------------------------*/ 6309 /* Find which rows should be sent to which remote ranks*/ 6310 PetscInt nsend = 0; /* Number of MPI ranks to send data to */ 6311 PetscMPIInt *sendto; /* [nsend], storing remote ranks */ 6312 PetscInt *nentries; /* [nsend], storing number of entries sent to remote ranks; Assume PetscInt is big enough for this count, and error if not */ 6313 const PetscInt *ranges; 6314 PetscInt maxNsend = size >= 128? 128 : size; /* Assume max 128 neighbors; realloc when needed */ 6315 6316 PetscCall(PetscLayoutGetRanges(mat->rmap,&ranges)); 6317 PetscCall(PetscMalloc2(maxNsend,&sendto,maxNsend,&nentries)); 6318 for (k=rem; k<n1;) { 6319 PetscMPIInt owner; 6320 PetscInt firstRow,lastRow; 6321 6322 /* Locate a row range */ 6323 firstRow = i1[k]; /* first row of this owner */ 6324 PetscCall(PetscLayoutFindOwner(mat->rmap,firstRow,&owner)); 6325 lastRow = ranges[owner+1]-1; /* last row of this owner */ 6326 6327 /* Find the first index 'p' in [k,n) with i[p] belonging to next owner */ 6328 PetscCall(PetscSortedIntUpperBound(i1,k,n1,lastRow,&p)); 6329 6330 /* All entries in [k,p) belong to this remote owner */ 6331 if (nsend >= maxNsend) { /* Double the remote ranks arrays if not long enough */ 6332 PetscMPIInt *sendto2; 6333 PetscInt *nentries2; 6334 PetscInt maxNsend2 = (maxNsend <= size/2) ? 
maxNsend*2 : size; 6335 6336 PetscCall(PetscMalloc2(maxNsend2,&sendto2,maxNsend2,&nentries2)); 6337 PetscCall(PetscArraycpy(sendto2,sendto,maxNsend)); 6338 PetscCall(PetscArraycpy(nentries2,nentries2,maxNsend+1)); 6339 PetscCall(PetscFree2(sendto,nentries2)); 6340 sendto = sendto2; 6341 nentries = nentries2; 6342 maxNsend = maxNsend2; 6343 } 6344 sendto[nsend] = owner; 6345 nentries[nsend] = p - k; 6346 PetscCall(PetscCountCast(p-k,&nentries[nsend])); 6347 nsend++; 6348 k = p; 6349 } 6350 6351 /* Build 1st SF to know offsets on remote to send data */ 6352 PetscSF sf1; 6353 PetscInt nroots = 1,nroots2 = 0; 6354 PetscInt nleaves = nsend,nleaves2 = 0; 6355 PetscInt *offsets; 6356 PetscSFNode *iremote; 6357 6358 PetscCall(PetscSFCreate(comm,&sf1)); 6359 PetscCall(PetscMalloc1(nsend,&iremote)); 6360 PetscCall(PetscMalloc1(nsend,&offsets)); 6361 for (k=0; k<nsend; k++) { 6362 iremote[k].rank = sendto[k]; 6363 iremote[k].index = 0; 6364 nleaves2 += nentries[k]; 6365 PetscCheck(nleaves2 >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Number of SF leaves is too large for PetscInt"); 6366 } 6367 PetscCall(PetscSFSetGraph(sf1,nroots,nleaves,NULL,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER)); 6368 PetscCall(PetscSFFetchAndOpWithMemTypeBegin(sf1,MPIU_INT,PETSC_MEMTYPE_HOST,&nroots2/*rootdata*/,PETSC_MEMTYPE_HOST,nentries/*leafdata*/,PETSC_MEMTYPE_HOST,offsets/*leafupdate*/,MPI_SUM)); 6369 PetscCall(PetscSFFetchAndOpEnd(sf1,MPIU_INT,&nroots2,nentries,offsets,MPI_SUM)); /* Would nroots2 overflow, we check offsets[] below */ 6370 PetscCall(PetscSFDestroy(&sf1)); 6371 PetscAssert(nleaves2 == n1-rem,PETSC_COMM_SELF,PETSC_ERR_PLIB,"nleaves2 %" PetscInt_FMT " != number of remote entries %" PetscCount_FMT "",nleaves2,n1-rem); 6372 6373 /* Build 2nd SF to send remote COOs to their owner */ 6374 PetscSF sf2; 6375 nroots = nroots2; 6376 nleaves = nleaves2; 6377 PetscCall(PetscSFCreate(comm,&sf2)); 6378 PetscCall(PetscSFSetFromOptions(sf2)); 6379 PetscCall(PetscMalloc1(nleaves,&iremote)); 
6380 p = 0; 6381 for (k=0; k<nsend; k++) { 6382 PetscCheck(offsets[k] >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Number of SF roots is too large for PetscInt"); 6383 for (q=0; q<nentries[k]; q++,p++) { 6384 iremote[p].rank = sendto[k]; 6385 iremote[p].index = offsets[k] + q; 6386 } 6387 } 6388 PetscCall(PetscSFSetGraph(sf2,nroots,nleaves,NULL,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER)); 6389 6390 /* sf2 only sends contiguous leafdata to contiguous rootdata. We record the permutation which will be used to fill leafdata */ 6391 PetscCall(PetscArraycpy(Cperm1,perm1+rem,n1-rem)); 6392 6393 /* Send the remote COOs to their owner */ 6394 PetscInt n2 = nroots,*i2,*j2; /* Buffers for received COOs from other ranks, along with a permutation array */ 6395 PetscCount *perm2; /* Though PetscInt is enough for remote entries, we use PetscCount here as we want to reuse MatSplitEntries_Internal() */ 6396 PetscCall(PetscMalloc3(n2,&i2,n2,&j2,n2,&perm2)); 6397 PetscCall(PetscSFReduceWithMemTypeBegin(sf2,MPIU_INT,PETSC_MEMTYPE_HOST,i1+rem,PETSC_MEMTYPE_HOST,i2,MPI_REPLACE)); 6398 PetscCall(PetscSFReduceEnd(sf2,MPIU_INT,i1+rem,i2,MPI_REPLACE)); 6399 PetscCall(PetscSFReduceWithMemTypeBegin(sf2,MPIU_INT,PETSC_MEMTYPE_HOST,j1+rem,PETSC_MEMTYPE_HOST,j2,MPI_REPLACE)); 6400 PetscCall(PetscSFReduceEnd(sf2,MPIU_INT,j1+rem,j2,MPI_REPLACE)); 6401 6402 PetscCall(PetscFree(offsets)); 6403 PetscCall(PetscFree2(sendto,nentries)); 6404 6405 /* ---------------------------------------------------------------*/ 6406 /* Sort received COOs by row along with the permutation array */ 6407 /* ---------------------------------------------------------------*/ 6408 for (k=0; k<n2; k++) perm2[k] = k; 6409 PetscCall(PetscSortIntWithIntCountArrayPair(n2,i2,j2,perm2)); 6410 6411 /* ---------------------------------------------------------------*/ 6412 /* Split received COOs into diag/offdiag portions */ 6413 /* ---------------------------------------------------------------*/ 6414 PetscCount 
*rowBegin2,*rowMid2,*rowEnd2; 6415 PetscCount *Ajmap2,*Aperm2,*Bjmap2,*Bperm2; 6416 PetscCount Annz2,Bnnz2,Atot2,Btot2; 6417 6418 PetscCall(PetscCalloc3(m,&rowBegin2,m,&rowMid2,m,&rowEnd2)); 6419 PetscCall(MatSplitEntries_Internal(mat,n2,i2,j2,perm2,rowBegin2,rowMid2,rowEnd2,&Atot2,&Aperm2,&Annz2,&Ajmap2,&Btot2,&Bperm2,&Bnnz2,&Bjmap2)); 6420 6421 /* --------------------------------------------------------------------------*/ 6422 /* Merge local COOs with received COOs: diag with diag, offdiag with offdiag */ 6423 /* --------------------------------------------------------------------------*/ 6424 PetscInt *Ai,*Bi; 6425 PetscInt *Aj,*Bj; 6426 6427 PetscCall(PetscMalloc1(m+1,&Ai)); 6428 PetscCall(PetscMalloc1(m+1,&Bi)); 6429 PetscCall(PetscMalloc1(Annz1+Annz2,&Aj)); /* Since local and remote entries might have dups, we might allocate excess memory */ 6430 PetscCall(PetscMalloc1(Bnnz1+Bnnz2,&Bj)); 6431 6432 PetscCount *Aimap1,*Bimap1,*Aimap2,*Bimap2; 6433 PetscCall(PetscMalloc1(Annz1,&Aimap1)); 6434 PetscCall(PetscMalloc1(Bnnz1,&Bimap1)); 6435 PetscCall(PetscMalloc1(Annz2,&Aimap2)); 6436 PetscCall(PetscMalloc1(Bnnz2,&Bimap2)); 6437 6438 PetscCall(MatMergeEntries_Internal(mat,j1,j2,rowBegin1,rowMid1,rowBegin2,rowMid2,Ajmap1,Ajmap2,Aimap1,Aimap2,Ai,Aj)); 6439 PetscCall(MatMergeEntries_Internal(mat,j1,j2,rowMid1, rowEnd1,rowMid2, rowEnd2,Bjmap1,Bjmap2,Bimap1,Bimap2,Bi,Bj)); 6440 6441 /* --------------------------------------------------------------------------*/ 6442 /* Expand Ajmap1/Bjmap1 to make them based off nonzeros in A/B, since we */ 6443 /* expect nonzeros in A/B most likely have local contributing entries */ 6444 /* --------------------------------------------------------------------------*/ 6445 PetscInt Annz = Ai[m]; 6446 PetscInt Bnnz = Bi[m]; 6447 PetscCount *Ajmap1_new,*Bjmap1_new; 6448 6449 PetscCall(PetscMalloc1(Annz+1,&Ajmap1_new)); 6450 PetscCall(PetscMalloc1(Bnnz+1,&Bjmap1_new)); 6451 6452 
PetscCall(ExpandJmap_Internal(Annz1,Annz,Aimap1,Ajmap1,Ajmap1_new)); 6453 PetscCall(ExpandJmap_Internal(Bnnz1,Bnnz,Bimap1,Bjmap1,Bjmap1_new)); 6454 6455 PetscCall(PetscFree(Aimap1)); 6456 PetscCall(PetscFree(Ajmap1)); 6457 PetscCall(PetscFree(Bimap1)); 6458 PetscCall(PetscFree(Bjmap1)); 6459 PetscCall(PetscFree3(rowBegin1,rowMid1,rowEnd1)); 6460 PetscCall(PetscFree3(rowBegin2,rowMid2,rowEnd2)); 6461 PetscCall(PetscFree3(i1,j1,perm1)); 6462 PetscCall(PetscFree3(i2,j2,perm2)); 6463 6464 Ajmap1 = Ajmap1_new; 6465 Bjmap1 = Bjmap1_new; 6466 6467 /* Reallocate Aj, Bj once we know actual numbers of unique nonzeros in A and B */ 6468 if (Annz < Annz1 + Annz2) { 6469 PetscInt *Aj_new; 6470 PetscCall(PetscMalloc1(Annz,&Aj_new)); 6471 PetscCall(PetscArraycpy(Aj_new,Aj,Annz)); 6472 PetscCall(PetscFree(Aj)); 6473 Aj = Aj_new; 6474 } 6475 6476 if (Bnnz < Bnnz1 + Bnnz2) { 6477 PetscInt *Bj_new; 6478 PetscCall(PetscMalloc1(Bnnz,&Bj_new)); 6479 PetscCall(PetscArraycpy(Bj_new,Bj,Bnnz)); 6480 PetscCall(PetscFree(Bj)); 6481 Bj = Bj_new; 6482 } 6483 6484 /* --------------------------------------------------------------------------------*/ 6485 /* Create new submatrices for on-process and off-process coupling */ 6486 /* --------------------------------------------------------------------------------*/ 6487 PetscScalar *Aa,*Ba; 6488 MatType rtype; 6489 Mat_SeqAIJ *a,*b; 6490 PetscCall(PetscCalloc1(Annz,&Aa)); /* Zero matrix on device */ 6491 PetscCall(PetscCalloc1(Bnnz,&Ba)); 6492 /* make Aj[] local, i.e, based off the start column of the diagonal portion */ 6493 if (cstart) {for (k=0; k<Annz; k++) Aj[k] -= cstart;} 6494 PetscCall(MatDestroy(&mpiaij->A)); 6495 PetscCall(MatDestroy(&mpiaij->B)); 6496 PetscCall(MatGetRootType_Private(mat,&rtype)); 6497 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,Ai,Aj,Aa,&mpiaij->A)); 6498 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,mat->cmap->N,Bi,Bj,Ba,&mpiaij->B)); 6499 PetscCall(MatSetUpMultiply_MPIAIJ(mat)); 6500 6501 a = 
(Mat_SeqAIJ*)mpiaij->A->data; 6502 b = (Mat_SeqAIJ*)mpiaij->B->data; 6503 a->singlemalloc = b->singlemalloc = PETSC_FALSE; /* Let newmat own Ai,Aj,Aa,Bi,Bj,Ba */ 6504 a->free_a = b->free_a = PETSC_TRUE; 6505 a->free_ij = b->free_ij = PETSC_TRUE; 6506 6507 /* conversion must happen AFTER multiply setup */ 6508 PetscCall(MatConvert(mpiaij->A,rtype,MAT_INPLACE_MATRIX,&mpiaij->A)); 6509 PetscCall(MatConvert(mpiaij->B,rtype,MAT_INPLACE_MATRIX,&mpiaij->B)); 6510 PetscCall(VecDestroy(&mpiaij->lvec)); 6511 PetscCall(MatCreateVecs(mpiaij->B,&mpiaij->lvec,NULL)); 6512 PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)mpiaij->lvec)); 6513 6514 mpiaij->coo_n = coo_n; 6515 mpiaij->coo_sf = sf2; 6516 mpiaij->sendlen = nleaves; 6517 mpiaij->recvlen = nroots; 6518 6519 mpiaij->Annz = Annz; 6520 mpiaij->Bnnz = Bnnz; 6521 6522 mpiaij->Annz2 = Annz2; 6523 mpiaij->Bnnz2 = Bnnz2; 6524 6525 mpiaij->Atot1 = Atot1; 6526 mpiaij->Atot2 = Atot2; 6527 mpiaij->Btot1 = Btot1; 6528 mpiaij->Btot2 = Btot2; 6529 6530 mpiaij->Ajmap1 = Ajmap1; 6531 mpiaij->Aperm1 = Aperm1; 6532 6533 mpiaij->Bjmap1 = Bjmap1; 6534 mpiaij->Bperm1 = Bperm1; 6535 6536 mpiaij->Aimap2 = Aimap2; 6537 mpiaij->Ajmap2 = Ajmap2; 6538 mpiaij->Aperm2 = Aperm2; 6539 6540 mpiaij->Bimap2 = Bimap2; 6541 mpiaij->Bjmap2 = Bjmap2; 6542 mpiaij->Bperm2 = Bperm2; 6543 6544 mpiaij->Cperm1 = Cperm1; 6545 6546 /* Allocate in preallocation. 
If not used, it has zero cost on host */ 6547 PetscCall(PetscMalloc2(mpiaij->sendlen,&mpiaij->sendbuf,mpiaij->recvlen,&mpiaij->recvbuf)); 6548 PetscFunctionReturn(0); 6549 } 6550 6551 static PetscErrorCode MatSetValuesCOO_MPIAIJ(Mat mat,const PetscScalar v[],InsertMode imode) 6552 { 6553 Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ*)mat->data; 6554 Mat A = mpiaij->A,B = mpiaij->B; 6555 PetscCount Annz = mpiaij->Annz,Annz2 = mpiaij->Annz2,Bnnz = mpiaij->Bnnz,Bnnz2 = mpiaij->Bnnz2; 6556 PetscScalar *Aa,*Ba; 6557 PetscScalar *sendbuf = mpiaij->sendbuf; 6558 PetscScalar *recvbuf = mpiaij->recvbuf; 6559 const PetscCount *Ajmap1 = mpiaij->Ajmap1,*Ajmap2 = mpiaij->Ajmap2,*Aimap2 = mpiaij->Aimap2; 6560 const PetscCount *Bjmap1 = mpiaij->Bjmap1,*Bjmap2 = mpiaij->Bjmap2,*Bimap2 = mpiaij->Bimap2; 6561 const PetscCount *Aperm1 = mpiaij->Aperm1,*Aperm2 = mpiaij->Aperm2,*Bperm1 = mpiaij->Bperm1,*Bperm2 = mpiaij->Bperm2; 6562 const PetscCount *Cperm1 = mpiaij->Cperm1; 6563 6564 PetscFunctionBegin; 6565 PetscCall(MatSeqAIJGetArray(A,&Aa)); /* Might read and write matrix values */ 6566 PetscCall(MatSeqAIJGetArray(B,&Ba)); 6567 6568 /* Pack entries to be sent to remote */ 6569 for (PetscCount i=0; i<mpiaij->sendlen; i++) sendbuf[i] = v[Cperm1[i]]; 6570 6571 /* Send remote entries to their owner and overlap the communication with local computation */ 6572 PetscCall(PetscSFReduceWithMemTypeBegin(mpiaij->coo_sf,MPIU_SCALAR,PETSC_MEMTYPE_HOST,sendbuf,PETSC_MEMTYPE_HOST,recvbuf,MPI_REPLACE)); 6573 /* Add local entries to A and B */ 6574 for (PetscCount i=0; i<Annz; i++) { /* All nonzeros in A are either zero'ed or added with a value (i.e., initialized) */ 6575 PetscScalar sum = 0.0; /* Do partial summation first to improve numerical stablility */ 6576 for (PetscCount k=Ajmap1[i]; k<Ajmap1[i+1]; k++) sum += v[Aperm1[k]]; 6577 Aa[i] = (imode == INSERT_VALUES? 
0.0 : Aa[i]) + sum; 6578 } 6579 for (PetscCount i=0; i<Bnnz; i++) { 6580 PetscScalar sum = 0.0; 6581 for (PetscCount k=Bjmap1[i]; k<Bjmap1[i+1]; k++) sum += v[Bperm1[k]]; 6582 Ba[i] = (imode == INSERT_VALUES? 0.0 : Ba[i]) + sum; 6583 } 6584 PetscCall(PetscSFReduceEnd(mpiaij->coo_sf,MPIU_SCALAR,sendbuf,recvbuf,MPI_REPLACE)); 6585 6586 /* Add received remote entries to A and B */ 6587 for (PetscCount i=0; i<Annz2; i++) { 6588 for (PetscCount k=Ajmap2[i]; k<Ajmap2[i+1]; k++) Aa[Aimap2[i]] += recvbuf[Aperm2[k]]; 6589 } 6590 for (PetscCount i=0; i<Bnnz2; i++) { 6591 for (PetscCount k=Bjmap2[i]; k<Bjmap2[i+1]; k++) Ba[Bimap2[i]] += recvbuf[Bperm2[k]]; 6592 } 6593 PetscCall(MatSeqAIJRestoreArray(A,&Aa)); 6594 PetscCall(MatSeqAIJRestoreArray(B,&Ba)); 6595 PetscFunctionReturn(0); 6596 } 6597 6598 /* ----------------------------------------------------------------*/ 6599 6600 /*MC 6601 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 6602 6603 Options Database Keys: 6604 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 6605 6606 Level: beginner 6607 6608 Notes: 6609 MatSetValues() may be called for this matrix type with a NULL argument for the numerical values, 6610 in this case the values associated with the rows and columns one passes in are set to zero 6611 in the matrix 6612 6613 MatSetOptions(,MAT_STRUCTURE_ONLY,PETSC_TRUE) may be called for this matrix type. 
In this no 6614 space is allocated for the nonzero entries and any entries passed with MatSetValues() are ignored 6615 6616 .seealso: `MatCreateAIJ()` 6617 M*/ 6618 6619 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B) 6620 { 6621 Mat_MPIAIJ *b; 6622 PetscMPIInt size; 6623 6624 PetscFunctionBegin; 6625 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B),&size)); 6626 6627 PetscCall(PetscNewLog(B,&b)); 6628 B->data = (void*)b; 6629 PetscCall(PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps))); 6630 B->assembled = PETSC_FALSE; 6631 B->insertmode = NOT_SET_VALUES; 6632 b->size = size; 6633 6634 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank)); 6635 6636 /* build cache for off array entries formed */ 6637 PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash)); 6638 6639 b->donotstash = PETSC_FALSE; 6640 b->colmap = NULL; 6641 b->garray = NULL; 6642 b->roworiented = PETSC_TRUE; 6643 6644 /* stuff used for matrix vector multiply */ 6645 b->lvec = NULL; 6646 b->Mvctx = NULL; 6647 6648 /* stuff for MatGetRow() */ 6649 b->rowindices = NULL; 6650 b->rowvalues = NULL; 6651 b->getrowactive = PETSC_FALSE; 6652 6653 /* flexible pointer used in CUSPARSE classes */ 6654 b->spptr = NULL; 6655 6656 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetUseScalableIncreaseOverlap_C",MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ)); 6657 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ)); 6658 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ)); 6659 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ)); 6660 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ)); 6661 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatResetPreallocation_C",MatResetPreallocation_MPIAIJ)); 6662 
PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ)); 6663 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ)); 6664 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM)); 6665 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijsell_C",MatConvert_MPIAIJ_MPIAIJSELL)); 6666 #if defined(PETSC_HAVE_CUDA) 6667 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcusparse_C",MatConvert_MPIAIJ_MPIAIJCUSPARSE)); 6668 #endif 6669 #if defined(PETSC_HAVE_KOKKOS_KERNELS) 6670 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijkokkos_C",MatConvert_MPIAIJ_MPIAIJKokkos)); 6671 #endif 6672 #if defined(PETSC_HAVE_MKL_SPARSE) 6673 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijmkl_C",MatConvert_MPIAIJ_MPIAIJMKL)); 6674 #endif 6675 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL)); 6676 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpibaij_C",MatConvert_MPIAIJ_MPIBAIJ)); 6677 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ)); 6678 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpidense_C",MatConvert_MPIAIJ_MPIDense)); 6679 #if defined(PETSC_HAVE_ELEMENTAL) 6680 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_elemental_C",MatConvert_MPIAIJ_Elemental)); 6681 #endif 6682 #if defined(PETSC_HAVE_SCALAPACK) 6683 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_scalapack_C",MatConvert_AIJ_ScaLAPACK)); 6684 #endif 6685 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_is_C",MatConvert_XAIJ_IS)); 6686 
PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisell_C",MatConvert_MPIAIJ_MPISELL)); 6687 #if defined(PETSC_HAVE_HYPRE) 6688 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_hypre_C",MatConvert_AIJ_HYPRE)); 6689 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_transpose_mpiaij_mpiaij_C",MatProductSetFromOptions_Transpose_AIJ_AIJ)); 6690 #endif 6691 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_is_mpiaij_C",MatProductSetFromOptions_IS_XAIJ)); 6692 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaij_mpiaij_C",MatProductSetFromOptions_MPIAIJ)); 6693 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatSetPreallocationCOO_C",MatSetPreallocationCOO_MPIAIJ)); 6694 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatSetValuesCOO_C",MatSetValuesCOO_MPIAIJ)); 6695 PetscCall(PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ)); 6696 PetscFunctionReturn(0); 6697 } 6698 6699 /*@C 6700 MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal" 6701 and "off-diagonal" part of the matrix in CSR format. 6702 6703 Collective 6704 6705 Input Parameters: 6706 + comm - MPI communicator 6707 . m - number of local rows (Cannot be PETSC_DECIDE) 6708 . n - This value should be the same as the local size used in creating the 6709 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 6710 calculated if N is given) For square matrices n is almost always m. 6711 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 6712 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 6713 . i - row indices for "diagonal" portion of matrix; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix 6714 . j - column indices, which must be local, i.e., based off the start column of the diagonal portion 6715 . 
a - matrix values 6716 . oi - row indices for "off-diagonal" portion of matrix; that is oi[0] = 0, oi[row] = oi[row-1] + number of elements in that row of the matrix 6717 . oj - column indices, which must be global, representing global columns in the MPIAIJ matrix 6718 - oa - matrix values 6719 6720 Output Parameter: 6721 . mat - the matrix 6722 6723 Level: advanced 6724 6725 Notes: 6726 The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user 6727 must free the arrays once the matrix has been destroyed and not before. 6728 6729 The i and j indices are 0 based 6730 6731 See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix 6732 6733 This sets local rows and cannot be used to set off-processor values. 6734 6735 Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a 6736 legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does 6737 not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because 6738 the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to 6739 keep track of the underlying array. Use MatSetOption(A,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all 6740 communication if it is known that only local entries will be set. 
6741 6742 .seealso: `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 6743 `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithArrays()` 6744 @*/ 6745 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat) 6746 { 6747 Mat_MPIAIJ *maij; 6748 6749 PetscFunctionBegin; 6750 PetscCheck(m >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 6751 PetscCheck(i[0] == 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 6752 PetscCheck(oi[0] == 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0"); 6753 PetscCall(MatCreate(comm,mat)); 6754 PetscCall(MatSetSizes(*mat,m,n,M,N)); 6755 PetscCall(MatSetType(*mat,MATMPIAIJ)); 6756 maij = (Mat_MPIAIJ*) (*mat)->data; 6757 6758 (*mat)->preallocated = PETSC_TRUE; 6759 6760 PetscCall(PetscLayoutSetUp((*mat)->rmap)); 6761 PetscCall(PetscLayoutSetUp((*mat)->cmap)); 6762 6763 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A)); 6764 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B)); 6765 6766 PetscCall(MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE)); 6767 PetscCall(MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY)); 6768 PetscCall(MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY)); 6769 PetscCall(MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE)); 6770 PetscCall(MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE)); 6771 PetscFunctionReturn(0); 6772 } 6773 6774 typedef struct { 6775 Mat *mp; /* intermediate products */ 6776 PetscBool *mptmp; /* is the intermediate product temporary ? 
*/ 6777 PetscInt cp; /* number of intermediate products */ 6778 6779 /* support for MatGetBrowsOfAoCols_MPIAIJ for P_oth */ 6780 PetscInt *startsj_s,*startsj_r; 6781 PetscScalar *bufa; 6782 Mat P_oth; 6783 6784 /* may take advantage of merging product->B */ 6785 Mat Bloc; /* B-local by merging diag and off-diag */ 6786 6787 /* cusparse does not have support to split between symbolic and numeric phases. 6788 When api_user is true, we don't need to update the numerical values 6789 of the temporary storage */ 6790 PetscBool reusesym; 6791 6792 /* support for COO values insertion */ 6793 PetscScalar *coo_v,*coo_w; /* store on-process and off-process COO scalars, and used as MPI recv/send buffers respectively */ 6794 PetscInt **own; /* own[i] points to address of on-process COO indices for Mat mp[i] */ 6795 PetscInt **off; /* off[i] points to address of off-process COO indices for Mat mp[i] */ 6796 PetscBool hasoffproc; /* if true, have off-process values insertion (i.e. AtB or PtAP) */ 6797 PetscSF sf; /* used for non-local values insertion and memory malloc */ 6798 PetscMemType mtype; 6799 6800 /* customization */ 6801 PetscBool abmerge; 6802 PetscBool P_oth_bind; 6803 } MatMatMPIAIJBACKEND; 6804 6805 PetscErrorCode MatDestroy_MatMatMPIAIJBACKEND(void *data) 6806 { 6807 MatMatMPIAIJBACKEND *mmdata = (MatMatMPIAIJBACKEND*)data; 6808 PetscInt i; 6809 6810 PetscFunctionBegin; 6811 PetscCall(PetscFree2(mmdata->startsj_s,mmdata->startsj_r)); 6812 PetscCall(PetscFree(mmdata->bufa)); 6813 PetscCall(PetscSFFree(mmdata->sf,mmdata->mtype,mmdata->coo_v)); 6814 PetscCall(PetscSFFree(mmdata->sf,mmdata->mtype,mmdata->coo_w)); 6815 PetscCall(MatDestroy(&mmdata->P_oth)); 6816 PetscCall(MatDestroy(&mmdata->Bloc)); 6817 PetscCall(PetscSFDestroy(&mmdata->sf)); 6818 for (i = 0; i < mmdata->cp; i++) { 6819 PetscCall(MatDestroy(&mmdata->mp[i])); 6820 } 6821 PetscCall(PetscFree2(mmdata->mp,mmdata->mptmp)); 6822 PetscCall(PetscFree(mmdata->own[0])); 6823 PetscCall(PetscFree(mmdata->own)); 
6824 PetscCall(PetscFree(mmdata->off[0])); 6825 PetscCall(PetscFree(mmdata->off)); 6826 PetscCall(PetscFree(mmdata)); 6827 PetscFunctionReturn(0); 6828 } 6829 6830 /* Copy selected n entries with indices in idx[] of A to v[]. 6831 If idx is NULL, copy the whole data array of A to v[] 6832 */ 6833 static PetscErrorCode MatSeqAIJCopySubArray(Mat A, PetscInt n, const PetscInt idx[], PetscScalar v[]) 6834 { 6835 PetscErrorCode (*f)(Mat,PetscInt,const PetscInt[],PetscScalar[]); 6836 6837 PetscFunctionBegin; 6838 PetscCall(PetscObjectQueryFunction((PetscObject)A,"MatSeqAIJCopySubArray_C",&f)); 6839 if (f) { 6840 PetscCall((*f)(A,n,idx,v)); 6841 } else { 6842 const PetscScalar *vv; 6843 6844 PetscCall(MatSeqAIJGetArrayRead(A,&vv)); 6845 if (n && idx) { 6846 PetscScalar *w = v; 6847 const PetscInt *oi = idx; 6848 PetscInt j; 6849 6850 for (j = 0; j < n; j++) *w++ = vv[*oi++]; 6851 } else { 6852 PetscCall(PetscArraycpy(v,vv,n)); 6853 } 6854 PetscCall(MatSeqAIJRestoreArrayRead(A,&vv)); 6855 } 6856 PetscFunctionReturn(0); 6857 } 6858 6859 static PetscErrorCode MatProductNumeric_MPIAIJBACKEND(Mat C) 6860 { 6861 MatMatMPIAIJBACKEND *mmdata; 6862 PetscInt i,n_d,n_o; 6863 6864 PetscFunctionBegin; 6865 MatCheckProduct(C,1); 6866 PetscCheck(C->product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty"); 6867 mmdata = (MatMatMPIAIJBACKEND*)C->product->data; 6868 if (!mmdata->reusesym) { /* update temporary matrices */ 6869 if (mmdata->P_oth) { 6870 PetscCall(MatGetBrowsOfAoCols_MPIAIJ(C->product->A,C->product->B,MAT_REUSE_MATRIX,&mmdata->startsj_s,&mmdata->startsj_r,&mmdata->bufa,&mmdata->P_oth)); 6871 } 6872 if (mmdata->Bloc) { 6873 PetscCall(MatMPIAIJGetLocalMatMerge(C->product->B,MAT_REUSE_MATRIX,NULL,&mmdata->Bloc)); 6874 } 6875 } 6876 mmdata->reusesym = PETSC_FALSE; 6877 6878 for (i = 0; i < mmdata->cp; i++) { 6879 PetscCheck(mmdata->mp[i]->ops->productnumeric,PetscObjectComm((PetscObject)mmdata->mp[i]),PETSC_ERR_PLIB,"Missing numeric op for 
%s",MatProductTypes[mmdata->mp[i]->product->type]); 6880 PetscCall((*mmdata->mp[i]->ops->productnumeric)(mmdata->mp[i])); 6881 } 6882 for (i = 0, n_d = 0, n_o = 0; i < mmdata->cp; i++) { 6883 PetscInt noff = mmdata->off[i+1] - mmdata->off[i]; 6884 6885 if (mmdata->mptmp[i]) continue; 6886 if (noff) { 6887 PetscInt nown = mmdata->own[i+1] - mmdata->own[i]; 6888 6889 PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i],noff,mmdata->off[i],mmdata->coo_w + n_o)); 6890 PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i],nown,mmdata->own[i],mmdata->coo_v + n_d)); 6891 n_o += noff; 6892 n_d += nown; 6893 } else { 6894 Mat_SeqAIJ *mm = (Mat_SeqAIJ*)mmdata->mp[i]->data; 6895 6896 PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i],mm->nz,NULL,mmdata->coo_v + n_d)); 6897 n_d += mm->nz; 6898 } 6899 } 6900 if (mmdata->hasoffproc) { /* offprocess insertion */ 6901 PetscCall(PetscSFGatherBegin(mmdata->sf,MPIU_SCALAR,mmdata->coo_w,mmdata->coo_v+n_d)); 6902 PetscCall(PetscSFGatherEnd(mmdata->sf,MPIU_SCALAR,mmdata->coo_w,mmdata->coo_v+n_d)); 6903 } 6904 PetscCall(MatSetValuesCOO(C,mmdata->coo_v,INSERT_VALUES)); 6905 PetscFunctionReturn(0); 6906 } 6907 6908 /* Support for Pt * A, A * P, or Pt * A * P */ 6909 #define MAX_NUMBER_INTERMEDIATE 4 6910 PetscErrorCode MatProductSymbolic_MPIAIJBACKEND(Mat C) 6911 { 6912 Mat_Product *product = C->product; 6913 Mat A,P,mp[MAX_NUMBER_INTERMEDIATE]; /* A, P and a series of intermediate matrices */ 6914 Mat_MPIAIJ *a,*p; 6915 MatMatMPIAIJBACKEND *mmdata; 6916 ISLocalToGlobalMapping P_oth_l2g = NULL; 6917 IS glob = NULL; 6918 const char *prefix; 6919 char pprefix[256]; 6920 const PetscInt *globidx,*P_oth_idx; 6921 PetscInt i,j,cp,m,n,M,N,*coo_i,*coo_j; 6922 PetscCount ncoo,ncoo_d,ncoo_o,ncoo_oown; 6923 PetscInt cmapt[MAX_NUMBER_INTERMEDIATE],rmapt[MAX_NUMBER_INTERMEDIATE]; /* col/row map type for each Mat in mp[]. 
*/ 6924 /* type-0: consecutive, start from 0; type-1: consecutive with */ 6925 /* a base offset; type-2: sparse with a local to global map table */ 6926 const PetscInt *cmapa[MAX_NUMBER_INTERMEDIATE],*rmapa[MAX_NUMBER_INTERMEDIATE]; /* col/row local to global map array (table) for type-2 map type */ 6927 6928 MatProductType ptype; 6929 PetscBool mptmp[MAX_NUMBER_INTERMEDIATE],hasoffproc = PETSC_FALSE,iscuda,iskokk; 6930 PetscMPIInt size; 6931 6932 PetscFunctionBegin; 6933 MatCheckProduct(C,1); 6934 PetscCheck(!product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data not empty"); 6935 ptype = product->type; 6936 if (product->A->symmetric && ptype == MATPRODUCT_AtB) { 6937 ptype = MATPRODUCT_AB; 6938 product->symbolic_used_the_fact_A_is_symmetric = PETSC_TRUE; 6939 } 6940 switch (ptype) { 6941 case MATPRODUCT_AB: 6942 A = product->A; 6943 P = product->B; 6944 m = A->rmap->n; 6945 n = P->cmap->n; 6946 M = A->rmap->N; 6947 N = P->cmap->N; 6948 hasoffproc = PETSC_FALSE; /* will not scatter mat product values to other processes */ 6949 break; 6950 case MATPRODUCT_AtB: 6951 P = product->A; 6952 A = product->B; 6953 m = P->cmap->n; 6954 n = A->cmap->n; 6955 M = P->cmap->N; 6956 N = A->cmap->N; 6957 hasoffproc = PETSC_TRUE; 6958 break; 6959 case MATPRODUCT_PtAP: 6960 A = product->A; 6961 P = product->B; 6962 m = P->cmap->n; 6963 n = P->cmap->n; 6964 M = P->cmap->N; 6965 N = P->cmap->N; 6966 hasoffproc = PETSC_TRUE; 6967 break; 6968 default: 6969 SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Not for product type %s",MatProductTypes[ptype]); 6970 } 6971 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)C),&size)); 6972 if (size == 1) hasoffproc = PETSC_FALSE; 6973 6974 /* defaults */ 6975 for (i=0;i<MAX_NUMBER_INTERMEDIATE;i++) { 6976 mp[i] = NULL; 6977 mptmp[i] = PETSC_FALSE; 6978 rmapt[i] = -1; 6979 cmapt[i] = -1; 6980 rmapa[i] = NULL; 6981 cmapa[i] = NULL; 6982 } 6983 6984 /* customization */ 6985 PetscCall(PetscNew(&mmdata)); 6986 
mmdata->reusesym = product->api_user; 6987 if (ptype == MATPRODUCT_AB) { 6988 if (product->api_user) { 6989 PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatMatMult","Mat"); 6990 PetscCall(PetscOptionsBool("-matmatmult_backend_mergeB","Merge product->B local matrices","MatMatMult",mmdata->abmerge,&mmdata->abmerge,NULL)); 6991 PetscCall(PetscOptionsBool("-matmatmult_backend_pothbind","Bind P_oth to CPU","MatBindToCPU",mmdata->P_oth_bind,&mmdata->P_oth_bind,NULL)); 6992 PetscOptionsEnd(); 6993 } else { 6994 PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_AB","Mat"); 6995 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_mergeB","Merge product->B local matrices","MatMatMult",mmdata->abmerge,&mmdata->abmerge,NULL)); 6996 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_pothbind","Bind P_oth to CPU","MatBindToCPU",mmdata->P_oth_bind,&mmdata->P_oth_bind,NULL)); 6997 PetscOptionsEnd(); 6998 } 6999 } else if (ptype == MATPRODUCT_PtAP) { 7000 if (product->api_user) { 7001 PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatPtAP","Mat"); 7002 PetscCall(PetscOptionsBool("-matptap_backend_pothbind","Bind P_oth to CPU","MatBindToCPU",mmdata->P_oth_bind,&mmdata->P_oth_bind,NULL)); 7003 PetscOptionsEnd(); 7004 } else { 7005 PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_PtAP","Mat"); 7006 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_pothbind","Bind P_oth to CPU","MatBindToCPU",mmdata->P_oth_bind,&mmdata->P_oth_bind,NULL)); 7007 PetscOptionsEnd(); 7008 } 7009 } 7010 a = (Mat_MPIAIJ*)A->data; 7011 p = (Mat_MPIAIJ*)P->data; 7012 PetscCall(MatSetSizes(C,m,n,M,N)); 7013 PetscCall(PetscLayoutSetUp(C->rmap)); 7014 PetscCall(PetscLayoutSetUp(C->cmap)); 7015 PetscCall(MatSetType(C,((PetscObject)A)->type_name)); 7016 PetscCall(MatGetOptionsPrefix(C,&prefix)); 7017 7018 cp = 0; 7019 switch (ptype) { 7020 case MATPRODUCT_AB: 
/* A * P */ 7021 PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A,P,MAT_INITIAL_MATRIX,&mmdata->startsj_s,&mmdata->startsj_r,&mmdata->bufa,&mmdata->P_oth)); 7022 7023 /* A_diag * P_local (merged or not) */ 7024 if (mmdata->abmerge) { /* P's diagonal and off-diag blocks are merged to one matrix, then multiplied by A_diag */ 7025 /* P is product->B */ 7026 PetscCall(MatMPIAIJGetLocalMatMerge(P,MAT_INITIAL_MATRIX,&glob,&mmdata->Bloc)); 7027 PetscCall(MatProductCreate(a->A,mmdata->Bloc,NULL,&mp[cp])); 7028 PetscCall(MatProductSetType(mp[cp],MATPRODUCT_AB)); 7029 PetscCall(MatProductSetFill(mp[cp],product->fill)); 7030 PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp)); 7031 PetscCall(MatSetOptionsPrefix(mp[cp],prefix)); 7032 PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix)); 7033 mp[cp]->product->api_user = product->api_user; 7034 PetscCall(MatProductSetFromOptions(mp[cp])); 7035 PetscCheck(mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]); 7036 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7037 PetscCall(ISGetIndices(glob,&globidx)); 7038 rmapt[cp] = 1; 7039 cmapt[cp] = 2; 7040 cmapa[cp] = globidx; 7041 mptmp[cp] = PETSC_FALSE; 7042 cp++; 7043 } else { /* A_diag * P_diag and A_diag * P_off */ 7044 PetscCall(MatProductCreate(a->A,p->A,NULL,&mp[cp])); 7045 PetscCall(MatProductSetType(mp[cp],MATPRODUCT_AB)); 7046 PetscCall(MatProductSetFill(mp[cp],product->fill)); 7047 PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp)); 7048 PetscCall(MatSetOptionsPrefix(mp[cp],prefix)); 7049 PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix)); 7050 mp[cp]->product->api_user = product->api_user; 7051 PetscCall(MatProductSetFromOptions(mp[cp])); 7052 PetscCheck(mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]); 7053 
PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7054 rmapt[cp] = 1; 7055 cmapt[cp] = 1; 7056 mptmp[cp] = PETSC_FALSE; 7057 cp++; 7058 PetscCall(MatProductCreate(a->A,p->B,NULL,&mp[cp])); 7059 PetscCall(MatProductSetType(mp[cp],MATPRODUCT_AB)); 7060 PetscCall(MatProductSetFill(mp[cp],product->fill)); 7061 PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp)); 7062 PetscCall(MatSetOptionsPrefix(mp[cp],prefix)); 7063 PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix)); 7064 mp[cp]->product->api_user = product->api_user; 7065 PetscCall(MatProductSetFromOptions(mp[cp])); 7066 PetscCheck(mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]); 7067 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7068 rmapt[cp] = 1; 7069 cmapt[cp] = 2; 7070 cmapa[cp] = p->garray; 7071 mptmp[cp] = PETSC_FALSE; 7072 cp++; 7073 } 7074 7075 /* A_off * P_other */ 7076 if (mmdata->P_oth) { 7077 PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth,&P_oth_l2g)); /* make P_oth use local col ids */ 7078 PetscCall(ISLocalToGlobalMappingGetIndices(P_oth_l2g,&P_oth_idx)); 7079 PetscCall(MatSetType(mmdata->P_oth,((PetscObject)(a->B))->type_name)); 7080 PetscCall(MatBindToCPU(mmdata->P_oth,mmdata->P_oth_bind)); 7081 PetscCall(MatProductCreate(a->B,mmdata->P_oth,NULL,&mp[cp])); 7082 PetscCall(MatProductSetType(mp[cp],MATPRODUCT_AB)); 7083 PetscCall(MatProductSetFill(mp[cp],product->fill)); 7084 PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp)); 7085 PetscCall(MatSetOptionsPrefix(mp[cp],prefix)); 7086 PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix)); 7087 mp[cp]->product->api_user = product->api_user; 7088 PetscCall(MatProductSetFromOptions(mp[cp])); 7089 PetscCheck(mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]); 7090 
PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7091 rmapt[cp] = 1; 7092 cmapt[cp] = 2; 7093 cmapa[cp] = P_oth_idx; 7094 mptmp[cp] = PETSC_FALSE; 7095 cp++; 7096 } 7097 break; 7098 7099 case MATPRODUCT_AtB: /* (P^t * A): P_diag * A_loc + P_off * A_loc */ 7100 /* A is product->B */ 7101 PetscCall(MatMPIAIJGetLocalMatMerge(A,MAT_INITIAL_MATRIX,&glob,&mmdata->Bloc)); 7102 if (A == P) { /* when A==P, we can take advantage of the already merged mmdata->Bloc */ 7103 PetscCall(MatProductCreate(mmdata->Bloc,mmdata->Bloc,NULL,&mp[cp])); 7104 PetscCall(MatProductSetType(mp[cp],MATPRODUCT_AtB)); 7105 PetscCall(MatProductSetFill(mp[cp],product->fill)); 7106 PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp)); 7107 PetscCall(MatSetOptionsPrefix(mp[cp],prefix)); 7108 PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix)); 7109 mp[cp]->product->api_user = product->api_user; 7110 PetscCall(MatProductSetFromOptions(mp[cp])); 7111 PetscCheck(mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]); 7112 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7113 PetscCall(ISGetIndices(glob,&globidx)); 7114 rmapt[cp] = 2; 7115 rmapa[cp] = globidx; 7116 cmapt[cp] = 2; 7117 cmapa[cp] = globidx; 7118 mptmp[cp] = PETSC_FALSE; 7119 cp++; 7120 } else { 7121 PetscCall(MatProductCreate(p->A,mmdata->Bloc,NULL,&mp[cp])); 7122 PetscCall(MatProductSetType(mp[cp],MATPRODUCT_AtB)); 7123 PetscCall(MatProductSetFill(mp[cp],product->fill)); 7124 PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp)); 7125 PetscCall(MatSetOptionsPrefix(mp[cp],prefix)); 7126 PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix)); 7127 mp[cp]->product->api_user = product->api_user; 7128 PetscCall(MatProductSetFromOptions(mp[cp])); 7129 PetscCheck(mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for 
%s",MatProductTypes[mp[cp]->product->type]); 7130 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7131 PetscCall(ISGetIndices(glob,&globidx)); 7132 rmapt[cp] = 1; 7133 cmapt[cp] = 2; 7134 cmapa[cp] = globidx; 7135 mptmp[cp] = PETSC_FALSE; 7136 cp++; 7137 PetscCall(MatProductCreate(p->B,mmdata->Bloc,NULL,&mp[cp])); 7138 PetscCall(MatProductSetType(mp[cp],MATPRODUCT_AtB)); 7139 PetscCall(MatProductSetFill(mp[cp],product->fill)); 7140 PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp)); 7141 PetscCall(MatSetOptionsPrefix(mp[cp],prefix)); 7142 PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix)); 7143 mp[cp]->product->api_user = product->api_user; 7144 PetscCall(MatProductSetFromOptions(mp[cp])); 7145 PetscCheck(mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]); 7146 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7147 rmapt[cp] = 2; 7148 rmapa[cp] = p->garray; 7149 cmapt[cp] = 2; 7150 cmapa[cp] = globidx; 7151 mptmp[cp] = PETSC_FALSE; 7152 cp++; 7153 } 7154 break; 7155 case MATPRODUCT_PtAP: 7156 PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A,P,MAT_INITIAL_MATRIX,&mmdata->startsj_s,&mmdata->startsj_r,&mmdata->bufa,&mmdata->P_oth)); 7157 /* P is product->B */ 7158 PetscCall(MatMPIAIJGetLocalMatMerge(P,MAT_INITIAL_MATRIX,&glob,&mmdata->Bloc)); 7159 PetscCall(MatProductCreate(a->A,mmdata->Bloc,NULL,&mp[cp])); 7160 PetscCall(MatProductSetType(mp[cp],MATPRODUCT_PtAP)); 7161 PetscCall(MatProductSetFill(mp[cp],product->fill)); 7162 PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp)); 7163 PetscCall(MatSetOptionsPrefix(mp[cp],prefix)); 7164 PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix)); 7165 mp[cp]->product->api_user = product->api_user; 7166 PetscCall(MatProductSetFromOptions(mp[cp])); 7167 PetscCheck(mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for 
%s",MatProductTypes[mp[cp]->product->type]); 7168 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7169 PetscCall(ISGetIndices(glob,&globidx)); 7170 rmapt[cp] = 2; 7171 rmapa[cp] = globidx; 7172 cmapt[cp] = 2; 7173 cmapa[cp] = globidx; 7174 mptmp[cp] = PETSC_FALSE; 7175 cp++; 7176 if (mmdata->P_oth) { 7177 PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth,&P_oth_l2g)); 7178 PetscCall(ISLocalToGlobalMappingGetIndices(P_oth_l2g,&P_oth_idx)); 7179 PetscCall(MatSetType(mmdata->P_oth,((PetscObject)(a->B))->type_name)); 7180 PetscCall(MatBindToCPU(mmdata->P_oth,mmdata->P_oth_bind)); 7181 PetscCall(MatProductCreate(a->B,mmdata->P_oth,NULL,&mp[cp])); 7182 PetscCall(MatProductSetType(mp[cp],MATPRODUCT_AB)); 7183 PetscCall(MatProductSetFill(mp[cp],product->fill)); 7184 PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp)); 7185 PetscCall(MatSetOptionsPrefix(mp[cp],prefix)); 7186 PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix)); 7187 mp[cp]->product->api_user = product->api_user; 7188 PetscCall(MatProductSetFromOptions(mp[cp])); 7189 PetscCheck(mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]); 7190 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7191 mptmp[cp] = PETSC_TRUE; 7192 cp++; 7193 PetscCall(MatProductCreate(mmdata->Bloc,mp[1],NULL,&mp[cp])); 7194 PetscCall(MatProductSetType(mp[cp],MATPRODUCT_AtB)); 7195 PetscCall(MatProductSetFill(mp[cp],product->fill)); 7196 PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp)); 7197 PetscCall(MatSetOptionsPrefix(mp[cp],prefix)); 7198 PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix)); 7199 mp[cp]->product->api_user = product->api_user; 7200 PetscCall(MatProductSetFromOptions(mp[cp])); 7201 PetscCheck(mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]); 7202 
PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7203 rmapt[cp] = 2; 7204 rmapa[cp] = globidx; 7205 cmapt[cp] = 2; 7206 cmapa[cp] = P_oth_idx; 7207 mptmp[cp] = PETSC_FALSE; 7208 cp++; 7209 } 7210 break; 7211 default: 7212 SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Not for product type %s",MatProductTypes[ptype]); 7213 } 7214 /* sanity check */ 7215 if (size > 1) for (i = 0; i < cp; i++) PetscCheck(rmapt[i] != 2 || hasoffproc,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected offproc map type for product %" PetscInt_FMT,i); 7216 7217 PetscCall(PetscMalloc2(cp,&mmdata->mp,cp,&mmdata->mptmp)); 7218 for (i = 0; i < cp; i++) { 7219 mmdata->mp[i] = mp[i]; 7220 mmdata->mptmp[i] = mptmp[i]; 7221 } 7222 mmdata->cp = cp; 7223 C->product->data = mmdata; 7224 C->product->destroy = MatDestroy_MatMatMPIAIJBACKEND; 7225 C->ops->productnumeric = MatProductNumeric_MPIAIJBACKEND; 7226 7227 /* memory type */ 7228 mmdata->mtype = PETSC_MEMTYPE_HOST; 7229 PetscCall(PetscObjectTypeCompareAny((PetscObject)C,&iscuda,MATSEQAIJCUSPARSE,MATMPIAIJCUSPARSE,"")); 7230 PetscCall(PetscObjectTypeCompareAny((PetscObject)C,&iskokk,MATSEQAIJKOKKOS,MATMPIAIJKOKKOS,"")); 7231 if (iscuda) mmdata->mtype = PETSC_MEMTYPE_CUDA; 7232 else if (iskokk) mmdata->mtype = PETSC_MEMTYPE_KOKKOS; 7233 7234 /* prepare coo coordinates for values insertion */ 7235 7236 /* count total nonzeros of those intermediate seqaij Mats 7237 ncoo_d: # of nonzeros of matrices that do not have offproc entries 7238 ncoo_o: # of nonzeros (of matrices that might have offproc entries) that will be inserted to remote procs 7239 ncoo_oown: # of nonzeros (of matrices that might have offproc entries) that will be inserted locally 7240 */ 7241 for (cp = 0, ncoo_d = 0, ncoo_o = 0, ncoo_oown = 0; cp < mmdata->cp; cp++) { 7242 Mat_SeqAIJ *mm = (Mat_SeqAIJ*)mp[cp]->data; 7243 if (mptmp[cp]) continue; 7244 if (rmapt[cp] == 2 && hasoffproc) { /* the rows need to be scatter to all processes (might include self) */ 7245 const PetscInt *rmap 
= rmapa[cp]; 7246 const PetscInt mr = mp[cp]->rmap->n; 7247 const PetscInt rs = C->rmap->rstart; 7248 const PetscInt re = C->rmap->rend; 7249 const PetscInt *ii = mm->i; 7250 for (i = 0; i < mr; i++) { 7251 const PetscInt gr = rmap[i]; 7252 const PetscInt nz = ii[i+1] - ii[i]; 7253 if (gr < rs || gr >= re) ncoo_o += nz; /* this row is offproc */ 7254 else ncoo_oown += nz; /* this row is local */ 7255 } 7256 } else ncoo_d += mm->nz; 7257 } 7258 7259 /* 7260 ncoo: total number of nonzeros (including those inserted by remote procs) belonging to this proc 7261 7262 ncoo = ncoo_d + ncoo_oown + ncoo2, which ncoo2 is number of nonzeros inserted to me by other procs. 7263 7264 off[0] points to a big index array, which is shared by off[1,2,...]. Similarily, for own[0]. 7265 7266 off[p]: points to the segment for matrix mp[p], storing location of nonzeros that mp[p] will insert to others 7267 own[p]: points to the segment for matrix mp[p], storing location of nonzeros that mp[p] will insert locally 7268 so, off[p+1]-off[p] is the number of nonzeros that mp[p] will send to others. 7269 7270 coo_i/j/v[]: [ncoo] row/col/val of nonzeros belonging to this proc. 7271 Ex. coo_i[]: the beginning part (of size ncoo_d + ncoo_oown) stores i of local nonzeros, and the remaing part stores i of nonzeros I will receive. 
7272 */ 7273 PetscCall(PetscCalloc1(mmdata->cp+1,&mmdata->off)); /* +1 to make a csr-like data structure */ 7274 PetscCall(PetscCalloc1(mmdata->cp+1,&mmdata->own)); 7275 7276 /* gather (i,j) of nonzeros inserted by remote procs */ 7277 if (hasoffproc) { 7278 PetscSF msf; 7279 PetscInt ncoo2,*coo_i2,*coo_j2; 7280 7281 PetscCall(PetscMalloc1(ncoo_o,&mmdata->off[0])); 7282 PetscCall(PetscMalloc1(ncoo_oown,&mmdata->own[0])); 7283 PetscCall(PetscMalloc2(ncoo_o,&coo_i,ncoo_o,&coo_j)); /* to collect (i,j) of entries to be sent to others */ 7284 7285 for (cp = 0, ncoo_o = 0; cp < mmdata->cp; cp++) { 7286 Mat_SeqAIJ *mm = (Mat_SeqAIJ*)mp[cp]->data; 7287 PetscInt *idxoff = mmdata->off[cp]; 7288 PetscInt *idxown = mmdata->own[cp]; 7289 if (!mptmp[cp] && rmapt[cp] == 2) { /* row map is sparse */ 7290 const PetscInt *rmap = rmapa[cp]; 7291 const PetscInt *cmap = cmapa[cp]; 7292 const PetscInt *ii = mm->i; 7293 PetscInt *coi = coo_i + ncoo_o; 7294 PetscInt *coj = coo_j + ncoo_o; 7295 const PetscInt mr = mp[cp]->rmap->n; 7296 const PetscInt rs = C->rmap->rstart; 7297 const PetscInt re = C->rmap->rend; 7298 const PetscInt cs = C->cmap->rstart; 7299 for (i = 0; i < mr; i++) { 7300 const PetscInt *jj = mm->j + ii[i]; 7301 const PetscInt gr = rmap[i]; 7302 const PetscInt nz = ii[i+1] - ii[i]; 7303 if (gr < rs || gr >= re) { /* this is an offproc row */ 7304 for (j = ii[i]; j < ii[i+1]; j++) { 7305 *coi++ = gr; 7306 *idxoff++ = j; 7307 } 7308 if (!cmapt[cp]) { /* already global */ 7309 for (j = 0; j < nz; j++) *coj++ = jj[j]; 7310 } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */ 7311 for (j = 0; j < nz; j++) *coj++ = jj[j] + cs; 7312 } else { /* offdiag */ 7313 for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]]; 7314 } 7315 ncoo_o += nz; 7316 } else { /* this is a local row */ 7317 for (j = ii[i]; j < ii[i+1]; j++) *idxown++ = j; 7318 } 7319 } 7320 } 7321 mmdata->off[cp + 1] = idxoff; 7322 mmdata->own[cp + 1] = idxown; 7323 } 7324 7325 
PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)C),&mmdata->sf)); 7326 PetscCall(PetscSFSetGraphLayout(mmdata->sf,C->rmap,ncoo_o/*nleaves*/,NULL/*ilocal*/,PETSC_OWN_POINTER,coo_i)); 7327 PetscCall(PetscSFGetMultiSF(mmdata->sf,&msf)); 7328 PetscCall(PetscSFGetGraph(msf,&ncoo2/*nroots*/,NULL,NULL,NULL)); 7329 ncoo = ncoo_d + ncoo_oown + ncoo2; 7330 PetscCall(PetscMalloc2(ncoo,&coo_i2,ncoo,&coo_j2)); 7331 PetscCall(PetscSFGatherBegin(mmdata->sf,MPIU_INT,coo_i,coo_i2 + ncoo_d + ncoo_oown)); /* put (i,j) of remote nonzeros at back */ 7332 PetscCall(PetscSFGatherEnd(mmdata->sf,MPIU_INT,coo_i,coo_i2 + ncoo_d + ncoo_oown)); 7333 PetscCall(PetscSFGatherBegin(mmdata->sf,MPIU_INT,coo_j,coo_j2 + ncoo_d + ncoo_oown)); 7334 PetscCall(PetscSFGatherEnd(mmdata->sf,MPIU_INT,coo_j,coo_j2 + ncoo_d + ncoo_oown)); 7335 PetscCall(PetscFree2(coo_i,coo_j)); 7336 /* allocate MPI send buffer to collect nonzero values to be sent to remote procs */ 7337 PetscCall(PetscSFMalloc(mmdata->sf,mmdata->mtype,ncoo_o*sizeof(PetscScalar),(void**)&mmdata->coo_w)); 7338 coo_i = coo_i2; 7339 coo_j = coo_j2; 7340 } else { /* no offproc values insertion */ 7341 ncoo = ncoo_d; 7342 PetscCall(PetscMalloc2(ncoo,&coo_i,ncoo,&coo_j)); 7343 7344 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)C),&mmdata->sf)); 7345 PetscCall(PetscSFSetGraph(mmdata->sf,0,0,NULL,PETSC_OWN_POINTER,NULL,PETSC_OWN_POINTER)); 7346 PetscCall(PetscSFSetUp(mmdata->sf)); 7347 } 7348 mmdata->hasoffproc = hasoffproc; 7349 7350 /* gather (i,j) of nonzeros inserted locally */ 7351 for (cp = 0, ncoo_d = 0; cp < mmdata->cp; cp++) { 7352 Mat_SeqAIJ *mm = (Mat_SeqAIJ*)mp[cp]->data; 7353 PetscInt *coi = coo_i + ncoo_d; 7354 PetscInt *coj = coo_j + ncoo_d; 7355 const PetscInt *jj = mm->j; 7356 const PetscInt *ii = mm->i; 7357 const PetscInt *cmap = cmapa[cp]; 7358 const PetscInt *rmap = rmapa[cp]; 7359 const PetscInt mr = mp[cp]->rmap->n; 7360 const PetscInt rs = C->rmap->rstart; 7361 const PetscInt re = C->rmap->rend; 7362 const PetscInt 
cs = C->cmap->rstart; 7363 7364 if (mptmp[cp]) continue; 7365 if (rmapt[cp] == 1) { /* consecutive rows */ 7366 /* fill coo_i */ 7367 for (i = 0; i < mr; i++) { 7368 const PetscInt gr = i + rs; 7369 for (j = ii[i]; j < ii[i+1]; j++) coi[j] = gr; 7370 } 7371 /* fill coo_j */ 7372 if (!cmapt[cp]) { /* type-0, already global */ 7373 PetscCall(PetscArraycpy(coj,jj,mm->nz)); 7374 } else if (cmapt[cp] == 1) { /* type-1, local to global for consecutive columns of C */ 7375 for (j = 0; j < mm->nz; j++) coj[j] = jj[j] + cs; /* lid + col start */ 7376 } else { /* type-2, local to global for sparse columns */ 7377 for (j = 0; j < mm->nz; j++) coj[j] = cmap[jj[j]]; 7378 } 7379 ncoo_d += mm->nz; 7380 } else if (rmapt[cp] == 2) { /* sparse rows */ 7381 for (i = 0; i < mr; i++) { 7382 const PetscInt *jj = mm->j + ii[i]; 7383 const PetscInt gr = rmap[i]; 7384 const PetscInt nz = ii[i+1] - ii[i]; 7385 if (gr >= rs && gr < re) { /* local rows */ 7386 for (j = ii[i]; j < ii[i+1]; j++) *coi++ = gr; 7387 if (!cmapt[cp]) { /* type-0, already global */ 7388 for (j = 0; j < nz; j++) *coj++ = jj[j]; 7389 } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */ 7390 for (j = 0; j < nz; j++) *coj++ = jj[j] + cs; 7391 } else { /* type-2, local to global for sparse columns */ 7392 for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]]; 7393 } 7394 ncoo_d += nz; 7395 } 7396 } 7397 } 7398 } 7399 if (glob) { 7400 PetscCall(ISRestoreIndices(glob,&globidx)); 7401 } 7402 PetscCall(ISDestroy(&glob)); 7403 if (P_oth_l2g) { 7404 PetscCall(ISLocalToGlobalMappingRestoreIndices(P_oth_l2g,&P_oth_idx)); 7405 } 7406 PetscCall(ISLocalToGlobalMappingDestroy(&P_oth_l2g)); 7407 /* allocate an array to store all nonzeros (inserted locally or remotely) belonging to this proc */ 7408 PetscCall(PetscSFMalloc(mmdata->sf,mmdata->mtype,ncoo*sizeof(PetscScalar),(void**)&mmdata->coo_v)); 7409 7410 /* preallocate with COO data */ 7411 PetscCall(MatSetPreallocationCOO(C,ncoo,coo_i,coo_j)); 7412 
PetscCall(PetscFree2(coo_i,coo_j)); 7413 PetscFunctionReturn(0); 7414 } 7415 7416 PetscErrorCode MatProductSetFromOptions_MPIAIJBACKEND(Mat mat) 7417 { 7418 Mat_Product *product = mat->product; 7419 #if defined(PETSC_HAVE_DEVICE) 7420 PetscBool match = PETSC_FALSE; 7421 PetscBool usecpu = PETSC_FALSE; 7422 #else 7423 PetscBool match = PETSC_TRUE; 7424 #endif 7425 7426 PetscFunctionBegin; 7427 MatCheckProduct(mat,1); 7428 #if defined(PETSC_HAVE_DEVICE) 7429 if (!product->A->boundtocpu && !product->B->boundtocpu) { 7430 PetscCall(PetscObjectTypeCompare((PetscObject)product->B,((PetscObject)product->A)->type_name,&match)); 7431 } 7432 if (match) { /* we can always fallback to the CPU if requested */ 7433 switch (product->type) { 7434 case MATPRODUCT_AB: 7435 if (product->api_user) { 7436 PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatMatMult","Mat"); 7437 PetscCall(PetscOptionsBool("-matmatmult_backend_cpu","Use CPU code","MatMatMult",usecpu,&usecpu,NULL)); 7438 PetscOptionsEnd(); 7439 } else { 7440 PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatProduct_AB","Mat"); 7441 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu","Use CPU code","MatMatMult",usecpu,&usecpu,NULL)); 7442 PetscOptionsEnd(); 7443 } 7444 break; 7445 case MATPRODUCT_AtB: 7446 if (product->api_user) { 7447 PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatTransposeMatMult","Mat"); 7448 PetscCall(PetscOptionsBool("-mattransposematmult_backend_cpu","Use CPU code","MatTransposeMatMult",usecpu,&usecpu,NULL)); 7449 PetscOptionsEnd(); 7450 } else { 7451 PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatProduct_AtB","Mat"); 7452 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu","Use CPU code","MatTransposeMatMult",usecpu,&usecpu,NULL)); 7453 PetscOptionsEnd(); 7454 } 7455 break; 7456 case MATPRODUCT_PtAP: 7457 if (product->api_user) { 7458 
PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatPtAP","Mat"); 7459 PetscCall(PetscOptionsBool("-matptap_backend_cpu","Use CPU code","MatPtAP",usecpu,&usecpu,NULL)); 7460 PetscOptionsEnd(); 7461 } else { 7462 PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatProduct_PtAP","Mat"); 7463 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu","Use CPU code","MatPtAP",usecpu,&usecpu,NULL)); 7464 PetscOptionsEnd(); 7465 } 7466 break; 7467 default: 7468 break; 7469 } 7470 match = (PetscBool)!usecpu; 7471 } 7472 #endif 7473 if (match) { 7474 switch (product->type) { 7475 case MATPRODUCT_AB: 7476 case MATPRODUCT_AtB: 7477 case MATPRODUCT_PtAP: 7478 mat->ops->productsymbolic = MatProductSymbolic_MPIAIJBACKEND; 7479 break; 7480 default: 7481 break; 7482 } 7483 } 7484 /* fallback to MPIAIJ ops */ 7485 if (!mat->ops->productsymbolic) PetscCall(MatProductSetFromOptions_MPIAIJ(mat)); 7486 PetscFunctionReturn(0); 7487 } 7488 7489 /* 7490 Produces a set of block column indices of the matrix row, one for each block represented in the original row 7491 7492 n - the number of block indices in cc[] 7493 cc - the block indices (must be large enough to contain the indices) 7494 */ 7495 static inline PetscErrorCode MatCollapseRow(Mat Amat,PetscInt row,PetscInt bs,PetscInt *n,PetscInt *cc) 7496 { 7497 PetscInt cnt = -1,nidx,j; 7498 const PetscInt *idx; 7499 7500 PetscFunctionBegin; 7501 PetscCall(MatGetRow(Amat,row,&nidx,&idx,NULL)); 7502 if (nidx) { 7503 cnt = 0; 7504 cc[cnt] = idx[0]/bs; 7505 for (j=1; j<nidx; j++) { 7506 if (cc[cnt] < idx[j]/bs) cc[++cnt] = idx[j]/bs; 7507 } 7508 } 7509 PetscCall(MatRestoreRow(Amat,row,&nidx,&idx,NULL)); 7510 *n = cnt+1; 7511 PetscFunctionReturn(0); 7512 } 7513 7514 /* 7515 Produces a set of block column indices of the matrix block row, one for each block represented in the original set of rows 7516 7517 ncollapsed - the number of block indices 7518 collapsed - the block indices 
(must be large enough to contain the indices)
*/
static inline PetscErrorCode MatCollapseRows(Mat Amat,PetscInt start,PetscInt bs,PetscInt *w0,PetscInt *w1,PetscInt *w2,PetscInt *ncollapsed,PetscInt **collapsed)
{
  PetscInt *acc    = w0; /* union of the block indices of the rows processed so far */
  PetscInt *rowblk = w1; /* block indices of the row currently being added */
  PetscInt *spare  = w2; /* buffer that receives the next merge result */
  PetscInt nacc,nrow = 0,r;

  PetscFunctionBegin;
  /* rows start .. start+bs-1 are merged one at a time into the running union */
  PetscCall(MatCollapseRow(Amat,start,bs,&nacc,acc));
  for (r=1; r<bs; r++) {
    PetscInt *swap;

    PetscCall(MatCollapseRow(Amat,start+r,bs,&nrow,rowblk));
    PetscCall(PetscMergeIntArray(nacc,acc,nrow,rowblk,&nacc,&spare));
    /* the merge result becomes the running union; the old union buffer is reused next time */
    swap  = acc;
    acc   = spare;
    spare = swap;
  }
  *ncollapsed = nacc;
  if (collapsed) *collapsed = acc; /* points into one of the caller's work arrays */
  PetscFunctionReturn(0);
}

/* -------------------------------------------------------------------------- */
/*
 MatCreateGraph_Simple_AIJ - create simple scalar matrix (graph) from potentially blocked matrix

 Input Parameters:
 + Amat - matrix
 . symmetrize - make the result symmetric
 - scale - scale with diagonal

 Output Parameter:
 .
a_Gmat - output scalar graph >= 0

*/
PETSC_INTERN PetscErrorCode MatCreateGraph_Simple_AIJ(Mat Amat, PetscBool symmetrize, PetscBool scale, Mat *a_Gmat)
{
  PetscInt  Istart,Iend,Ii,jj,kk,ncols,nloc,NN,MM,bs;
  MPI_Comm  comm;
  Mat       Gmat;
  PetscBool ismpiaij,isseqaij;
  Mat       a, b, c;
  MatType   jtype;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)Amat,&comm));
  PetscCall(MatGetOwnershipRange(Amat, &Istart, &Iend));
  PetscCall(MatGetSize(Amat, &MM, &NN));
  PetscCall(MatGetBlockSize(Amat, &bs));
  nloc = (Iend-Istart)/bs; /* number of local block rows */

  PetscCall(PetscObjectBaseTypeCompare((PetscObject)Amat,MATSEQAIJ,&isseqaij));
  PetscCall(PetscObjectBaseTypeCompare((PetscObject)Amat,MATMPIAIJ,&ismpiaij));
  PetscCheck(isseqaij || ismpiaij,comm,PETSC_ERR_USER,"Require (MPI)AIJ matrix type");

  /* TODO GPU: these calls are potentially expensive if matrices are large and we want to use the GPU */
  /* A solution consists in providing a new API, MatAIJGetCollapsedAIJ, and each class can provide a fast
     implementation */
  if (bs > 1) {
    PetscCall(MatGetType(Amat,&jtype));
    PetscCall(MatCreate(comm, &Gmat));
    PetscCall(MatSetType(Gmat, jtype));
    PetscCall(MatSetSizes(Gmat,nloc,nloc,PETSC_DETERMINE,PETSC_DETERMINE));
    PetscCall(MatSetBlockSizes(Gmat, 1, 1));
    if (isseqaij || ((Mat_MPIAIJ*)Amat->data)->garray) {
      /* Fast path: valid only when every bs x bs block is stored densely, which is verified below */
      PetscInt  *d_nnz, *o_nnz;
      MatScalar *aa,val,AA[4096];
      PetscInt  *aj,*ai,AJ[4096],nc;
      if (isseqaij) { a = Amat; b = NULL; }
      else {
        Mat_MPIAIJ *d = (Mat_MPIAIJ*)Amat->data;
        a = d->A; b = d->B;
      }
      PetscCall(PetscInfo(Amat,"New bs>1 Graph. nloc=%" PetscInt_FMT "\n",nloc));
      PetscCall(PetscMalloc2(nloc, &d_nnz,isseqaij ? 0 : nloc, &o_nnz));
      for (c=a, kk=0 ; c && kk<2 ; c=b, kk++){ /* first the diagonal part, then (if present) the off-diagonal part */
        PetscInt       *nnz = (c==a) ? d_nnz : o_nnz, nmax=0;
        const PetscInt *cols;
        for (PetscInt brow=0,jj,ok=1,j0; brow < nloc*bs; brow += bs) { // block rows
          PetscCall(MatGetRow(c,brow,&jj,&cols,NULL));
          nnz[brow/bs] = jj/bs;
          if (jj%bs) ok = 0;
          if (cols) j0 = cols[0];
          else j0 = -1;
          PetscCall(MatRestoreRow(c,brow,&jj,&cols,NULL));
          if (nnz[brow/bs]>nmax) nmax = nnz[brow/bs];
          /* Check for non-dense blocks. The remaining rows are inspected even when the first row of the
             block is empty: a block whose first row is empty but whose other rows hold entries is not
             dense either, and the loops below take all their sizes from the first row only. */
          for (PetscInt ii=1; ii < bs; ii++) {
            PetscCall(MatGetRow(c,brow+ii,&jj,&cols,NULL));
            if (jj%bs) ok = 0;
            if ((cols && j0 != cols[0]) || (!cols && j0 != -1)) ok = 0;
            if (nnz[brow/bs] != jj/bs) ok = 0;
            PetscCall(MatRestoreRow(c,brow+ii,&jj,&cols,NULL));
          }
          if (!ok) {
            /* blocks are not dense: release the counts and use the general algorithm instead */
            PetscCall(PetscFree2(d_nnz,o_nnz));
            goto old_bs;
          }
        }
        PetscCheck(nmax<4096,PETSC_COMM_SELF,PETSC_ERR_USER,"Buffer %" PetscInt_FMT " too small 4096.",nmax);
      }
      PetscCall(MatSeqAIJSetPreallocation(Gmat,0,d_nnz));
      PetscCall(MatMPIAIJSetPreallocation(Gmat,0,d_nnz,0,o_nnz));
      PetscCall(PetscFree2(d_nnz,o_nnz));
      // diag
      for (PetscInt brow=0,n,grow; brow < nloc*bs; brow += bs) { // block rows
        Mat_SeqAIJ *aseq  = (Mat_SeqAIJ*)a->data;
        ai = aseq->i;
        n  = ai[brow+1] - ai[brow];
        aj = aseq->j + ai[brow];
        for (PetscInt k=0; k<n; k += bs) { // block columns
          AJ[k/bs] = aj[k]/bs + Istart/bs; // diag starts at (Istart,Istart)
          val = 0;
          for (PetscInt ii=0; ii<bs; ii++) { // rows in block
            aa = aseq->a + ai[brow+ii] + k;
            for (PetscInt jj=0; jj<bs; jj++) { // columns in block
              val += PetscAbs(PetscRealPart(aa[jj])); // a sort of norm
            }
          }
          AA[k/bs] = val;
        }
        grow = Istart/bs + brow/bs;
        PetscCall(MatSetValues(Gmat,1,&grow,n/bs,AJ,AA,INSERT_VALUES));
      }
      // off-diag
      if (ismpiaij) {
        Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)Amat->data;
        const PetscScalar *vals;
        const PetscInt    *cols, *garray = aij->garray;
        PetscCheck(garray,PETSC_COMM_SELF,PETSC_ERR_USER,"No garray ?");
        for (PetscInt brow=0,grow; brow < nloc*bs; brow += bs) { // block rows
          /* the first row of the block supplies the block column indices and zeroes the accumulators */
          PetscCall(MatGetRow(b,brow,&ncols,&cols,NULL));
          for (PetscInt k=0,cidx=0 ; k < ncols ; k += bs, cidx++) {
            AA[k/bs] = 0;
            AJ[cidx] = garray[cols[k]]/bs;
          }
          nc = ncols/bs;
          PetscCall(MatRestoreRow(b,brow,&ncols,&cols,NULL));
          for (PetscInt ii=0; ii<bs; ii++) { // rows in block
            PetscCall(MatGetRow(b,brow+ii,&ncols,&cols,&vals));
            for (PetscInt k=0; k<ncols; k += bs) {
              for (PetscInt jj=0; jj<bs; jj++) { // cols in block
                AA[k/bs] += PetscAbs(PetscRealPart(vals[k+jj]));
              }
            }
            PetscCall(MatRestoreRow(b,brow+ii,&ncols,&cols,&vals));
          }
          grow = Istart/bs + brow/bs;
          PetscCall(MatSetValues(Gmat,1,&grow,nc,AJ,AA,INSERT_VALUES));
        }
      }
      PetscCall(MatAssemblyBegin(Gmat,MAT_FINAL_ASSEMBLY));
      PetscCall(MatAssemblyEnd(Gmat,MAT_FINAL_ASSEMBLY));
    } else {
      const PetscScalar *vals;
      const PetscInt    *idx;
      PetscInt          *d_nnz, *o_nnz,*w0,*w1,*w2;
old_bs:
      /*
         Determine the preallocation needed for the scalar matrix derived from the vector matrix.
      */
      PetscCall(PetscInfo(Amat,"OLD bs>1 CreateGraph\n"));
      PetscCall(PetscMalloc2(nloc, &d_nnz,isseqaij ? 0 : nloc, &o_nnz));
      if (isseqaij) {
        PetscInt max_d_nnz;
        /*
           Determine exact preallocation count for (sequential) scalar matrix
        */
        PetscCall(MatSeqAIJGetMaxRowNonzeros(Amat,&max_d_nnz));
        max_d_nnz = PetscMin(nloc,bs*max_d_nnz);
        PetscCall(PetscMalloc3(max_d_nnz, &w0,max_d_nnz, &w1,max_d_nnz, &w2));
        for (Ii = 0, jj = 0; Ii < Iend; Ii += bs, jj++) {
          PetscCall(MatCollapseRows(Amat,Ii,bs,w0,w1,w2,&d_nnz[jj],NULL));
        }
        PetscCall(PetscFree3(w0,w1,w2));
      } else if (ismpiaij) {
        Mat            Daij,Oaij;
        const PetscInt *garray;
        PetscInt       max_d_nnz;
        PetscCall(MatMPIAIJGetSeqAIJ(Amat,&Daij,&Oaij,&garray));
        /*
           Determine exact preallocation count for diagonal block portion of scalar matrix
        */
        PetscCall(MatSeqAIJGetMaxRowNonzeros(Daij,&max_d_nnz));
        max_d_nnz = PetscMin(nloc,bs*max_d_nnz);
        PetscCall(PetscMalloc3(max_d_nnz, &w0,max_d_nnz, &w1,max_d_nnz, &w2));
        for (Ii = 0, jj = 0; Ii < Iend - Istart; Ii += bs, jj++) {
          PetscCall(MatCollapseRows(Daij,Ii,bs,w0,w1,w2,&d_nnz[jj],NULL));
        }
        PetscCall(PetscFree3(w0,w1,w2));
        /*
           Over estimate (usually grossly over), preallocation count for off-diagonal portion of scalar matrix
        */
        for (Ii = 0, jj = 0; Ii < Iend - Istart; Ii += bs, jj++) {
          o_nnz[jj] = 0;
          for (kk=0; kk<bs; kk++) { /* rows that get collapsed to a single row */
            PetscCall(MatGetRow(Oaij,Ii+kk,&ncols,NULL,NULL));
            o_nnz[jj] += ncols;
            PetscCall(MatRestoreRow(Oaij,Ii+kk,&ncols,NULL,NULL));
          }
          if (o_nnz[jj] > (NN/bs-nloc)) o_nnz[jj] = NN/bs-nloc;
        }
      } else SETERRQ(comm,PETSC_ERR_USER,"Require AIJ matrix type");
      /* get scalar copy (norms) of matrix */
      PetscCall(MatSeqAIJSetPreallocation(Gmat,0,d_nnz));
      PetscCall(MatMPIAIJSetPreallocation(Gmat,0,d_nnz,0,o_nnz));
      PetscCall(PetscFree2(d_nnz,o_nnz));
      for (Ii = Istart; Ii < Iend; Ii++) {
        PetscInt dest_row = Ii/bs;
        PetscCall(MatGetRow(Amat,Ii,&ncols,&idx,&vals));
        for (jj=0; jj<ncols; jj++) {
          PetscInt    dest_col = idx[jj]/bs;
          PetscScalar sv       = PetscAbs(PetscRealPart(vals[jj]));
          /* ADD_VALUES accumulates all scalar entries of a block into its single graph entry */
          PetscCall(MatSetValues(Gmat,1,&dest_row,1,&dest_col,&sv,ADD_VALUES));
        }
        PetscCall(MatRestoreRow(Amat,Ii,&ncols,&idx,&vals));
      }
      PetscCall(MatAssemblyBegin(Gmat,MAT_FINAL_ASSEMBLY));
      PetscCall(MatAssemblyEnd(Gmat,MAT_FINAL_ASSEMBLY));
    }
  } else {
    /* TODO GPU: optimization proposal, each class provides fast implementation of this
       procedure via MatAbs API */
    /* just copy scalar matrix & abs() */
    PetscCall(MatDuplicate(Amat, MAT_COPY_VALUES, &Gmat));
    if (isseqaij) { a = Gmat; b = NULL; }
    else {
      Mat_MPIAIJ *d = (Mat_MPIAIJ*)Gmat->data;
      a = d->A; b = d->B;
    }
    /* abs */
    for (c=a, kk=0 ; c && kk<2 ; c=b, kk++){
      MatInfo     info;
      PetscScalar *avals;
      PetscInt    nz;
      PetscCall(MatGetInfo(c,MAT_LOCAL,&info));
      PetscCall(MatSeqAIJGetArray(c,&avals));
      /* the stored-entry count is converted once to PetscInt so the counter cannot overflow an int */
      nz = (PetscInt)info.nz_used;
      for (PetscInt i = 0; i<nz; i++) avals[i] = PetscAbsScalar(avals[i]);
      PetscCall(MatSeqAIJRestoreArray(c,&avals));
    }
  }
  if (symmetrize) {
    PetscBool issym;
    PetscCall(MatGetOption(Amat,MAT_SYMMETRIC,&issym));
    if (!issym) {
      /* Gmat <- Gmat + Gmat^T */
      Mat matTrans;
      PetscCall(MatTranspose(Gmat, MAT_INITIAL_MATRIX, &matTrans));
      PetscCall(MatAXPY(Gmat, 1.0, matTrans, Gmat->structurally_symmetric ? SAME_NONZERO_PATTERN : DIFFERENT_NONZERO_PATTERN));
      PetscCall(MatDestroy(&matTrans));
    }
    PetscCall(MatSetOption(Gmat,MAT_SYMMETRIC,PETSC_TRUE));
  } else {
    PetscCall(MatPropagateSymmetryOptions(Amat, Gmat));
  }
  if (scale) {
    /* scale c for all diagonal values = 1 or -1 */
    Vec diag;
    PetscCall(MatCreateVecs(Gmat, &diag, NULL));
    PetscCall(MatGetDiagonal(Gmat, diag));
    PetscCall(VecReciprocal(diag));
    PetscCall(VecSqrtAbs(diag));
    PetscCall(MatDiagonalScale(Gmat, diag, diag));
    PetscCall(VecDestroy(&diag));
  }
  PetscCall(MatViewFromOptions(Gmat, NULL, "-mat_graph_view"));
  *a_Gmat = Gmat;
  PetscFunctionReturn(0);
}

/* -------------------------------------------------------------------------- */
/*@C
   MatFilter_AIJ - filter values with small absolute values
     With vfilter < 0 does nothing so should not be called.

   Collective on Mat

   Input Parameters:
+   Gmat - the graph
-   vfilter - threshold parameter [0,1)

   Output Parameter:
.   filteredG - output filtered scalar graph

   Level: developer

   Notes:
    This is called before graph coarsers are called.
7799 This could go into Mat, move 'symm' to GAMG 7800 7801 .seealso: `PCGAMGSetThreshold()` 7802 @*/ 7803 PETSC_INTERN PetscErrorCode MatFilter_AIJ(Mat Gmat,PetscReal vfilter, Mat *filteredG) 7804 { 7805 PetscInt Istart,Iend,ncols,nnz0,nnz1, NN, MM, nloc; 7806 Mat tGmat; 7807 MPI_Comm comm; 7808 const PetscScalar *vals; 7809 const PetscInt *idx; 7810 PetscInt *d_nnz, *o_nnz, kk, *garray = NULL, *AJ, maxcols=0; 7811 MatScalar *AA; // this is checked in graph 7812 PetscBool isseqaij; 7813 Mat a, b, c; 7814 MatType jtype; 7815 7816 PetscFunctionBegin; 7817 PetscCall(PetscObjectGetComm((PetscObject)Gmat,&comm)); 7818 PetscCall(PetscObjectBaseTypeCompare((PetscObject)Gmat,MATSEQAIJ,&isseqaij)); 7819 PetscCall(MatGetType(Gmat,&jtype)); 7820 PetscCall(MatCreate(comm, &tGmat)); 7821 PetscCall(MatSetType(tGmat, jtype)); 7822 7823 /* TODO GPU: this can be called when filter = 0 -> Probably provide MatAIJThresholdCompress that compresses the entries below a threshold? 7824 Also, if the matrix is symmetric, can we skip this 7825 operation? It can be very expensive on large matrices. 
*/ 7826 7827 // global sizes 7828 PetscCall(MatGetSize(Gmat, &MM, &NN)); 7829 PetscCall(MatGetOwnershipRange(Gmat, &Istart, &Iend)); 7830 nloc = Iend - Istart; 7831 PetscCall(PetscMalloc2(nloc, &d_nnz,nloc, &o_nnz)); 7832 if (isseqaij) { a = Gmat; b = NULL; } 7833 else { 7834 Mat_MPIAIJ *d = (Mat_MPIAIJ*)Gmat->data; 7835 a = d->A; b = d->B; 7836 garray = d->garray; 7837 } 7838 /* Determine upper bound on non-zeros needed in new filtered matrix */ 7839 for (PetscInt row=0; row < nloc; row++) { 7840 PetscCall(MatGetRow(a,row,&ncols,NULL,NULL)); 7841 d_nnz[row] = ncols; 7842 if (ncols>maxcols) maxcols=ncols; 7843 PetscCall(MatRestoreRow(a,row,&ncols,NULL,NULL)); 7844 } 7845 if (b) { 7846 for (PetscInt row=0; row < nloc; row++) { 7847 PetscCall(MatGetRow(b,row,&ncols,NULL,NULL)); 7848 o_nnz[row] = ncols; 7849 if (ncols>maxcols) maxcols=ncols; 7850 PetscCall(MatRestoreRow(b,row,&ncols,NULL,NULL)); 7851 } 7852 } 7853 PetscCall(MatSetSizes(tGmat,nloc,nloc,MM,MM)); 7854 PetscCall(MatSetBlockSizes(tGmat, 1, 1)); 7855 PetscCall(MatSeqAIJSetPreallocation(tGmat,0,d_nnz)); 7856 PetscCall(MatMPIAIJSetPreallocation(tGmat,0,d_nnz,0,o_nnz)); 7857 PetscCall(MatSetOption(tGmat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE)); 7858 PetscCall(PetscFree2(d_nnz,o_nnz)); 7859 // 7860 PetscCall(PetscMalloc2(maxcols, &AA,maxcols, &AJ)); 7861 nnz0 = nnz1 = 0; 7862 for (c=a, kk=0 ; c && kk<2 ; c=b, kk++){ 7863 for (PetscInt row=0, grow=Istart, ncol_row, jj ; row < nloc; row++,grow++) { 7864 PetscCall(MatGetRow(c,row,&ncols,&idx,&vals)); 7865 for (ncol_row=jj=0; jj<ncols; jj++,nnz0++) { 7866 PetscScalar sv = PetscAbs(PetscRealPart(vals[jj])); 7867 if (PetscRealPart(sv) > vfilter) { 7868 nnz1++; 7869 PetscInt cid = idx[jj] + Istart; //diag 7870 if (c!=a) cid = garray[idx[jj]]; 7871 AA[ncol_row] = vals[jj]; 7872 AJ[ncol_row] = cid; 7873 ncol_row++; 7874 } 7875 } 7876 PetscCall(MatRestoreRow(c,row,&ncols,&idx,&vals)); 7877 PetscCall(MatSetValues(tGmat,1,&grow,ncol_row,AJ,AA,INSERT_VALUES)); 7878 } 7879 } 
7880 PetscCall(PetscFree2(AA,AJ)); 7881 PetscCall(MatAssemblyBegin(tGmat,MAT_FINAL_ASSEMBLY)); 7882 PetscCall(MatAssemblyEnd(tGmat,MAT_FINAL_ASSEMBLY)); 7883 PetscCall(MatPropagateSymmetryOptions(Gmat,tGmat)); /* Normal Mat options are not relevant ? */ 7884 7885 PetscCall(PetscInfo(tGmat,"\t %g%% nnz after filtering, with threshold %g, %g nnz ave. (N=%" PetscInt_FMT ", max row size %d)\n", 7886 (!nnz0) ? 1. : 100.*(double)nnz1/(double)nnz0, (double)vfilter, 7887 (!nloc) ? 1. : (double)nnz0/(double)nloc,MM,(int)maxcols)); 7888 7889 *filteredG = tGmat; 7890 PetscCall(MatViewFromOptions(tGmat, NULL, "-mat_filter_graph_view")); 7891 PetscFunctionReturn(0); 7892 } 7893 7894 /* 7895 Special version for direct calls from Fortran 7896 */ 7897 #include <petsc/private/fortranimpl.h> 7898 7899 /* Change these macros so can be used in void function */ 7900 /* Identical to PetscCallVoid, except it assigns to *_ierr */ 7901 #undef PetscCall 7902 #define PetscCall(...) do { \ 7903 PetscErrorCode ierr_msv_mpiaij = __VA_ARGS__; \ 7904 if (PetscUnlikely(ierr_msv_mpiaij)) { \ 7905 *_ierr = PetscError(PETSC_COMM_SELF,__LINE__,PETSC_FUNCTION_NAME,__FILE__,ierr_msv_mpiaij,PETSC_ERROR_REPEAT," "); \ 7906 return; \ 7907 } \ 7908 } while (0) 7909 7910 #undef SETERRQ 7911 #define SETERRQ(comm,ierr,...) 
do { \ 7912 *_ierr = PetscError(comm,__LINE__,PETSC_FUNCTION_NAME,__FILE__,ierr,PETSC_ERROR_INITIAL,__VA_ARGS__); \ 7913 return; \ 7914 } while (0) 7915 7916 #if defined(PETSC_HAVE_FORTRAN_CAPS) 7917 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ 7918 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 7919 #define matsetvaluesmpiaij_ matsetvaluesmpiaij 7920 #else 7921 #endif 7922 PETSC_EXTERN void matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr) 7923 { 7924 Mat mat = *mmat; 7925 PetscInt m = *mm, n = *mn; 7926 InsertMode addv = *maddv; 7927 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 7928 PetscScalar value; 7929 7930 MatCheckPreallocated(mat,1); 7931 if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv; 7932 else PetscCheck(mat->insertmode == addv,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 7933 { 7934 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 7935 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 7936 PetscBool roworiented = aij->roworiented; 7937 7938 /* Some Variables required in the macro */ 7939 Mat A = aij->A; 7940 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 7941 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 7942 MatScalar *aa; 7943 PetscBool ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE); 7944 Mat B = aij->B; 7945 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 7946 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 7947 MatScalar *ba; 7948 /* This variable below is only for the PETSC_HAVE_VIENNACL or PETSC_HAVE_CUDA cases, but we define it in all cases because we 7949 * cannot use "#if defined" inside a macro. 
*/ 7950 PETSC_UNUSED PetscBool inserted = PETSC_FALSE; 7951 7952 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 7953 PetscInt nonew = a->nonew; 7954 MatScalar *ap1,*ap2; 7955 7956 PetscFunctionBegin; 7957 PetscCall(MatSeqAIJGetArray(A,&aa)); 7958 PetscCall(MatSeqAIJGetArray(B,&ba)); 7959 for (i=0; i<m; i++) { 7960 if (im[i] < 0) continue; 7961 PetscCheck(im[i] < mat->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT,im[i],mat->rmap->N-1); 7962 if (im[i] >= rstart && im[i] < rend) { 7963 row = im[i] - rstart; 7964 lastcol1 = -1; 7965 rp1 = aj + ai[row]; 7966 ap1 = aa + ai[row]; 7967 rmax1 = aimax[row]; 7968 nrow1 = ailen[row]; 7969 low1 = 0; 7970 high1 = nrow1; 7971 lastcol2 = -1; 7972 rp2 = bj + bi[row]; 7973 ap2 = ba + bi[row]; 7974 rmax2 = bimax[row]; 7975 nrow2 = bilen[row]; 7976 low2 = 0; 7977 high2 = nrow2; 7978 7979 for (j=0; j<n; j++) { 7980 if (roworiented) value = v[i*n+j]; 7981 else value = v[i+j*m]; 7982 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue; 7983 if (in[j] >= cstart && in[j] < cend) { 7984 col = in[j] - cstart; 7985 MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]); 7986 } else if (in[j] < 0) continue; 7987 else if (PetscUnlikelyDebug(in[j] >= mat->cmap->N)) { 7988 /* extra brace on SETERRQ() is required for --with-errorchecking=0 - due to the next 'else' clause */ 7989 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT,in[j],mat->cmap->N-1); 7990 } else { 7991 if (mat->was_assembled) { 7992 if (!aij->colmap) { 7993 PetscCall(MatCreateColmap_MPIAIJ_Private(mat)); 7994 } 7995 #if defined(PETSC_USE_CTABLE) 7996 PetscCall(PetscTableFind(aij->colmap,in[j]+1,&col)); 7997 col--; 7998 #else 7999 col = aij->colmap[in[j]] - 1; 8000 #endif 8001 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 8002 
PetscCall(MatDisAssemble_MPIAIJ(mat)); 8003 col = in[j]; 8004 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 8005 B = aij->B; 8006 b = (Mat_SeqAIJ*)B->data; 8007 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 8008 rp2 = bj + bi[row]; 8009 ap2 = ba + bi[row]; 8010 rmax2 = bimax[row]; 8011 nrow2 = bilen[row]; 8012 low2 = 0; 8013 high2 = nrow2; 8014 bm = aij->B->rmap->n; 8015 ba = b->a; 8016 inserted = PETSC_FALSE; 8017 } 8018 } else col = in[j]; 8019 MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]); 8020 } 8021 } 8022 } else if (!aij->donotstash) { 8023 if (roworiented) { 8024 PetscCall(MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)))); 8025 } else { 8026 PetscCall(MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)))); 8027 } 8028 } 8029 } 8030 PetscCall(MatSeqAIJRestoreArray(A,&aa)); 8031 PetscCall(MatSeqAIJRestoreArray(B,&ba)); 8032 } 8033 PetscFunctionReturnVoid(); 8034 } 8035 8036 /* Undefining these here since they were redefined from their original definition above! No 8037 * other PETSc functions should be defined past this point, as it is impossible to recover the 8038 * original definitions */ 8039 #undef PetscCall 8040 #undef SETERRQ 8041