1 #include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/ 2 #include <petsc/private/vecimpl.h> 3 #include <petsc/private/sfimpl.h> 4 #include <petsc/private/isimpl.h> 5 #include <petscblaslapack.h> 6 #include <petscsf.h> 7 #include <petsc/private/hashmapi.h> 8 9 PetscErrorCode MatGetRowIJ_MPIAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *m,const PetscInt *ia[],const PetscInt *ja[],PetscBool *done) 10 { 11 Mat B; 12 13 PetscFunctionBegin; 14 PetscCall(MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&B)); 15 PetscCall(PetscObjectCompose((PetscObject)A,"MatGetRowIJ_MPIAIJ",(PetscObject)B)); 16 PetscCall(MatGetRowIJ(B,oshift,symmetric,inodecompressed,m,ia,ja,done)); 17 PetscFunctionReturn(0); 18 } 19 20 PetscErrorCode MatRestoreRowIJ_MPIAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *m,const PetscInt *ia[],const PetscInt *ja[],PetscBool *done) 21 { 22 Mat B; 23 24 PetscFunctionBegin; 25 PetscCall(PetscObjectQuery((PetscObject)A,"MatGetRowIJ_MPIAIJ",(PetscObject*)&B)); 26 PetscCall(MatRestoreRowIJ(B,oshift,symmetric,inodecompressed,m,ia,ja,done)); 27 PetscCall(MatDestroy(&B)); 28 PetscFunctionReturn(0); 29 } 30 31 /*MC 32 MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices. 33 34 This matrix type is identical to MATSEQAIJ when constructed with a single process communicator, 35 and MATMPIAIJ otherwise. As a result, for single process communicators, 36 MatSeqAIJSetPreallocation is supported, and similarly MatMPIAIJSetPreallocation() is supported 37 for communicators controlling multiple processes. It is recommended that you call both of 38 the above preallocation routines for simplicity. 39 40 Options Database Keys: 41 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions() 42 43 Developer Notes: 44 Subclasses include MATAIJCUSPARSE, MATAIJPERM, MATAIJSELL, MATAIJMKL, MATAIJCRL, and also automatically switches over to use inodes when 45 enough exist. 
46 47 Level: beginner 48 49 .seealso: `MatCreateAIJ()`, `MatCreateSeqAIJ()`, `MATSEQAIJ`, `MATMPIAIJ` 50 M*/ 51 52 /*MC 53 MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices. 54 55 This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator, 56 and MATMPIAIJCRL otherwise. As a result, for single process communicators, 57 MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported 58 for communicators controlling multiple processes. It is recommended that you call both of 59 the above preallocation routines for simplicity. 60 61 Options Database Keys: 62 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions() 63 64 Level: beginner 65 66 .seealso: `MatCreateMPIAIJCRL`, `MATSEQAIJCRL`, `MATMPIAIJCRL`, `MATSEQAIJCRL`, `MATMPIAIJCRL` 67 M*/ 68 69 static PetscErrorCode MatBindToCPU_MPIAIJ(Mat A,PetscBool flg) 70 { 71 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 72 73 PetscFunctionBegin; 74 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_VIENNACL) 75 A->boundtocpu = flg; 76 #endif 77 if (a->A) PetscCall(MatBindToCPU(a->A,flg)); 78 if (a->B) PetscCall(MatBindToCPU(a->B,flg)); 79 80 /* In addition to binding the diagonal and off-diagonal matrices, bind the local vectors used for matrix-vector products. 81 * This maybe seems a little odd for a MatBindToCPU() call to do, but it makes no sense for the binding of these vectors 82 * to differ from the parent matrix. 
*/ 83 if (a->lvec) PetscCall(VecBindToCPU(a->lvec,flg)); 84 if (a->diag) PetscCall(VecBindToCPU(a->diag,flg)); 85 86 PetscFunctionReturn(0); 87 } 88 89 PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs) 90 { 91 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)M->data; 92 93 PetscFunctionBegin; 94 if (mat->A) { 95 PetscCall(MatSetBlockSizes(mat->A,rbs,cbs)); 96 PetscCall(MatSetBlockSizes(mat->B,rbs,1)); 97 } 98 PetscFunctionReturn(0); 99 } 100 101 PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows) 102 { 103 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)M->data; 104 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data; 105 Mat_SeqAIJ *b = (Mat_SeqAIJ*)mat->B->data; 106 const PetscInt *ia,*ib; 107 const MatScalar *aa,*bb,*aav,*bav; 108 PetscInt na,nb,i,j,*rows,cnt=0,n0rows; 109 PetscInt m = M->rmap->n,rstart = M->rmap->rstart; 110 111 PetscFunctionBegin; 112 *keptrows = NULL; 113 114 ia = a->i; 115 ib = b->i; 116 PetscCall(MatSeqAIJGetArrayRead(mat->A,&aav)); 117 PetscCall(MatSeqAIJGetArrayRead(mat->B,&bav)); 118 for (i=0; i<m; i++) { 119 na = ia[i+1] - ia[i]; 120 nb = ib[i+1] - ib[i]; 121 if (!na && !nb) { 122 cnt++; 123 goto ok1; 124 } 125 aa = aav + ia[i]; 126 for (j=0; j<na; j++) { 127 if (aa[j] != 0.0) goto ok1; 128 } 129 bb = bav + ib[i]; 130 for (j=0; j <nb; j++) { 131 if (bb[j] != 0.0) goto ok1; 132 } 133 cnt++; 134 ok1:; 135 } 136 PetscCall(MPIU_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M))); 137 if (!n0rows) { 138 PetscCall(MatSeqAIJRestoreArrayRead(mat->A,&aav)); 139 PetscCall(MatSeqAIJRestoreArrayRead(mat->B,&bav)); 140 PetscFunctionReturn(0); 141 } 142 PetscCall(PetscMalloc1(M->rmap->n-cnt,&rows)); 143 cnt = 0; 144 for (i=0; i<m; i++) { 145 na = ia[i+1] - ia[i]; 146 nb = ib[i+1] - ib[i]; 147 if (!na && !nb) continue; 148 aa = aav + ia[i]; 149 for (j=0; j<na;j++) { 150 if (aa[j] != 0.0) { 151 rows[cnt++] = rstart + i; 152 goto ok2; 153 } 154 } 155 bb = bav + ib[i]; 156 for (j=0; j<nb; j++) { 157 if (bb[j] != 0.0) { 158 rows[cnt++] = 
rstart + i; 159 goto ok2; 160 } 161 } 162 ok2:; 163 } 164 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows)); 165 PetscCall(MatSeqAIJRestoreArrayRead(mat->A,&aav)); 166 PetscCall(MatSeqAIJRestoreArrayRead(mat->B,&bav)); 167 PetscFunctionReturn(0); 168 } 169 170 PetscErrorCode MatDiagonalSet_MPIAIJ(Mat Y,Vec D,InsertMode is) 171 { 172 Mat_MPIAIJ *aij = (Mat_MPIAIJ*) Y->data; 173 PetscBool cong; 174 175 PetscFunctionBegin; 176 PetscCall(MatHasCongruentLayouts(Y,&cong)); 177 if (Y->assembled && cong) { 178 PetscCall(MatDiagonalSet(aij->A,D,is)); 179 } else { 180 PetscCall(MatDiagonalSet_Default(Y,D,is)); 181 } 182 PetscFunctionReturn(0); 183 } 184 185 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows) 186 { 187 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)M->data; 188 PetscInt i,rstart,nrows,*rows; 189 190 PetscFunctionBegin; 191 *zrows = NULL; 192 PetscCall(MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows)); 193 PetscCall(MatGetOwnershipRange(M,&rstart,NULL)); 194 for (i=0; i<nrows; i++) rows[i] += rstart; 195 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows)); 196 PetscFunctionReturn(0); 197 } 198 199 PetscErrorCode MatGetColumnReductions_MPIAIJ(Mat A,PetscInt type,PetscReal *reductions) 200 { 201 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)A->data; 202 PetscInt i,m,n,*garray = aij->garray; 203 Mat_SeqAIJ *a_aij = (Mat_SeqAIJ*) aij->A->data; 204 Mat_SeqAIJ *b_aij = (Mat_SeqAIJ*) aij->B->data; 205 PetscReal *work; 206 const PetscScalar *dummy; 207 208 PetscFunctionBegin; 209 PetscCall(MatGetSize(A,&m,&n)); 210 PetscCall(PetscCalloc1(n,&work)); 211 PetscCall(MatSeqAIJGetArrayRead(aij->A,&dummy)); 212 PetscCall(MatSeqAIJRestoreArrayRead(aij->A,&dummy)); 213 PetscCall(MatSeqAIJGetArrayRead(aij->B,&dummy)); 214 PetscCall(MatSeqAIJRestoreArrayRead(aij->B,&dummy)); 215 if (type == NORM_2) { 216 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 217 work[A->cmap->rstart + a_aij->j[i]] += 
PetscAbsScalar(a_aij->a[i]*a_aij->a[i]); 218 } 219 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 220 work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]); 221 } 222 } else if (type == NORM_1) { 223 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 224 work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]); 225 } 226 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 227 work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]); 228 } 229 } else if (type == NORM_INFINITY) { 230 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 231 work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]); 232 } 233 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 234 work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]); 235 } 236 } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) { 237 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 238 work[A->cmap->rstart + a_aij->j[i]] += PetscRealPart(a_aij->a[i]); 239 } 240 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 241 work[garray[b_aij->j[i]]] += PetscRealPart(b_aij->a[i]); 242 } 243 } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) { 244 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 245 work[A->cmap->rstart + a_aij->j[i]] += PetscImaginaryPart(a_aij->a[i]); 246 } 247 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 248 work[garray[b_aij->j[i]]] += PetscImaginaryPart(b_aij->a[i]); 249 } 250 } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown reduction type"); 251 if (type == NORM_INFINITY) { 252 PetscCall(MPIU_Allreduce(work,reductions,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A))); 253 } else { 254 PetscCall(MPIU_Allreduce(work,reductions,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A))); 255 } 256 PetscCall(PetscFree(work)); 257 if (type == NORM_2) { 258 for (i=0; i<n; i++) reductions[i] = PetscSqrtReal(reductions[i]); 259 } else if (type 
== REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) { 260 for (i=0; i<n; i++) reductions[i] /= m; 261 } 262 PetscFunctionReturn(0); 263 } 264 265 PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A,IS *is) 266 { 267 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 268 IS sis,gis; 269 const PetscInt *isis,*igis; 270 PetscInt n,*iis,nsis,ngis,rstart,i; 271 272 PetscFunctionBegin; 273 PetscCall(MatFindOffBlockDiagonalEntries(a->A,&sis)); 274 PetscCall(MatFindNonzeroRows(a->B,&gis)); 275 PetscCall(ISGetSize(gis,&ngis)); 276 PetscCall(ISGetSize(sis,&nsis)); 277 PetscCall(ISGetIndices(sis,&isis)); 278 PetscCall(ISGetIndices(gis,&igis)); 279 280 PetscCall(PetscMalloc1(ngis+nsis,&iis)); 281 PetscCall(PetscArraycpy(iis,igis,ngis)); 282 PetscCall(PetscArraycpy(iis+ngis,isis,nsis)); 283 n = ngis + nsis; 284 PetscCall(PetscSortRemoveDupsInt(&n,iis)); 285 PetscCall(MatGetOwnershipRange(A,&rstart,NULL)); 286 for (i=0; i<n; i++) iis[i] += rstart; 287 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)A),n,iis,PETSC_OWN_POINTER,is)); 288 289 PetscCall(ISRestoreIndices(sis,&isis)); 290 PetscCall(ISRestoreIndices(gis,&igis)); 291 PetscCall(ISDestroy(&sis)); 292 PetscCall(ISDestroy(&gis)); 293 PetscFunctionReturn(0); 294 } 295 296 /* 297 Local utility routine that creates a mapping from the global column 298 number to the local number in the off-diagonal part of the local 299 storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at 300 a slightly higher hash table cost; without it it is not scalable (each processor 301 has an order N integer array but is fast to access. 
302 */ 303 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat) 304 { 305 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 306 PetscInt n = aij->B->cmap->n,i; 307 308 PetscFunctionBegin; 309 PetscCheck(!n || aij->garray,PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray"); 310 #if defined(PETSC_USE_CTABLE) 311 PetscCall(PetscTableCreate(n,mat->cmap->N+1,&aij->colmap)); 312 for (i=0; i<n; i++) { 313 PetscCall(PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES)); 314 } 315 #else 316 PetscCall(PetscCalloc1(mat->cmap->N+1,&aij->colmap)); 317 PetscCall(PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt))); 318 for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1; 319 #endif 320 PetscFunctionReturn(0); 321 } 322 323 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv,orow,ocol) \ 324 { \ 325 if (col <= lastcol1) low1 = 0; \ 326 else high1 = nrow1; \ 327 lastcol1 = col;\ 328 while (high1-low1 > 5) { \ 329 t = (low1+high1)/2; \ 330 if (rp1[t] > col) high1 = t; \ 331 else low1 = t; \ 332 } \ 333 for (_i=low1; _i<high1; _i++) { \ 334 if (rp1[_i] > col) break; \ 335 if (rp1[_i] == col) { \ 336 if (addv == ADD_VALUES) { \ 337 ap1[_i] += value; \ 338 /* Not sure LogFlops will slow dow the code or not */ \ 339 (void)PetscLogFlops(1.0); \ 340 } \ 341 else ap1[_i] = value; \ 342 goto a_noinsert; \ 343 } \ 344 } \ 345 if (value == 0.0 && ignorezeroentries && row != col) {low1 = 0; high1 = nrow1;goto a_noinsert;} \ 346 if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \ 347 PetscCheck(nonew != -1,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \ 348 MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \ 349 N = nrow1++ - 1; a->nz++; high1++; \ 350 /* shift up all the later entries in this row */ \ 351 PetscCall(PetscArraymove(rp1+_i+1,rp1+_i,N-_i+1));\ 352 
PetscCall(PetscArraymove(ap1+_i+1,ap1+_i,N-_i+1));\ 353 rp1[_i] = col; \ 354 ap1[_i] = value; \ 355 A->nonzerostate++;\ 356 a_noinsert: ; \ 357 ailen[row] = nrow1; \ 358 } 359 360 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv,orow,ocol) \ 361 { \ 362 if (col <= lastcol2) low2 = 0; \ 363 else high2 = nrow2; \ 364 lastcol2 = col; \ 365 while (high2-low2 > 5) { \ 366 t = (low2+high2)/2; \ 367 if (rp2[t] > col) high2 = t; \ 368 else low2 = t; \ 369 } \ 370 for (_i=low2; _i<high2; _i++) { \ 371 if (rp2[_i] > col) break; \ 372 if (rp2[_i] == col) { \ 373 if (addv == ADD_VALUES) { \ 374 ap2[_i] += value; \ 375 (void)PetscLogFlops(1.0); \ 376 } \ 377 else ap2[_i] = value; \ 378 goto b_noinsert; \ 379 } \ 380 } \ 381 if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 382 if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 383 PetscCheck(nonew != -1,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \ 384 MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \ 385 N = nrow2++ - 1; b->nz++; high2++; \ 386 /* shift up all the later entries in this row */ \ 387 PetscCall(PetscArraymove(rp2+_i+1,rp2+_i,N-_i+1));\ 388 PetscCall(PetscArraymove(ap2+_i+1,ap2+_i,N-_i+1));\ 389 rp2[_i] = col; \ 390 ap2[_i] = value; \ 391 B->nonzerostate++; \ 392 b_noinsert: ; \ 393 bilen[row] = nrow2; \ 394 } 395 396 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[]) 397 { 398 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 399 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data; 400 PetscInt l,*garray = mat->garray,diag; 401 PetscScalar *aa,*ba; 402 403 PetscFunctionBegin; 404 /* code only works for square matrices A */ 405 406 /* find size of row to the left of the diagonal part */ 407 PetscCall(MatGetOwnershipRange(A,&diag,NULL)); 408 row = row - diag; 409 for (l=0; 
l<b->i[row+1]-b->i[row]; l++) { 410 if (garray[b->j[b->i[row]+l]] > diag) break; 411 } 412 if (l) { 413 PetscCall(MatSeqAIJGetArray(mat->B,&ba)); 414 PetscCall(PetscArraycpy(ba+b->i[row],v,l)); 415 PetscCall(MatSeqAIJRestoreArray(mat->B,&ba)); 416 } 417 418 /* diagonal part */ 419 if (a->i[row+1]-a->i[row]) { 420 PetscCall(MatSeqAIJGetArray(mat->A,&aa)); 421 PetscCall(PetscArraycpy(aa+a->i[row],v+l,(a->i[row+1]-a->i[row]))); 422 PetscCall(MatSeqAIJRestoreArray(mat->A,&aa)); 423 } 424 425 /* right of diagonal part */ 426 if (b->i[row+1]-b->i[row]-l) { 427 PetscCall(MatSeqAIJGetArray(mat->B,&ba)); 428 PetscCall(PetscArraycpy(ba+b->i[row]+l,v+l+a->i[row+1]-a->i[row],b->i[row+1]-b->i[row]-l)); 429 PetscCall(MatSeqAIJRestoreArray(mat->B,&ba)); 430 } 431 PetscFunctionReturn(0); 432 } 433 434 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 435 { 436 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 437 PetscScalar value = 0.0; 438 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 439 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 440 PetscBool roworiented = aij->roworiented; 441 442 /* Some Variables required in the macro */ 443 Mat A = aij->A; 444 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 445 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 446 PetscBool ignorezeroentries = a->ignorezeroentries; 447 Mat B = aij->B; 448 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 449 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 450 MatScalar *aa,*ba; 451 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 452 PetscInt nonew; 453 MatScalar *ap1,*ap2; 454 455 PetscFunctionBegin; 456 PetscCall(MatSeqAIJGetArray(A,&aa)); 457 PetscCall(MatSeqAIJGetArray(B,&ba)); 458 for (i=0; i<m; i++) { 459 if (im[i] < 0) continue; 460 PetscCheck(im[i] < 
mat->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT,im[i],mat->rmap->N-1); 461 if (im[i] >= rstart && im[i] < rend) { 462 row = im[i] - rstart; 463 lastcol1 = -1; 464 rp1 = aj + ai[row]; 465 ap1 = aa + ai[row]; 466 rmax1 = aimax[row]; 467 nrow1 = ailen[row]; 468 low1 = 0; 469 high1 = nrow1; 470 lastcol2 = -1; 471 rp2 = bj + bi[row]; 472 ap2 = ba + bi[row]; 473 rmax2 = bimax[row]; 474 nrow2 = bilen[row]; 475 low2 = 0; 476 high2 = nrow2; 477 478 for (j=0; j<n; j++) { 479 if (v) value = roworiented ? v[i*n+j] : v[i+j*m]; 480 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue; 481 if (in[j] >= cstart && in[j] < cend) { 482 col = in[j] - cstart; 483 nonew = a->nonew; 484 MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]); 485 } else if (in[j] < 0) continue; 486 else PetscCheck(in[j] < mat->cmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT,in[j],mat->cmap->N-1); 487 else { 488 if (mat->was_assembled) { 489 if (!aij->colmap) { 490 PetscCall(MatCreateColmap_MPIAIJ_Private(mat)); 491 } 492 #if defined(PETSC_USE_CTABLE) 493 PetscCall(PetscTableFind(aij->colmap,in[j]+1,&col)); /* map global col ids to local ones */ 494 col--; 495 #else 496 col = aij->colmap[in[j]] - 1; 497 #endif 498 if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) { /* col < 0 means in[j] is a new col for B */ 499 PetscCall(MatDisAssemble_MPIAIJ(mat)); /* Change aij->B from reduced/local format to expanded/global format */ 500 col = in[j]; 501 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 502 B = aij->B; 503 b = (Mat_SeqAIJ*)B->data; 504 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a; 505 rp2 = bj + bi[row]; 506 ap2 = ba + bi[row]; 507 rmax2 = bimax[row]; 508 nrow2 = bilen[row]; 509 low2 = 0; 510 high2 = nrow2; 511 bm = aij->B->rmap->n; 512 ba = b->a; 513 } else if (col < 0 && 
!(ignorezeroentries && value == 0.0)) { 514 if (1 == ((Mat_SeqAIJ*)(aij->B->data))->nonew) { 515 PetscCall(PetscInfo(mat,"Skipping of insertion of new nonzero location in off-diagonal portion of matrix %g(%" PetscInt_FMT ",%" PetscInt_FMT ")\n",(double)PetscRealPart(value),im[i],in[j])); 516 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", im[i], in[j]); 517 } 518 } else col = in[j]; 519 nonew = b->nonew; 520 MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]); 521 } 522 } 523 } else { 524 PetscCheck(!mat->nooffprocentries,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 525 if (!aij->donotstash) { 526 mat->assembled = PETSC_FALSE; 527 if (roworiented) { 528 PetscCall(MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)))); 529 } else { 530 PetscCall(MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)))); 531 } 532 } 533 } 534 } 535 PetscCall(MatSeqAIJRestoreArray(A,&aa)); /* aa, bb might have been free'd due to reallocation above. But we don't access them here */ 536 PetscCall(MatSeqAIJRestoreArray(B,&ba)); 537 PetscFunctionReturn(0); 538 } 539 540 /* 541 This function sets the j and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix. 542 The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like). 543 No off-processor parts off the matrix are allowed here and mat->was_assembled has to be PETSC_FALSE. 
544 */ 545 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat_Symbolic(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[]) 546 { 547 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 548 Mat A = aij->A; /* diagonal part of the matrix */ 549 Mat B = aij->B; /* offdiagonal part of the matrix */ 550 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 551 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 552 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,col; 553 PetscInt *ailen = a->ilen,*aj = a->j; 554 PetscInt *bilen = b->ilen,*bj = b->j; 555 PetscInt am = aij->A->rmap->n,j; 556 PetscInt diag_so_far = 0,dnz; 557 PetscInt offd_so_far = 0,onz; 558 559 PetscFunctionBegin; 560 /* Iterate over all rows of the matrix */ 561 for (j=0; j<am; j++) { 562 dnz = onz = 0; 563 /* Iterate over all non-zero columns of the current row */ 564 for (col=mat_i[j]; col<mat_i[j+1]; col++) { 565 /* If column is in the diagonal */ 566 if (mat_j[col] >= cstart && mat_j[col] < cend) { 567 aj[diag_so_far++] = mat_j[col] - cstart; 568 dnz++; 569 } else { /* off-diagonal entries */ 570 bj[offd_so_far++] = mat_j[col]; 571 onz++; 572 } 573 } 574 ailen[j] = dnz; 575 bilen[j] = onz; 576 } 577 PetscFunctionReturn(0); 578 } 579 580 /* 581 This function sets the local j, a and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix. 582 The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like). 583 No off-processor parts off the matrix are allowed here, they are set at a later point by MatSetValues_MPIAIJ. 584 Also, mat->was_assembled has to be false, otherwise the statement aj[rowstart_diag+dnz_row] = mat_j[col] - cstart; 585 would not be true and the more complex MatSetValues_MPIAIJ has to be used. 
586 */ 587 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[],const PetscScalar mat_a[]) 588 { 589 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 590 Mat A = aij->A; /* diagonal part of the matrix */ 591 Mat B = aij->B; /* offdiagonal part of the matrix */ 592 Mat_SeqAIJ *aijd =(Mat_SeqAIJ*)(aij->A)->data,*aijo=(Mat_SeqAIJ*)(aij->B)->data; 593 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 594 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 595 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend; 596 PetscInt *ailen = a->ilen,*aj = a->j; 597 PetscInt *bilen = b->ilen,*bj = b->j; 598 PetscInt am = aij->A->rmap->n,j; 599 PetscInt *full_diag_i=aijd->i,*full_offd_i=aijo->i; /* These variables can also include non-local elements, which are set at a later point. */ 600 PetscInt col,dnz_row,onz_row,rowstart_diag,rowstart_offd; 601 PetscScalar *aa = a->a,*ba = b->a; 602 603 PetscFunctionBegin; 604 /* Iterate over all rows of the matrix */ 605 for (j=0; j<am; j++) { 606 dnz_row = onz_row = 0; 607 rowstart_offd = full_offd_i[j]; 608 rowstart_diag = full_diag_i[j]; 609 /* Iterate over all non-zero columns of the current row */ 610 for (col=mat_i[j]; col<mat_i[j+1]; col++) { 611 /* If column is in the diagonal */ 612 if (mat_j[col] >= cstart && mat_j[col] < cend) { 613 aj[rowstart_diag+dnz_row] = mat_j[col] - cstart; 614 aa[rowstart_diag+dnz_row] = mat_a[col]; 615 dnz_row++; 616 } else { /* off-diagonal entries */ 617 bj[rowstart_offd+onz_row] = mat_j[col]; 618 ba[rowstart_offd+onz_row] = mat_a[col]; 619 onz_row++; 620 } 621 } 622 ailen[j] = dnz_row; 623 bilen[j] = onz_row; 624 } 625 PetscFunctionReturn(0); 626 } 627 628 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 629 { 630 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 631 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 632 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 633 
634 PetscFunctionBegin; 635 for (i=0; i<m; i++) { 636 if (idxm[i] < 0) continue; /* negative row */ 637 PetscCheck(idxm[i] < mat->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT,idxm[i],mat->rmap->N-1); 638 if (idxm[i] >= rstart && idxm[i] < rend) { 639 row = idxm[i] - rstart; 640 for (j=0; j<n; j++) { 641 if (idxn[j] < 0) continue; /* negative column */ 642 PetscCheck(idxn[j] < mat->cmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT,idxn[j],mat->cmap->N-1); 643 if (idxn[j] >= cstart && idxn[j] < cend) { 644 col = idxn[j] - cstart; 645 PetscCall(MatGetValues(aij->A,1,&row,1,&col,v+i*n+j)); 646 } else { 647 if (!aij->colmap) { 648 PetscCall(MatCreateColmap_MPIAIJ_Private(mat)); 649 } 650 #if defined(PETSC_USE_CTABLE) 651 PetscCall(PetscTableFind(aij->colmap,idxn[j]+1,&col)); 652 col--; 653 #else 654 col = aij->colmap[idxn[j]] - 1; 655 #endif 656 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 657 else { 658 PetscCall(MatGetValues(aij->B,1,&row,1,&col,v+i*n+j)); 659 } 660 } 661 } 662 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 663 } 664 PetscFunctionReturn(0); 665 } 666 667 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 668 { 669 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 670 PetscInt nstash,reallocs; 671 672 PetscFunctionBegin; 673 if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0); 674 675 PetscCall(MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range)); 676 PetscCall(MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs)); 677 PetscCall(PetscInfo(aij->A,"Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n",nstash,reallocs)); 678 PetscFunctionReturn(0); 679 } 680 681 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 682 { 683 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 684 PetscMPIInt n; 685 PetscInt 
i,j,rstart,ncols,flg; 686 PetscInt *row,*col; 687 PetscBool other_disassembled; 688 PetscScalar *val; 689 690 /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */ 691 692 PetscFunctionBegin; 693 if (!aij->donotstash && !mat->nooffprocentries) { 694 while (1) { 695 PetscCall(MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg)); 696 if (!flg) break; 697 698 for (i=0; i<n;) { 699 /* Now identify the consecutive vals belonging to the same row */ 700 for (j=i,rstart=row[j]; j<n; j++) { 701 if (row[j] != rstart) break; 702 } 703 if (j < n) ncols = j-i; 704 else ncols = n-i; 705 /* Now assemble all these values with a single function call */ 706 PetscCall(MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode)); 707 i = j; 708 } 709 } 710 PetscCall(MatStashScatterEnd_Private(&mat->stash)); 711 } 712 #if defined(PETSC_HAVE_DEVICE) 713 if (mat->offloadmask == PETSC_OFFLOAD_CPU) aij->A->offloadmask = PETSC_OFFLOAD_CPU; 714 /* We call MatBindToCPU() on aij->A and aij->B here, because if MatBindToCPU_MPIAIJ() is called before assembly, it cannot bind these. */ 715 if (mat->boundtocpu) { 716 PetscCall(MatBindToCPU(aij->A,PETSC_TRUE)); 717 PetscCall(MatBindToCPU(aij->B,PETSC_TRUE)); 718 } 719 #endif 720 PetscCall(MatAssemblyBegin(aij->A,mode)); 721 PetscCall(MatAssemblyEnd(aij->A,mode)); 722 723 /* determine if any processor has disassembled, if so we must 724 also disassemble ourself, in order that we may reassemble. 
*/ 725 /* 726 if nonzero structure of submatrix B cannot change then we know that 727 no processor disassembled thus we can skip this stuff 728 */ 729 if (!((Mat_SeqAIJ*)aij->B->data)->nonew) { 730 PetscCall(MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat))); 731 if (mat->was_assembled && !other_disassembled) { /* mat on this rank has reduced off-diag B with local col ids, but globaly it does not */ 732 PetscCall(MatDisAssemble_MPIAIJ(mat)); 733 } 734 } 735 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 736 PetscCall(MatSetUpMultiply_MPIAIJ(mat)); 737 } 738 PetscCall(MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE)); 739 #if defined(PETSC_HAVE_DEVICE) 740 if (mat->offloadmask == PETSC_OFFLOAD_CPU && aij->B->offloadmask != PETSC_OFFLOAD_UNALLOCATED) aij->B->offloadmask = PETSC_OFFLOAD_CPU; 741 #endif 742 PetscCall(MatAssemblyBegin(aij->B,mode)); 743 PetscCall(MatAssemblyEnd(aij->B,mode)); 744 745 PetscCall(PetscFree2(aij->rowvalues,aij->rowindices)); 746 747 aij->rowvalues = NULL; 748 749 PetscCall(VecDestroy(&aij->diag)); 750 751 /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */ 752 if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 753 PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate; 754 PetscCall(MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat))); 755 } 756 #if defined(PETSC_HAVE_DEVICE) 757 mat->offloadmask = PETSC_OFFLOAD_BOTH; 758 #endif 759 PetscFunctionReturn(0); 760 } 761 762 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A) 763 { 764 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 765 766 PetscFunctionBegin; 767 PetscCall(MatZeroEntries(l->A)); 768 PetscCall(MatZeroEntries(l->B)); 769 PetscFunctionReturn(0); 770 } 771 772 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 773 { 
774 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data; 775 PetscObjectState sA, sB; 776 PetscInt *lrows; 777 PetscInt r, len; 778 PetscBool cong, lch, gch; 779 780 PetscFunctionBegin; 781 /* get locally owned rows */ 782 PetscCall(MatZeroRowsMapLocal_Private(A,N,rows,&len,&lrows)); 783 PetscCall(MatHasCongruentLayouts(A,&cong)); 784 /* fix right hand side if needed */ 785 if (x && b) { 786 const PetscScalar *xx; 787 PetscScalar *bb; 788 789 PetscCheck(cong,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Need matching row/col layout"); 790 PetscCall(VecGetArrayRead(x, &xx)); 791 PetscCall(VecGetArray(b, &bb)); 792 for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]]; 793 PetscCall(VecRestoreArrayRead(x, &xx)); 794 PetscCall(VecRestoreArray(b, &bb)); 795 } 796 797 sA = mat->A->nonzerostate; 798 sB = mat->B->nonzerostate; 799 800 if (diag != 0.0 && cong) { 801 PetscCall(MatZeroRows(mat->A, len, lrows, diag, NULL, NULL)); 802 PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL)); 803 } else if (diag != 0.0) { /* non-square or non congruent layouts -> if keepnonzeropattern is false, we allow for new insertion */ 804 Mat_SeqAIJ *aijA = (Mat_SeqAIJ*)mat->A->data; 805 Mat_SeqAIJ *aijB = (Mat_SeqAIJ*)mat->B->data; 806 PetscInt nnwA, nnwB; 807 PetscBool nnzA, nnzB; 808 809 nnwA = aijA->nonew; 810 nnwB = aijB->nonew; 811 nnzA = aijA->keepnonzeropattern; 812 nnzB = aijB->keepnonzeropattern; 813 if (!nnzA) { 814 PetscCall(PetscInfo(mat->A,"Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on diagonal block.\n")); 815 aijA->nonew = 0; 816 } 817 if (!nnzB) { 818 PetscCall(PetscInfo(mat->B,"Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on off-diagonal block.\n")); 819 aijB->nonew = 0; 820 } 821 /* Must zero here before the next loop */ 822 PetscCall(MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL)); 823 PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL)); 824 for (r = 0; r < len; ++r) { 
825 const PetscInt row = lrows[r] + A->rmap->rstart; 826 if (row >= A->cmap->N) continue; 827 PetscCall(MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES)); 828 } 829 aijA->nonew = nnwA; 830 aijB->nonew = nnwB; 831 } else { 832 PetscCall(MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL)); 833 PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL)); 834 } 835 PetscCall(PetscFree(lrows)); 836 PetscCall(MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY)); 837 PetscCall(MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY)); 838 839 /* reduce nonzerostate */ 840 lch = (PetscBool)(sA != mat->A->nonzerostate || sB != mat->B->nonzerostate); 841 PetscCall(MPIU_Allreduce(&lch,&gch,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)A))); 842 if (gch) A->nonzerostate++; 843 PetscFunctionReturn(0); 844 } 845 846 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 847 { 848 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 849 PetscMPIInt n = A->rmap->n; 850 PetscInt i,j,r,m,len = 0; 851 PetscInt *lrows,*owners = A->rmap->range; 852 PetscMPIInt p = 0; 853 PetscSFNode *rrows; 854 PetscSF sf; 855 const PetscScalar *xx; 856 PetscScalar *bb,*mask,*aij_a; 857 Vec xmask,lmask; 858 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)l->B->data; 859 const PetscInt *aj, *ii,*ridx; 860 PetscScalar *aa; 861 862 PetscFunctionBegin; 863 /* Create SF where leaves are input rows and roots are owned rows */ 864 PetscCall(PetscMalloc1(n, &lrows)); 865 for (r = 0; r < n; ++r) lrows[r] = -1; 866 PetscCall(PetscMalloc1(N, &rrows)); 867 for (r = 0; r < N; ++r) { 868 const PetscInt idx = rows[r]; 869 PetscCheck(idx >= 0 && A->rmap->N > idx,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %" PetscInt_FMT " out of range [0,%" PetscInt_FMT ")",idx,A->rmap->N); 870 if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */ 871 PetscCall(PetscLayoutFindOwner(A->rmap,idx,&p)); 872 } 873 rrows[r].rank = p; 874 rrows[r].index = rows[r] - owners[p]; 875 } 876 
PetscCall(PetscSFCreate(PetscObjectComm((PetscObject) A), &sf)); 877 PetscCall(PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER)); 878 /* Collect flags for rows to be zeroed */ 879 PetscCall(PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR)); 880 PetscCall(PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR)); 881 PetscCall(PetscSFDestroy(&sf)); 882 /* Compress and put in row numbers */ 883 for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r; 884 /* zero diagonal part of matrix */ 885 PetscCall(MatZeroRowsColumns(l->A,len,lrows,diag,x,b)); 886 /* handle off diagonal part of matrix */ 887 PetscCall(MatCreateVecs(A,&xmask,NULL)); 888 PetscCall(VecDuplicate(l->lvec,&lmask)); 889 PetscCall(VecGetArray(xmask,&bb)); 890 for (i=0; i<len; i++) bb[lrows[i]] = 1; 891 PetscCall(VecRestoreArray(xmask,&bb)); 892 PetscCall(VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD)); 893 PetscCall(VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD)); 894 PetscCall(VecDestroy(&xmask)); 895 if (x && b) { /* this code is buggy when the row and column layout don't match */ 896 PetscBool cong; 897 898 PetscCall(MatHasCongruentLayouts(A,&cong)); 899 PetscCheck(cong,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Need matching row/col layout"); 900 PetscCall(VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD)); 901 PetscCall(VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD)); 902 PetscCall(VecGetArrayRead(l->lvec,&xx)); 903 PetscCall(VecGetArray(b,&bb)); 904 } 905 PetscCall(VecGetArray(lmask,&mask)); 906 /* remove zeroed rows of off diagonal matrix */ 907 PetscCall(MatSeqAIJGetArray(l->B,&aij_a)); 908 ii = aij->i; 909 for (i=0; i<len; i++) { 910 PetscCall(PetscArrayzero(aij_a + ii[lrows[i]],ii[lrows[i]+1] - ii[lrows[i]])); 911 } 912 /* loop over all elements of off process part of matrix zeroing removed columns*/ 913 if (aij->compressedrow.use) { 914 m = 
aij->compressedrow.nrows; 915 ii = aij->compressedrow.i; 916 ridx = aij->compressedrow.rindex; 917 for (i=0; i<m; i++) { 918 n = ii[i+1] - ii[i]; 919 aj = aij->j + ii[i]; 920 aa = aij_a + ii[i]; 921 922 for (j=0; j<n; j++) { 923 if (PetscAbsScalar(mask[*aj])) { 924 if (b) bb[*ridx] -= *aa*xx[*aj]; 925 *aa = 0.0; 926 } 927 aa++; 928 aj++; 929 } 930 ridx++; 931 } 932 } else { /* do not use compressed row format */ 933 m = l->B->rmap->n; 934 for (i=0; i<m; i++) { 935 n = ii[i+1] - ii[i]; 936 aj = aij->j + ii[i]; 937 aa = aij_a + ii[i]; 938 for (j=0; j<n; j++) { 939 if (PetscAbsScalar(mask[*aj])) { 940 if (b) bb[i] -= *aa*xx[*aj]; 941 *aa = 0.0; 942 } 943 aa++; 944 aj++; 945 } 946 } 947 } 948 if (x && b) { 949 PetscCall(VecRestoreArray(b,&bb)); 950 PetscCall(VecRestoreArrayRead(l->lvec,&xx)); 951 } 952 PetscCall(MatSeqAIJRestoreArray(l->B,&aij_a)); 953 PetscCall(VecRestoreArray(lmask,&mask)); 954 PetscCall(VecDestroy(&lmask)); 955 PetscCall(PetscFree(lrows)); 956 957 /* only change matrix nonzero state if pattern was allowed to be changed */ 958 if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) { 959 PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate; 960 PetscCall(MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A))); 961 } 962 PetscFunctionReturn(0); 963 } 964 965 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 966 { 967 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 968 PetscInt nt; 969 VecScatter Mvctx = a->Mvctx; 970 971 PetscFunctionBegin; 972 PetscCall(VecGetLocalSize(xx,&nt)); 973 PetscCheck(nt == A->cmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%" PetscInt_FMT ") and xx (%" PetscInt_FMT ")",A->cmap->n,nt); 974 PetscCall(VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD)); 975 PetscCall((*a->A->ops->mult)(a->A,xx,yy)); 976 PetscCall(VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD)); 977 PetscCall((*a->B->ops->multadd)(a->B,a->lvec,yy,yy)); 978 
PetscFunctionReturn(0); 979 } 980 981 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx) 982 { 983 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 984 985 PetscFunctionBegin; 986 PetscCall(MatMultDiagonalBlock(a->A,bb,xx)); 987 PetscFunctionReturn(0); 988 } 989 990 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 991 { 992 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 993 VecScatter Mvctx = a->Mvctx; 994 995 PetscFunctionBegin; 996 PetscCall(VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD)); 997 PetscCall((*a->A->ops->multadd)(a->A,xx,yy,zz)); 998 PetscCall(VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD)); 999 PetscCall((*a->B->ops->multadd)(a->B,a->lvec,zz,zz)); 1000 PetscFunctionReturn(0); 1001 } 1002 1003 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy) 1004 { 1005 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1006 1007 PetscFunctionBegin; 1008 /* do nondiagonal part */ 1009 PetscCall((*a->B->ops->multtranspose)(a->B,xx,a->lvec)); 1010 /* do local part */ 1011 PetscCall((*a->A->ops->multtranspose)(a->A,xx,yy)); 1012 /* add partial results together */ 1013 PetscCall(VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE)); 1014 PetscCall(VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE)); 1015 PetscFunctionReturn(0); 1016 } 1017 1018 PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool *f) 1019 { 1020 MPI_Comm comm; 1021 Mat_MPIAIJ *Aij = (Mat_MPIAIJ*) Amat->data, *Bij; 1022 Mat Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs; 1023 IS Me,Notme; 1024 PetscInt M,N,first,last,*notme,i; 1025 PetscBool lf; 1026 PetscMPIInt size; 1027 1028 PetscFunctionBegin; 1029 /* Easy test: symmetric diagonal block */ 1030 Bij = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A; 1031 PetscCall(MatIsTranspose(Adia,Bdia,tol,&lf)); 1032 PetscCall(MPIU_Allreduce(&lf,f,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)Amat))); 1033 if (!*f) PetscFunctionReturn(0); 1034 
PetscCall(PetscObjectGetComm((PetscObject)Amat,&comm)); 1035 PetscCallMPI(MPI_Comm_size(comm,&size)); 1036 if (size == 1) PetscFunctionReturn(0); 1037 1038 /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. */ 1039 PetscCall(MatGetSize(Amat,&M,&N)); 1040 PetscCall(MatGetOwnershipRange(Amat,&first,&last)); 1041 PetscCall(PetscMalloc1(N-last+first,¬me)); 1042 for (i=0; i<first; i++) notme[i] = i; 1043 for (i=last; i<M; i++) notme[i-last+first] = i; 1044 PetscCall(ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme)); 1045 PetscCall(ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me)); 1046 PetscCall(MatCreateSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs)); 1047 Aoff = Aoffs[0]; 1048 PetscCall(MatCreateSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs)); 1049 Boff = Boffs[0]; 1050 PetscCall(MatIsTranspose(Aoff,Boff,tol,f)); 1051 PetscCall(MatDestroyMatrices(1,&Aoffs)); 1052 PetscCall(MatDestroyMatrices(1,&Boffs)); 1053 PetscCall(ISDestroy(&Me)); 1054 PetscCall(ISDestroy(&Notme)); 1055 PetscCall(PetscFree(notme)); 1056 PetscFunctionReturn(0); 1057 } 1058 1059 PetscErrorCode MatIsSymmetric_MPIAIJ(Mat A,PetscReal tol,PetscBool *f) 1060 { 1061 PetscFunctionBegin; 1062 PetscCall(MatIsTranspose_MPIAIJ(A,A,tol,f)); 1063 PetscFunctionReturn(0); 1064 } 1065 1066 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1067 { 1068 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1069 1070 PetscFunctionBegin; 1071 /* do nondiagonal part */ 1072 PetscCall((*a->B->ops->multtranspose)(a->B,xx,a->lvec)); 1073 /* do local part */ 1074 PetscCall((*a->A->ops->multtransposeadd)(a->A,xx,yy,zz)); 1075 /* add partial results together */ 1076 PetscCall(VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE)); 1077 PetscCall(VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE)); 1078 PetscFunctionReturn(0); 1079 } 1080 1081 /* 1082 This only works correctly for square matrices where the subblock A->A is the 1083 
diagonal block 1084 */ 1085 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v) 1086 { 1087 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1088 1089 PetscFunctionBegin; 1090 PetscCheck(A->rmap->N == A->cmap->N,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 1091 PetscCheck(A->rmap->rstart == A->cmap->rstart && A->rmap->rend == A->cmap->rend,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition"); 1092 PetscCall(MatGetDiagonal(a->A,v)); 1093 PetscFunctionReturn(0); 1094 } 1095 1096 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa) 1097 { 1098 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1099 1100 PetscFunctionBegin; 1101 PetscCall(MatScale(a->A,aa)); 1102 PetscCall(MatScale(a->B,aa)); 1103 PetscFunctionReturn(0); 1104 } 1105 1106 /* Free COO stuff; must match allocation methods in MatSetPreallocationCOO_MPIAIJ() */ 1107 PETSC_INTERN PetscErrorCode MatResetPreallocationCOO_MPIAIJ(Mat mat) 1108 { 1109 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1110 1111 PetscFunctionBegin; 1112 PetscCall(PetscSFDestroy(&aij->coo_sf)); 1113 PetscCall(PetscFree(aij->Aperm1)); 1114 PetscCall(PetscFree(aij->Bperm1)); 1115 PetscCall(PetscFree(aij->Ajmap1)); 1116 PetscCall(PetscFree(aij->Bjmap1)); 1117 1118 PetscCall(PetscFree(aij->Aimap2)); 1119 PetscCall(PetscFree(aij->Bimap2)); 1120 PetscCall(PetscFree(aij->Aperm2)); 1121 PetscCall(PetscFree(aij->Bperm2)); 1122 PetscCall(PetscFree(aij->Ajmap2)); 1123 PetscCall(PetscFree(aij->Bjmap2)); 1124 1125 PetscCall(PetscFree2(aij->sendbuf,aij->recvbuf)); 1126 PetscCall(PetscFree(aij->Cperm1)); 1127 PetscFunctionReturn(0); 1128 } 1129 1130 PetscErrorCode MatDestroy_MPIAIJ(Mat mat) 1131 { 1132 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1133 1134 PetscFunctionBegin; 1135 #if defined(PETSC_USE_LOG) 1136 PetscLogObjectState((PetscObject)mat,"Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT,mat->rmap->N,mat->cmap->N); 1137 #endif 1138 PetscCall(MatStashDestroy_Private(&mat->stash)); 1139 
PetscCall(VecDestroy(&aij->diag)); 1140 PetscCall(MatDestroy(&aij->A)); 1141 PetscCall(MatDestroy(&aij->B)); 1142 #if defined(PETSC_USE_CTABLE) 1143 PetscCall(PetscTableDestroy(&aij->colmap)); 1144 #else 1145 PetscCall(PetscFree(aij->colmap)); 1146 #endif 1147 PetscCall(PetscFree(aij->garray)); 1148 PetscCall(VecDestroy(&aij->lvec)); 1149 PetscCall(VecScatterDestroy(&aij->Mvctx)); 1150 PetscCall(PetscFree2(aij->rowvalues,aij->rowindices)); 1151 PetscCall(PetscFree(aij->ld)); 1152 1153 /* Free COO */ 1154 PetscCall(MatResetPreallocationCOO_MPIAIJ(mat)); 1155 1156 PetscCall(PetscFree(mat->data)); 1157 1158 /* may be created by MatCreateMPIAIJSumSeqAIJSymbolic */ 1159 PetscCall(PetscObjectCompose((PetscObject)mat,"MatMergeSeqsToMPI",NULL)); 1160 1161 PetscCall(PetscObjectChangeTypeName((PetscObject)mat,NULL)); 1162 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL)); 1163 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL)); 1164 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL)); 1165 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL)); 1166 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatResetPreallocation_C",NULL)); 1167 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL)); 1168 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL)); 1169 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpibaij_C",NULL)); 1170 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL)); 1171 #if defined(PETSC_HAVE_CUDA) 1172 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijcusparse_C",NULL)); 1173 #endif 1174 #if defined(PETSC_HAVE_KOKKOS_KERNELS) 1175 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijkokkos_C",NULL)); 1176 #endif 1177 
PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpidense_C",NULL)); 1178 #if defined(PETSC_HAVE_ELEMENTAL) 1179 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_elemental_C",NULL)); 1180 #endif 1181 #if defined(PETSC_HAVE_SCALAPACK) 1182 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_scalapack_C",NULL)); 1183 #endif 1184 #if defined(PETSC_HAVE_HYPRE) 1185 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_hypre_C",NULL)); 1186 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_transpose_mpiaij_mpiaij_C",NULL)); 1187 #endif 1188 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_is_C",NULL)); 1189 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_is_mpiaij_C",NULL)); 1190 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaij_mpiaij_C",NULL)); 1191 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetUseScalableIncreaseOverlap_C",NULL)); 1192 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijperm_C",NULL)); 1193 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijsell_C",NULL)); 1194 #if defined(PETSC_HAVE_MKL_SPARSE) 1195 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijmkl_C",NULL)); 1196 #endif 1197 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijcrl_C",NULL)); 1198 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_is_C",NULL)); 1199 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisell_C",NULL)); 1200 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatSetPreallocationCOO_C",NULL)); 1201 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatSetValuesCOO_C",NULL)); 1202 PetscFunctionReturn(0); 1203 } 1204 1205 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer 
viewer) 1206 { 1207 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1208 Mat_SeqAIJ *A = (Mat_SeqAIJ*)aij->A->data; 1209 Mat_SeqAIJ *B = (Mat_SeqAIJ*)aij->B->data; 1210 const PetscInt *garray = aij->garray; 1211 const PetscScalar *aa,*ba; 1212 PetscInt header[4],M,N,m,rs,cs,nz,cnt,i,ja,jb; 1213 PetscInt *rowlens; 1214 PetscInt *colidxs; 1215 PetscScalar *matvals; 1216 1217 PetscFunctionBegin; 1218 PetscCall(PetscViewerSetUp(viewer)); 1219 1220 M = mat->rmap->N; 1221 N = mat->cmap->N; 1222 m = mat->rmap->n; 1223 rs = mat->rmap->rstart; 1224 cs = mat->cmap->rstart; 1225 nz = A->nz + B->nz; 1226 1227 /* write matrix header */ 1228 header[0] = MAT_FILE_CLASSID; 1229 header[1] = M; header[2] = N; header[3] = nz; 1230 PetscCallMPI(MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat))); 1231 PetscCall(PetscViewerBinaryWrite(viewer,header,4,PETSC_INT)); 1232 1233 /* fill in and store row lengths */ 1234 PetscCall(PetscMalloc1(m,&rowlens)); 1235 for (i=0; i<m; i++) rowlens[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]; 1236 PetscCall(PetscViewerBinaryWriteAll(viewer,rowlens,m,rs,M,PETSC_INT)); 1237 PetscCall(PetscFree(rowlens)); 1238 1239 /* fill in and store column indices */ 1240 PetscCall(PetscMalloc1(nz,&colidxs)); 1241 for (cnt=0, i=0; i<m; i++) { 1242 for (jb=B->i[i]; jb<B->i[i+1]; jb++) { 1243 if (garray[B->j[jb]] > cs) break; 1244 colidxs[cnt++] = garray[B->j[jb]]; 1245 } 1246 for (ja=A->i[i]; ja<A->i[i+1]; ja++) 1247 colidxs[cnt++] = A->j[ja] + cs; 1248 for (; jb<B->i[i+1]; jb++) 1249 colidxs[cnt++] = garray[B->j[jb]]; 1250 } 1251 PetscCheck(cnt == nz,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt_FMT,cnt,nz); 1252 PetscCall(PetscViewerBinaryWriteAll(viewer,colidxs,nz,PETSC_DETERMINE,PETSC_DETERMINE,PETSC_INT)); 1253 PetscCall(PetscFree(colidxs)); 1254 1255 /* fill in and store nonzero values */ 1256 PetscCall(MatSeqAIJGetArrayRead(aij->A,&aa)); 1257 PetscCall(MatSeqAIJGetArrayRead(aij->B,&ba)); 
1258 PetscCall(PetscMalloc1(nz,&matvals)); 1259 for (cnt=0, i=0; i<m; i++) { 1260 for (jb=B->i[i]; jb<B->i[i+1]; jb++) { 1261 if (garray[B->j[jb]] > cs) break; 1262 matvals[cnt++] = ba[jb]; 1263 } 1264 for (ja=A->i[i]; ja<A->i[i+1]; ja++) 1265 matvals[cnt++] = aa[ja]; 1266 for (; jb<B->i[i+1]; jb++) 1267 matvals[cnt++] = ba[jb]; 1268 } 1269 PetscCall(MatSeqAIJRestoreArrayRead(aij->A,&aa)); 1270 PetscCall(MatSeqAIJRestoreArrayRead(aij->B,&ba)); 1271 PetscCheck(cnt == nz,PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt_FMT,cnt,nz); 1272 PetscCall(PetscViewerBinaryWriteAll(viewer,matvals,nz,PETSC_DETERMINE,PETSC_DETERMINE,PETSC_SCALAR)); 1273 PetscCall(PetscFree(matvals)); 1274 1275 /* write block size option to the viewer's .info file */ 1276 PetscCall(MatView_Binary_BlockSizes(mat,viewer)); 1277 PetscFunctionReturn(0); 1278 } 1279 1280 #include <petscdraw.h> 1281 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 1282 { 1283 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1284 PetscMPIInt rank = aij->rank,size = aij->size; 1285 PetscBool isdraw,iascii,isbinary; 1286 PetscViewer sviewer; 1287 PetscViewerFormat format; 1288 1289 PetscFunctionBegin; 1290 PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw)); 1291 PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii)); 1292 PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary)); 1293 if (iascii) { 1294 PetscCall(PetscViewerGetFormat(viewer,&format)); 1295 if (format == PETSC_VIEWER_LOAD_BALANCE) { 1296 PetscInt i,nmax = 0,nmin = PETSC_MAX_INT,navg = 0,*nz,nzlocal = ((Mat_SeqAIJ*) (aij->A->data))->nz + ((Mat_SeqAIJ*) (aij->B->data))->nz; 1297 PetscCall(PetscMalloc1(size,&nz)); 1298 PetscCallMPI(MPI_Allgather(&nzlocal,1,MPIU_INT,nz,1,MPIU_INT,PetscObjectComm((PetscObject)mat))); 1299 for (i=0; i<(PetscInt)size; i++) { 1300 nmax = PetscMax(nmax,nz[i]); 1301 nmin = 
PetscMin(nmin,nz[i]); 1302 navg += nz[i]; 1303 } 1304 PetscCall(PetscFree(nz)); 1305 navg = navg/size; 1306 PetscCall(PetscViewerASCIIPrintf(viewer,"Load Balance - Nonzeros: Min %" PetscInt_FMT " avg %" PetscInt_FMT " max %" PetscInt_FMT "\n",nmin,navg,nmax)); 1307 PetscFunctionReturn(0); 1308 } 1309 PetscCall(PetscViewerGetFormat(viewer,&format)); 1310 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1311 MatInfo info; 1312 PetscInt *inodes=NULL; 1313 1314 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank)); 1315 PetscCall(MatGetInfo(mat,MAT_LOCAL,&info)); 1316 PetscCall(MatInodeGetInodeSizes(aij->A,NULL,&inodes,NULL)); 1317 PetscCall(PetscViewerASCIIPushSynchronized(viewer)); 1318 if (!inodes) { 1319 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %g, not using I-node routines\n", 1320 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory)); 1321 } else { 1322 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %g, using I-node routines\n", 1323 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory)); 1324 } 1325 PetscCall(MatGetInfo(aij->A,MAT_LOCAL,&info)); 1326 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %" PetscInt_FMT " \n",rank,(PetscInt)info.nz_used)); 1327 PetscCall(MatGetInfo(aij->B,MAT_LOCAL,&info)); 1328 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %" PetscInt_FMT " \n",rank,(PetscInt)info.nz_used)); 1329 PetscCall(PetscViewerFlush(viewer)); 1330 PetscCall(PetscViewerASCIIPopSynchronized(viewer)); 1331 PetscCall(PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n")); 1332 PetscCall(VecScatterView(aij->Mvctx,viewer)); 1333 PetscFunctionReturn(0); 1334 } else if (format == 
PETSC_VIEWER_ASCII_INFO) { 1335 PetscInt inodecount,inodelimit,*inodes; 1336 PetscCall(MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit)); 1337 if (inodes) { 1338 PetscCall(PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %" PetscInt_FMT " nodes, limit used is %" PetscInt_FMT "\n",inodecount,inodelimit)); 1339 } else { 1340 PetscCall(PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n")); 1341 } 1342 PetscFunctionReturn(0); 1343 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1344 PetscFunctionReturn(0); 1345 } 1346 } else if (isbinary) { 1347 if (size == 1) { 1348 PetscCall(PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name)); 1349 PetscCall(MatView(aij->A,viewer)); 1350 } else { 1351 PetscCall(MatView_MPIAIJ_Binary(mat,viewer)); 1352 } 1353 PetscFunctionReturn(0); 1354 } else if (iascii && size == 1) { 1355 PetscCall(PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name)); 1356 PetscCall(MatView(aij->A,viewer)); 1357 PetscFunctionReturn(0); 1358 } else if (isdraw) { 1359 PetscDraw draw; 1360 PetscBool isnull; 1361 PetscCall(PetscViewerDrawGetDraw(viewer,0,&draw)); 1362 PetscCall(PetscDrawIsNull(draw,&isnull)); 1363 if (isnull) PetscFunctionReturn(0); 1364 } 1365 1366 { /* assemble the entire matrix onto first processor */ 1367 Mat A = NULL, Av; 1368 IS isrow,iscol; 1369 1370 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->rmap->N : 0,0,1,&isrow)); 1371 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->cmap->N : 0,0,1,&iscol)); 1372 PetscCall(MatCreateSubMatrix(mat,isrow,iscol,MAT_INITIAL_MATRIX,&A)); 1373 PetscCall(MatMPIAIJGetSeqAIJ(A,&Av,NULL,NULL)); 1374 /* The commented code uses MatCreateSubMatrices instead */ 1375 /* 1376 Mat *AA, A = NULL, Av; 1377 IS isrow,iscol; 1378 1379 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? 
mat->rmap->N : 0,0,1,&isrow)); 1380 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->cmap->N : 0,0,1,&iscol)); 1381 PetscCall(MatCreateSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&AA)); 1382 if (rank == 0) { 1383 PetscCall(PetscObjectReference((PetscObject)AA[0])); 1384 A = AA[0]; 1385 Av = AA[0]; 1386 } 1387 PetscCall(MatDestroySubMatrices(1,&AA)); 1388 */ 1389 PetscCall(ISDestroy(&iscol)); 1390 PetscCall(ISDestroy(&isrow)); 1391 /* 1392 Everyone has to call to draw the matrix since the graphics waits are 1393 synchronized across all processors that share the PetscDraw object 1394 */ 1395 PetscCall(PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer)); 1396 if (rank == 0) { 1397 if (((PetscObject)mat)->name) { 1398 PetscCall(PetscObjectSetName((PetscObject)Av,((PetscObject)mat)->name)); 1399 } 1400 PetscCall(MatView_SeqAIJ(Av,sviewer)); 1401 } 1402 PetscCall(PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer)); 1403 PetscCall(PetscViewerFlush(viewer)); 1404 PetscCall(MatDestroy(&A)); 1405 } 1406 PetscFunctionReturn(0); 1407 } 1408 1409 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer) 1410 { 1411 PetscBool iascii,isdraw,issocket,isbinary; 1412 1413 PetscFunctionBegin; 1414 PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii)); 1415 PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw)); 1416 PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary)); 1417 PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket)); 1418 if (iascii || isdraw || isbinary || issocket) { 1419 PetscCall(MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer)); 1420 } 1421 PetscFunctionReturn(0); 1422 } 1423 1424 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1425 { 1426 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1427 Vec bb1 = NULL; 1428 PetscBool hasop; 1429 
1430 PetscFunctionBegin; 1431 if (flag == SOR_APPLY_UPPER) { 1432 PetscCall((*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx)); 1433 PetscFunctionReturn(0); 1434 } 1435 1436 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) { 1437 PetscCall(VecDuplicate(bb,&bb1)); 1438 } 1439 1440 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 1441 if (flag & SOR_ZERO_INITIAL_GUESS) { 1442 PetscCall((*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx)); 1443 its--; 1444 } 1445 1446 while (its--) { 1447 PetscCall(VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD)); 1448 PetscCall(VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD)); 1449 1450 /* update rhs: bb1 = bb - B*x */ 1451 PetscCall(VecScale(mat->lvec,-1.0)); 1452 PetscCall((*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1)); 1453 1454 /* local sweep */ 1455 PetscCall((*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx)); 1456 } 1457 } else if (flag & SOR_LOCAL_FORWARD_SWEEP) { 1458 if (flag & SOR_ZERO_INITIAL_GUESS) { 1459 PetscCall((*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx)); 1460 its--; 1461 } 1462 while (its--) { 1463 PetscCall(VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD)); 1464 PetscCall(VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD)); 1465 1466 /* update rhs: bb1 = bb - B*x */ 1467 PetscCall(VecScale(mat->lvec,-1.0)); 1468 PetscCall((*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1)); 1469 1470 /* local sweep */ 1471 PetscCall((*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx)); 1472 } 1473 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) { 1474 if (flag & SOR_ZERO_INITIAL_GUESS) { 1475 PetscCall((*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx)); 1476 its--; 1477 } 1478 while (its--) { 1479 PetscCall(VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD)); 1480 
PetscCall(VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD)); 1481 1482 /* update rhs: bb1 = bb - B*x */ 1483 PetscCall(VecScale(mat->lvec,-1.0)); 1484 PetscCall((*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1)); 1485 1486 /* local sweep */ 1487 PetscCall((*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx)); 1488 } 1489 } else if (flag & SOR_EISENSTAT) { 1490 Vec xx1; 1491 1492 PetscCall(VecDuplicate(bb,&xx1)); 1493 PetscCall((*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx)); 1494 1495 PetscCall(VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD)); 1496 PetscCall(VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD)); 1497 if (!mat->diag) { 1498 PetscCall(MatCreateVecs(matin,&mat->diag,NULL)); 1499 PetscCall(MatGetDiagonal(matin,mat->diag)); 1500 } 1501 PetscCall(MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop)); 1502 if (hasop) { 1503 PetscCall(MatMultDiagonalBlock(matin,xx,bb1)); 1504 } else { 1505 PetscCall(VecPointwiseMult(bb1,mat->diag,xx)); 1506 } 1507 PetscCall(VecAYPX(bb1,(omega-2.0)/omega,bb)); 1508 1509 PetscCall(MatMultAdd(mat->B,mat->lvec,bb1,bb1)); 1510 1511 /* local sweep */ 1512 PetscCall((*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1)); 1513 PetscCall(VecAXPY(xx,1.0,xx1)); 1514 PetscCall(VecDestroy(&xx1)); 1515 } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported"); 1516 1517 PetscCall(VecDestroy(&bb1)); 1518 1519 matin->factorerrortype = mat->A->factorerrortype; 1520 PetscFunctionReturn(0); 1521 } 1522

/*
  MatPermute_MPIAIJ - Creates in *B a new parallel AIJ matrix with the same local and
  global sizes as A, into which every entry of A is inserted at the row and column
  positions derived from the index sets rowp and colp.

  Notes:
  Star forests are used to invert the two permutations, i.e. to learn the destination of
  every locally owned row and column and of every ghost column. Per-row diagonal and
  off-diagonal counts are computed for the destination rows and sent to the ranks that
  will own those rows, so the result is created with exact preallocation.
*/
PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
{
  Mat            diagblk,offblk,permuted;
  const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj;
  PetscScalar    *aa,*ba;
  PetscInt       r,k,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest;
  PetscSF        rowsf,sf;
  IS             parcolp = NULL;
  PetscBool      done;

  PetscFunctionBegin;
  PetscCall(MatGetLocalSize(A,&m,&n));
  PetscCall(ISGetIndices(rowp,&rwant));
  PetscCall(ISGetIndices(colp,&cwant));
  PetscCall(PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest));

  /* Invert row permutation to find out where my rows should go */
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf));
  PetscCall(PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant));
  PetscCall(PetscSFSetFromOptions(rowsf));
  for (r = 0; r < m; r++) work[r] = A->rmap->rstart + r;
  PetscCall(PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPI_REPLACE));
  PetscCall(PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPI_REPLACE));

  /* Invert column permutation to find out where my columns should go */
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A),&sf));
  PetscCall(PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant));
  PetscCall(PetscSFSetFromOptions(sf));
  for (k = 0; k < n; k++) work[k] = A->cmap->rstart + k;
  PetscCall(PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPI_REPLACE));
  PetscCall(PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPI_REPLACE));
  PetscCall(PetscSFDestroy(&sf));

  PetscCall(ISRestoreIndices(rowp,&rwant));
  PetscCall(ISRestoreIndices(colp,&cwant));
  PetscCall(MatMPIAIJGetSeqAIJ(A,&diagblk,&offblk,&gcols));

  /* Find out where my gcols should go */
  PetscCall(MatGetSize(offblk,NULL,&ng));
  PetscCall(PetscMalloc1(ng,&gcdest));
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A),&sf));
  PetscCall(PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols));
  PetscCall(PetscSFSetFromOptions(sf));
  PetscCall(PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest,MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest,MPI_REPLACE));
  PetscCall(PetscSFDestroy(&sf));

  /* Count, for every local row, how many of its entries land in the diagonal and in the
     off-diagonal block of the destination row's owner */
  PetscCall(PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz));
  PetscCall(MatGetRowIJ(diagblk,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done));
  PetscCall(MatGetRowIJ(offblk,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done));
  for (r = 0; r < m; r++) {
    const PetscInt newrow = rdest[r];
    PetscMPIInt    rowner,cowner;

    PetscCall(PetscLayoutFindOwner(A->rmap,newrow,&rowner));
    for (k = ai[r]; k < ai[r+1]; k++) {
      const PetscInt newcol = cdest[aj[k]];

      PetscCall(PetscLayoutFindOwner(A->cmap,newcol,&cowner)); /* Could build an index for the columns to eliminate this search */
      if (rowner == cowner) dnnz[r]++;
      else onnz[r]++;
    }
    for (k = bi[r]; k < bi[r+1]; k++) {
      const PetscInt newcol = gcdest[bj[k]];

      PetscCall(PetscLayoutFindOwner(A->cmap,newcol,&cowner));
      if (rowner == cowner) dnnz[r]++;
      else onnz[r]++;
    }
  }
  /* ship the counts to the ranks that will own the permuted rows */
  PetscCall(PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz,MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz,MPI_REPLACE));
  PetscCall(PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz,MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz,MPI_REPLACE));
  PetscCall(PetscSFDestroy(&rowsf));

  PetscCall(MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&permuted));
  PetscCall(MatSeqAIJGetArray(diagblk,&aa));
  PetscCall(MatSeqAIJGetArray(offblk,&ba));
  for (r = 0; r < m; r++) {
    /* dnnz and onnz are no longer needed as counts and serve as column scratch space.
       They hold m entries each and a row may be longer, hence the batches of at most m. */
    PetscInt *acols = dnnz,*bcols = onnz;
    PetscInt rowlen,start,count;

    rowlen = ai[r+1] - ai[r];
    start  = 0;
    while (start < rowlen) {
      count = PetscMin(m,rowlen - start);
      for (k = 0; k < count; k++) acols[k] = cdest[aj[ai[r] + start + k]];
      PetscCall(MatSetValues(permuted,1,&rdest[r],count,acols,aa + ai[r] + start,INSERT_VALUES));
      start += count;
    }
    rowlen = bi[r+1] - bi[r];
    start  = 0;
    while (start < rowlen) {
      count = PetscMin(m,rowlen - start);
      for (k = 0; k < count; k++) bcols[k] = gcdest[bj[bi[r] + start + k]];
      PetscCall(MatSetValues(permuted,1,&rdest[r],count,bcols,ba + bi[r] + start,INSERT_VALUES));
      start += count;
    }
  }
  PetscCall(MatAssemblyBegin(permuted,MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(permuted,MAT_FINAL_ASSEMBLY));
  PetscCall(MatRestoreRowIJ(diagblk,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done));
  PetscCall(MatRestoreRowIJ(offblk,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done));
  PetscCall(MatSeqAIJRestoreArray(diagblk,&aa));
  PetscCall(MatSeqAIJRestoreArray(offblk,&ba));
  PetscCall(PetscFree4(dnnz,onnz,tdnnz,tonnz));
  PetscCall(PetscFree3(work,rdest,cdest));
  PetscCall(PetscFree(gcdest));
  if (parcolp) PetscCall(ISDestroy(&colp));
  *B = permuted;
  PetscFunctionReturn(0);
}

/*
  MatGetGhosts_MPIAIJ - Reports the number of columns of the off-diagonal block in
  *nghosts and, when ghosts is not NULL, hands out the matrix's own garray through it
  (no copy is made).
*/
PetscErrorCode MatGetGhosts_MPIAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[])
{
  Mat_MPIAIJ *impl = (Mat_MPIAIJ*)mat->data;

  PetscFunctionBegin;
  PetscCall(MatGetSize(impl->B,NULL,nghosts));
  if (ghosts) {
    *ghosts = impl->garray;
  }
  PetscFunctionReturn(0);
}

1639 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1640 { 1641 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1642 Mat A = mat->A,B = mat->B; 1643 PetscLogDouble isend[5],irecv[5]; 1644 1645 PetscFunctionBegin; 1646 info->block_size = 1.0; 1647 PetscCall(MatGetInfo(A,MAT_LOCAL,info)); 1648 1649 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1650 isend[3] = info->memory; isend[4] = info->mallocs; 1651 1652 PetscCall(MatGetInfo(B,MAT_LOCAL,info)); 1653 1654 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1655 isend[3] += info->memory; isend[4] += info->mallocs; 1656 if (flag == MAT_LOCAL) { 1657 info->nz_used = isend[0]; 1658 info->nz_allocated = isend[1]; 1659 info->nz_unneeded = isend[2]; 1660 info->memory = isend[3]; 1661 info->mallocs = isend[4]; 1662 } else if (flag == MAT_GLOBAL_MAX) { 1663
PetscCall(MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_MAX,PetscObjectComm((PetscObject)matin))); 1664 1665 info->nz_used = irecv[0]; 1666 info->nz_allocated = irecv[1]; 1667 info->nz_unneeded = irecv[2]; 1668 info->memory = irecv[3]; 1669 info->mallocs = irecv[4]; 1670 } else if (flag == MAT_GLOBAL_SUM) { 1671 PetscCall(MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_SUM,PetscObjectComm((PetscObject)matin))); 1672 1673 info->nz_used = irecv[0]; 1674 info->nz_allocated = irecv[1]; 1675 info->nz_unneeded = irecv[2]; 1676 info->memory = irecv[3]; 1677 info->mallocs = irecv[4]; 1678 } 1679 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1680 info->fill_ratio_needed = 0; 1681 info->factor_mallocs = 0; 1682 PetscFunctionReturn(0); 1683 } 1684 1685 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg) 1686 { 1687 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1688 1689 PetscFunctionBegin; 1690 switch (op) { 1691 case MAT_NEW_NONZERO_LOCATIONS: 1692 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1693 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1694 case MAT_KEEP_NONZERO_PATTERN: 1695 case MAT_NEW_NONZERO_LOCATION_ERR: 1696 case MAT_USE_INODES: 1697 case MAT_IGNORE_ZERO_ENTRIES: 1698 case MAT_FORM_EXPLICIT_TRANSPOSE: 1699 MatCheckPreallocated(A,1); 1700 PetscCall(MatSetOption(a->A,op,flg)); 1701 PetscCall(MatSetOption(a->B,op,flg)); 1702 break; 1703 case MAT_ROW_ORIENTED: 1704 MatCheckPreallocated(A,1); 1705 a->roworiented = flg; 1706 1707 PetscCall(MatSetOption(a->A,op,flg)); 1708 PetscCall(MatSetOption(a->B,op,flg)); 1709 break; 1710 case MAT_FORCE_DIAGONAL_ENTRIES: 1711 case MAT_SORTED_FULL: 1712 PetscCall(PetscInfo(A,"Option %s ignored\n",MatOptions[op])); 1713 break; 1714 case MAT_IGNORE_OFF_PROC_ENTRIES: 1715 a->donotstash = flg; 1716 break; 1717 /* Symmetry flags are handled directly by MatSetOption() and they don't affect preallocation */ 1718 case MAT_SPD: 1719 case MAT_SYMMETRIC: 1720 case MAT_STRUCTURALLY_SYMMETRIC: 1721 case MAT_HERMITIAN: 1722 
case MAT_SYMMETRY_ETERNAL: 1723 break; 1724 case MAT_SUBMAT_SINGLEIS: 1725 A->submat_singleis = flg; 1726 break; 1727 case MAT_STRUCTURE_ONLY: 1728 /* The option is handled directly by MatSetOption() */ 1729 break; 1730 default: 1731 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1732 } 1733 PetscFunctionReturn(0); 1734 } 1735 1736 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1737 { 1738 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1739 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1740 PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart; 1741 PetscInt nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend; 1742 PetscInt *cmap,*idx_p; 1743 1744 PetscFunctionBegin; 1745 PetscCheck(!mat->getrowactive,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1746 mat->getrowactive = PETSC_TRUE; 1747 1748 if (!mat->rowvalues && (idx || v)) { 1749 /* 1750 allocate enough space to hold information from the longest row. 
1751 */ 1752 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1753 PetscInt max = 1,tmp; 1754 for (i=0; i<matin->rmap->n; i++) { 1755 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1756 if (max < tmp) max = tmp; 1757 } 1758 PetscCall(PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices)); 1759 } 1760 1761 PetscCheck(row >= rstart && row < rend,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows"); 1762 lrow = row - rstart; 1763 1764 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1765 if (!v) {pvA = NULL; pvB = NULL;} 1766 if (!idx) {pcA = NULL; if (!v) pcB = NULL;} 1767 PetscCall((*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA)); 1768 PetscCall((*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB)); 1769 nztot = nzA + nzB; 1770 1771 cmap = mat->garray; 1772 if (v || idx) { 1773 if (nztot) { 1774 /* Sort by increasing column numbers, assuming A and B already sorted */ 1775 PetscInt imark = -1; 1776 if (v) { 1777 *v = v_p = mat->rowvalues; 1778 for (i=0; i<nzB; i++) { 1779 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1780 else break; 1781 } 1782 imark = i; 1783 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1784 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1785 } 1786 if (idx) { 1787 *idx = idx_p = mat->rowindices; 1788 if (imark > -1) { 1789 for (i=0; i<imark; i++) { 1790 idx_p[i] = cmap[cworkB[i]]; 1791 } 1792 } else { 1793 for (i=0; i<nzB; i++) { 1794 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1795 else break; 1796 } 1797 imark = i; 1798 } 1799 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1800 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1801 } 1802 } else { 1803 if (idx) *idx = NULL; 1804 if (v) *v = NULL; 1805 } 1806 } 1807 *nz = nztot; 1808 PetscCall((*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA)); 1809 PetscCall((*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB)); 1810 PetscFunctionReturn(0); 1811 } 1812 1813 PetscErrorCode MatRestoreRow_MPIAIJ(Mat 
mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1814 { 1815 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1816 1817 PetscFunctionBegin; 1818 PetscCheck(aij->getrowactive,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1819 aij->getrowactive = PETSC_FALSE; 1820 PetscFunctionReturn(0); 1821 } 1822 1823 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1824 { 1825 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1826 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1827 PetscInt i,j,cstart = mat->cmap->rstart; 1828 PetscReal sum = 0.0; 1829 const MatScalar *v,*amata,*bmata; 1830 1831 PetscFunctionBegin; 1832 if (aij->size == 1) { 1833 PetscCall(MatNorm(aij->A,type,norm)); 1834 } else { 1835 PetscCall(MatSeqAIJGetArrayRead(aij->A,&amata)); 1836 PetscCall(MatSeqAIJGetArrayRead(aij->B,&bmata)); 1837 if (type == NORM_FROBENIUS) { 1838 v = amata; 1839 for (i=0; i<amat->nz; i++) { 1840 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1841 } 1842 v = bmata; 1843 for (i=0; i<bmat->nz; i++) { 1844 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1845 } 1846 PetscCall(MPIU_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat))); 1847 *norm = PetscSqrtReal(*norm); 1848 PetscCall(PetscLogFlops(2.0*amat->nz+2.0*bmat->nz)); 1849 } else if (type == NORM_1) { /* max column norm */ 1850 PetscReal *tmp,*tmp2; 1851 PetscInt *jj,*garray = aij->garray; 1852 PetscCall(PetscCalloc1(mat->cmap->N+1,&tmp)); 1853 PetscCall(PetscMalloc1(mat->cmap->N+1,&tmp2)); 1854 *norm = 0.0; 1855 v = amata; jj = amat->j; 1856 for (j=0; j<amat->nz; j++) { 1857 tmp[cstart + *jj++] += PetscAbsScalar(*v); v++; 1858 } 1859 v = bmata; jj = bmat->j; 1860 for (j=0; j<bmat->nz; j++) { 1861 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 1862 } 1863 PetscCall(MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat))); 1864 for (j=0; j<mat->cmap->N; j++) { 1865 if (tmp2[j] > *norm) *norm = 
tmp2[j]; 1866 } 1867 PetscCall(PetscFree(tmp)); 1868 PetscCall(PetscFree(tmp2)); 1869 PetscCall(PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0))); 1870 } else if (type == NORM_INFINITY) { /* max row norm */ 1871 PetscReal ntemp = 0.0; 1872 for (j=0; j<aij->A->rmap->n; j++) { 1873 v = amata + amat->i[j]; 1874 sum = 0.0; 1875 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 1876 sum += PetscAbsScalar(*v); v++; 1877 } 1878 v = bmata + bmat->i[j]; 1879 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 1880 sum += PetscAbsScalar(*v); v++; 1881 } 1882 if (sum > ntemp) ntemp = sum; 1883 } 1884 PetscCall(MPIU_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat))); 1885 PetscCall(PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0))); 1886 } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm"); 1887 PetscCall(MatSeqAIJRestoreArrayRead(aij->A,&amata)); 1888 PetscCall(MatSeqAIJRestoreArrayRead(aij->B,&bmata)); 1889 } 1890 PetscFunctionReturn(0); 1891 } 1892 1893 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout) 1894 { 1895 Mat_MPIAIJ *a =(Mat_MPIAIJ*)A->data,*b; 1896 Mat_SeqAIJ *Aloc =(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data,*sub_B_diag; 1897 PetscInt M = A->rmap->N,N=A->cmap->N,ma,na,mb,nb,row,*cols,*cols_tmp,*B_diag_ilen,i,ncol,A_diag_ncol; 1898 const PetscInt *ai,*aj,*bi,*bj,*B_diag_i; 1899 Mat B,A_diag,*B_diag; 1900 const MatScalar *pbv,*bv; 1901 1902 PetscFunctionBegin; 1903 ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n; 1904 ai = Aloc->i; aj = Aloc->j; 1905 bi = Bloc->i; bj = Bloc->j; 1906 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 1907 PetscInt *d_nnz,*g_nnz,*o_nnz; 1908 PetscSFNode *oloc; 1909 PETSC_UNUSED PetscSF sf; 1910 1911 PetscCall(PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc)); 1912 /* compute d_nnz for preallocation */ 1913 PetscCall(PetscArrayzero(d_nnz,na)); 1914 for (i=0; i<ai[ma]; i++) d_nnz[aj[i]]++; 1915 /* compute local off-diagonal 
contributions */ 1916 PetscCall(PetscArrayzero(g_nnz,nb)); 1917 for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++; 1918 /* map those to global */ 1919 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A),&sf)); 1920 PetscCall(PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray)); 1921 PetscCall(PetscSFSetFromOptions(sf)); 1922 PetscCall(PetscArrayzero(o_nnz,na)); 1923 PetscCall(PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM)); 1924 PetscCall(PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM)); 1925 PetscCall(PetscSFDestroy(&sf)); 1926 1927 PetscCall(MatCreate(PetscObjectComm((PetscObject)A),&B)); 1928 PetscCall(MatSetSizes(B,A->cmap->n,A->rmap->n,N,M)); 1929 PetscCall(MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs))); 1930 PetscCall(MatSetType(B,((PetscObject)A)->type_name)); 1931 PetscCall(MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz)); 1932 PetscCall(PetscFree4(d_nnz,o_nnz,g_nnz,oloc)); 1933 } else { 1934 B = *matout; 1935 PetscCall(MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE)); 1936 } 1937 1938 b = (Mat_MPIAIJ*)B->data; 1939 A_diag = a->A; 1940 B_diag = &b->A; 1941 sub_B_diag = (Mat_SeqAIJ*)(*B_diag)->data; 1942 A_diag_ncol = A_diag->cmap->N; 1943 B_diag_ilen = sub_B_diag->ilen; 1944 B_diag_i = sub_B_diag->i; 1945 1946 /* Set ilen for diagonal of B */ 1947 for (i=0; i<A_diag_ncol; i++) { 1948 B_diag_ilen[i] = B_diag_i[i+1] - B_diag_i[i]; 1949 } 1950 1951 /* Transpose the diagonal part of the matrix. In contrast to the offdiagonal part, this can be done 1952 very quickly (=without using MatSetValues), because all writes are local. 
*/ 1953 PetscCall(MatTranspose(A_diag,MAT_REUSE_MATRIX,B_diag)); 1954 1955 /* copy over the B part */ 1956 PetscCall(PetscMalloc1(bi[mb],&cols)); 1957 PetscCall(MatSeqAIJGetArrayRead(a->B,&bv)); 1958 pbv = bv; 1959 row = A->rmap->rstart; 1960 for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]]; 1961 cols_tmp = cols; 1962 for (i=0; i<mb; i++) { 1963 ncol = bi[i+1]-bi[i]; 1964 PetscCall(MatSetValues(B,ncol,cols_tmp,1,&row,pbv,INSERT_VALUES)); 1965 row++; 1966 pbv += ncol; cols_tmp += ncol; 1967 } 1968 PetscCall(PetscFree(cols)); 1969 PetscCall(MatSeqAIJRestoreArrayRead(a->B,&bv)); 1970 1971 PetscCall(MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY)); 1972 PetscCall(MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY)); 1973 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) { 1974 *matout = B; 1975 } else { 1976 PetscCall(MatHeaderMerge(A,&B)); 1977 } 1978 PetscFunctionReturn(0); 1979 } 1980 1981 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 1982 { 1983 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1984 Mat a = aij->A,b = aij->B; 1985 PetscInt s1,s2,s3; 1986 1987 PetscFunctionBegin; 1988 PetscCall(MatGetLocalSize(mat,&s2,&s3)); 1989 if (rr) { 1990 PetscCall(VecGetLocalSize(rr,&s1)); 1991 PetscCheck(s1==s3,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 1992 /* Overlap communication with computation. 
*/ 1993 PetscCall(VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD)); 1994 } 1995 if (ll) { 1996 PetscCall(VecGetLocalSize(ll,&s1)); 1997 PetscCheck(s1==s2,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 1998 PetscCall((*b->ops->diagonalscale)(b,ll,NULL)); 1999 } 2000 /* scale the diagonal block */ 2001 PetscCall((*a->ops->diagonalscale)(a,ll,rr)); 2002 2003 if (rr) { 2004 /* Do a scatter end and then right scale the off-diagonal block */ 2005 PetscCall(VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD)); 2006 PetscCall((*b->ops->diagonalscale)(b,NULL,aij->lvec)); 2007 } 2008 PetscFunctionReturn(0); 2009 } 2010 2011 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 2012 { 2013 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2014 2015 PetscFunctionBegin; 2016 PetscCall(MatSetUnfactored(a->A)); 2017 PetscFunctionReturn(0); 2018 } 2019 2020 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool *flag) 2021 { 2022 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 2023 Mat a,b,c,d; 2024 PetscBool flg; 2025 2026 PetscFunctionBegin; 2027 a = matA->A; b = matA->B; 2028 c = matB->A; d = matB->B; 2029 2030 PetscCall(MatEqual(a,c,&flg)); 2031 if (flg) { 2032 PetscCall(MatEqual(b,d,&flg)); 2033 } 2034 PetscCall(MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A))); 2035 PetscFunctionReturn(0); 2036 } 2037 2038 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 2039 { 2040 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2041 Mat_MPIAIJ *b = (Mat_MPIAIJ*)B->data; 2042 2043 PetscFunctionBegin; 2044 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. 
/*
   MatSetUp_MPIAIJ - Sets up an MPIAIJ matrix when the user has not preallocated it.

   Forwards to MatMPIAIJSetPreallocation() with PETSC_DEFAULT for both scalar counts and
   NULL for both per-row arrays, i.e. the library's default preallocation is requested for
   the diagonal and the off-diagonal block.
*/
PetscErrorCode MatSetUp_MPIAIJ(Mat A)
{
  PetscFunctionBegin;
  PetscCall(MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,NULL,PETSC_DEFAULT,NULL));
  PetscFunctionReturn(0);
}
2070 */ 2071 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz) 2072 { 2073 PetscInt i,j,k,nzx,nzy; 2074 2075 PetscFunctionBegin; 2076 /* Set the number of nonzeros in the new matrix */ 2077 for (i=0; i<m; i++) { 2078 const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i]; 2079 nzx = xi[i+1] - xi[i]; 2080 nzy = yi[i+1] - yi[i]; 2081 nnz[i] = 0; 2082 for (j=0,k=0; j<nzx; j++) { /* Point in X */ 2083 for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */ 2084 if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++; /* Skip duplicate */ 2085 nnz[i]++; 2086 } 2087 for (; k<nzy; k++) nnz[i]++; 2088 } 2089 PetscFunctionReturn(0); 2090 } 2091 2092 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */ 2093 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz) 2094 { 2095 PetscInt m = Y->rmap->N; 2096 Mat_SeqAIJ *x = (Mat_SeqAIJ*)X->data; 2097 Mat_SeqAIJ *y = (Mat_SeqAIJ*)Y->data; 2098 2099 PetscFunctionBegin; 2100 PetscCall(MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz)); 2101 PetscFunctionReturn(0); 2102 } 2103 2104 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2105 { 2106 Mat_MPIAIJ *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data; 2107 2108 PetscFunctionBegin; 2109 if (str == SAME_NONZERO_PATTERN) { 2110 PetscCall(MatAXPY(yy->A,a,xx->A,str)); 2111 PetscCall(MatAXPY(yy->B,a,xx->B,str)); 2112 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 2113 PetscCall(MatAXPY_Basic(Y,a,X,str)); 2114 } else { 2115 Mat B; 2116 PetscInt *nnz_d,*nnz_o; 2117 2118 PetscCall(PetscMalloc1(yy->A->rmap->N,&nnz_d)); 2119 PetscCall(PetscMalloc1(yy->B->rmap->N,&nnz_o)); 2120 PetscCall(MatCreate(PetscObjectComm((PetscObject)Y),&B)); 
2121 PetscCall(PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name)); 2122 PetscCall(MatSetLayouts(B,Y->rmap,Y->cmap)); 2123 PetscCall(MatSetType(B,((PetscObject)Y)->type_name)); 2124 PetscCall(MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d)); 2125 PetscCall(MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o)); 2126 PetscCall(MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o)); 2127 PetscCall(MatAXPY_BasicWithPreallocation(B,Y,a,X,str)); 2128 PetscCall(MatHeaderMerge(Y,&B)); 2129 PetscCall(PetscFree(nnz_d)); 2130 PetscCall(PetscFree(nnz_o)); 2131 } 2132 PetscFunctionReturn(0); 2133 } 2134 2135 PETSC_INTERN PetscErrorCode MatConjugate_SeqAIJ(Mat); 2136 2137 PetscErrorCode MatConjugate_MPIAIJ(Mat mat) 2138 { 2139 PetscFunctionBegin; 2140 if (PetscDefined(USE_COMPLEX)) { 2141 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2142 2143 PetscCall(MatConjugate_SeqAIJ(aij->A)); 2144 PetscCall(MatConjugate_SeqAIJ(aij->B)); 2145 } 2146 PetscFunctionReturn(0); 2147 } 2148 2149 PetscErrorCode MatRealPart_MPIAIJ(Mat A) 2150 { 2151 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2152 2153 PetscFunctionBegin; 2154 PetscCall(MatRealPart(a->A)); 2155 PetscCall(MatRealPart(a->B)); 2156 PetscFunctionReturn(0); 2157 } 2158 2159 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 2160 { 2161 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2162 2163 PetscFunctionBegin; 2164 PetscCall(MatImaginaryPart(a->A)); 2165 PetscCall(MatImaginaryPart(a->B)); 2166 PetscFunctionReturn(0); 2167 } 2168 2169 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A,Vec v,PetscInt idx[]) 2170 { 2171 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2172 PetscInt i,*idxb = NULL,m = A->rmap->n; 2173 PetscScalar *va,*vv; 2174 Vec vB,vA; 2175 const PetscScalar *vb; 2176 2177 PetscFunctionBegin; 2178 PetscCall(VecCreateSeq(PETSC_COMM_SELF,m,&vA)); 2179 PetscCall(MatGetRowMaxAbs(a->A,vA,idx)); 2180 2181 PetscCall(VecGetArrayWrite(vA,&va)); 2182 if (idx) { 2183 for (i=0; i<m; i++) { 2184 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2185 } 
2186 } 2187 2188 PetscCall(VecCreateSeq(PETSC_COMM_SELF,m,&vB)); 2189 PetscCall(PetscMalloc1(m,&idxb)); 2190 PetscCall(MatGetRowMaxAbs(a->B,vB,idxb)); 2191 2192 PetscCall(VecGetArrayWrite(v,&vv)); 2193 PetscCall(VecGetArrayRead(vB,&vb)); 2194 for (i=0; i<m; i++) { 2195 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 2196 vv[i] = vb[i]; 2197 if (idx) idx[i] = a->garray[idxb[i]]; 2198 } else { 2199 vv[i] = va[i]; 2200 if (idx && PetscAbsScalar(va[i]) == PetscAbsScalar(vb[i]) && idxb[i] != -1 && idx[i] > a->garray[idxb[i]]) 2201 idx[i] = a->garray[idxb[i]]; 2202 } 2203 } 2204 PetscCall(VecRestoreArrayWrite(vA,&vv)); 2205 PetscCall(VecRestoreArrayWrite(vA,&va)); 2206 PetscCall(VecRestoreArrayRead(vB,&vb)); 2207 PetscCall(PetscFree(idxb)); 2208 PetscCall(VecDestroy(&vA)); 2209 PetscCall(VecDestroy(&vB)); 2210 PetscFunctionReturn(0); 2211 } 2212 2213 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2214 { 2215 Mat_MPIAIJ *mat = (Mat_MPIAIJ*) A->data; 2216 PetscInt m = A->rmap->n,n = A->cmap->n; 2217 PetscInt cstart = A->cmap->rstart,cend = A->cmap->rend; 2218 PetscInt *cmap = mat->garray; 2219 PetscInt *diagIdx, *offdiagIdx; 2220 Vec diagV, offdiagV; 2221 PetscScalar *a, *diagA, *offdiagA; 2222 const PetscScalar *ba,*bav; 2223 PetscInt r,j,col,ncols,*bi,*bj; 2224 Mat B = mat->B; 2225 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 2226 2227 PetscFunctionBegin; 2228 /* When a process holds entire A and other processes have no entry */ 2229 if (A->cmap->N == n) { 2230 PetscCall(VecGetArrayWrite(v,&diagA)); 2231 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF,1,m,diagA,&diagV)); 2232 PetscCall(MatGetRowMinAbs(mat->A,diagV,idx)); 2233 PetscCall(VecDestroy(&diagV)); 2234 PetscCall(VecRestoreArrayWrite(v,&diagA)); 2235 PetscFunctionReturn(0); 2236 } else if (n == 0) { 2237 if (m) { 2238 PetscCall(VecGetArrayWrite(v,&a)); 2239 for (r = 0; r < m; r++) {a[r] = 0.0; if (idx) idx[r] = -1;} 2240 PetscCall(VecRestoreArrayWrite(v,&a)); 2241 } 2242 PetscFunctionReturn(0); 
2243 } 2244 2245 PetscCall(PetscMalloc2(m,&diagIdx,m,&offdiagIdx)); 2246 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV)); 2247 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV)); 2248 PetscCall(MatGetRowMinAbs(mat->A, diagV, diagIdx)); 2249 2250 /* Get offdiagIdx[] for implicit 0.0 */ 2251 PetscCall(MatSeqAIJGetArrayRead(B,&bav)); 2252 ba = bav; 2253 bi = b->i; 2254 bj = b->j; 2255 PetscCall(VecGetArrayWrite(offdiagV, &offdiagA)); 2256 for (r = 0; r < m; r++) { 2257 ncols = bi[r+1] - bi[r]; 2258 if (ncols == A->cmap->N - n) { /* Brow is dense */ 2259 offdiagA[r] = *ba; offdiagIdx[r] = cmap[0]; 2260 } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */ 2261 offdiagA[r] = 0.0; 2262 2263 /* Find first hole in the cmap */ 2264 for (j=0; j<ncols; j++) { 2265 col = cmap[bj[j]]; /* global column number = cmap[B column number] */ 2266 if (col > j && j < cstart) { 2267 offdiagIdx[r] = j; /* global column number of first implicit 0.0 */ 2268 break; 2269 } else if (col > j + n && j >= cstart) { 2270 offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */ 2271 break; 2272 } 2273 } 2274 if (j == ncols && ncols < A->cmap->N - n) { 2275 /* a hole is outside compressed Bcols */ 2276 if (ncols == 0) { 2277 if (cstart) { 2278 offdiagIdx[r] = 0; 2279 } else offdiagIdx[r] = cend; 2280 } else { /* ncols > 0 */ 2281 offdiagIdx[r] = cmap[ncols-1] + 1; 2282 if (offdiagIdx[r] == cstart) offdiagIdx[r] += n; 2283 } 2284 } 2285 } 2286 2287 for (j=0; j<ncols; j++) { 2288 if (PetscAbsScalar(offdiagA[r]) > PetscAbsScalar(*ba)) {offdiagA[r] = *ba; offdiagIdx[r] = cmap[*bj];} 2289 ba++; bj++; 2290 } 2291 } 2292 2293 PetscCall(VecGetArrayWrite(v, &a)); 2294 PetscCall(VecGetArrayRead(diagV, (const PetscScalar**)&diagA)); 2295 for (r = 0; r < m; ++r) { 2296 if (PetscAbsScalar(diagA[r]) < PetscAbsScalar(offdiagA[r])) { 2297 a[r] = diagA[r]; 2298 if (idx) idx[r] = cstart + diagIdx[r]; 2299 } else if (PetscAbsScalar(diagA[r]) == PetscAbsScalar(offdiagA[r])) { 
/*
   MatGetRowMin_MPIAIJ - For every local row, finds the entry with the smallest real part.

   Input Parameter:
.  A - the matrix

   Output Parameters:
+  v   - v[r] receives the minimum of local row r
-  idx - if not NULL, idx[r] receives the global column of that minimum

   Notes:
   Two shortcuts are taken before the general path:
.  all columns are local (A->cmap->N == n) - the result is computed by MatGetRowMin() on
   the diagonal block, writing straight into the array of v
.  no local columns (n == 0) - every local row gets PETSC_MAX_REAL and index -1

   Otherwise the minimum of the diagonal block and the minimum of the off-diagonal block
   are computed separately and merged. Entries that are not stored in the off-diagonal
   block count as 0.0, so for a row of B that is not full the candidate starts at 0.0 with
   the global column of the first unstored position. On a tie the smaller global column
   is reported.
*/
PetscErrorCode MatGetRowMin_MPIAIJ(Mat A,Vec v,PetscInt idx[])
{
  Mat_MPIAIJ        *mat = (Mat_MPIAIJ*) A->data;
  PetscInt          m = A->rmap->n,n = A->cmap->n;
  PetscInt          cstart = A->cmap->rstart,cend = A->cmap->rend;
  PetscInt          *cmap  = mat->garray;
  PetscInt          *diagIdx, *offdiagIdx;
  Vec               diagV, offdiagV;
  PetscScalar       *a, *diagA, *offdiagA;
  const PetscScalar *ba,*bav;
  PetscInt          r,j,col,ncols,*bi,*bj;
  Mat               B = mat->B;
  Mat_SeqAIJ        *b = (Mat_SeqAIJ*)B->data;

  PetscFunctionBegin;
  /* When a process holds entire A and other processes have no entry */
  if (A->cmap->N == n) {
    /* wrap the array of v in a sequential vector so the diagonal block can fill it directly */
    PetscCall(VecGetArrayWrite(v,&diagA));
    PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF,1,m,diagA,&diagV));
    PetscCall(MatGetRowMin(mat->A,diagV,idx));
    PetscCall(VecDestroy(&diagV));
    PetscCall(VecRestoreArrayWrite(v,&diagA));
    PetscFunctionReturn(0);
  } else if (n == 0) {
    if (m) {
      PetscCall(VecGetArrayWrite(v,&a));
      for (r = 0; r < m; r++) {a[r] = PETSC_MAX_REAL; if (idx) idx[r] = -1;}
      PetscCall(VecRestoreArrayWrite(v,&a));
    }
    PetscFunctionReturn(0);
  }

  PetscCall(PetscCalloc2(m,&diagIdx,m,&offdiagIdx));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV));
  PetscCall(MatGetRowMin(mat->A, diagV, diagIdx));

  /* Get offdiagIdx[] for implicit 0.0 */
  PetscCall(MatSeqAIJGetArrayRead(B,&bav));
  ba   = bav;
  bi   = b->i;
  bj   = b->j;
  /* ba and bj are advanced entry by entry below, so inside the loop they point at the start of row r */
  PetscCall(VecGetArrayWrite(offdiagV, &offdiagA));
  for (r = 0; r < m; r++) {
    ncols = bi[r+1] - bi[r];
    if (ncols == A->cmap->N - n) { /* Brow is dense */
      offdiagA[r] = *ba; offdiagIdx[r] = cmap[0];
    } else { /* Brow is sparse, so it has an implicit 0.0 and the minimum is 0.0 or lower */
      offdiagA[r] = 0.0;

      /* Find first hole in the cmap */
      for (j=0; j<ncols; j++) {
        col = cmap[bj[j]];  /* global column number = cmap[B column number] */
        if (col > j && j < cstart) {
          offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
          break;
        } else if (col > j + n && j >= cstart) {
          offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
          break;
        }
      }
      if (j == ncols && ncols < A->cmap->N - n) {
        /* a hole is outside compressed Bcols */
        if (ncols == 0) {
          /* empty row: the first off-process column is 0, or cend when the local columns start at 0 */
          if (cstart) {
            offdiagIdx[r] = 0;
          } else offdiagIdx[r] = cend;
        } else { /* ncols > 0 */
          offdiagIdx[r] = cmap[ncols-1] + 1;
          /* skip over the locally owned column range */
          if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
        }
      }
    }

    /* compare the candidate against every stored entry of the row */
    for (j=0; j<ncols; j++) {
      if (PetscRealPart(offdiagA[r]) > PetscRealPart(*ba)) {offdiagA[r] = *ba; offdiagIdx[r] = cmap[*bj];}
      ba++; bj++;
    }
  }

  /* merge the diagonal-block and off-diagonal-block results */
  PetscCall(VecGetArrayWrite(v, &a));
  PetscCall(VecGetArrayRead(diagV, (const PetscScalar**)&diagA));
  for (r = 0; r < m; ++r) {
    if (PetscRealPart(diagA[r]) < PetscRealPart(offdiagA[r])) {
      a[r]   = diagA[r];
      if (idx) idx[r] = cstart + diagIdx[r];
    } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) {
        /* tie: report the smaller global column */
        if (cstart + diagIdx[r] <= offdiagIdx[r]) {
          idx[r] = cstart + diagIdx[r];
        } else idx[r] = offdiagIdx[r];
      }
    } else {
      a[r] = offdiagA[r];
      if (idx) idx[r] = offdiagIdx[r];
    }
  }
  PetscCall(MatSeqAIJRestoreArrayRead(B,&bav));
  PetscCall(VecRestoreArrayWrite(v, &a));
  PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar**)&diagA));
  PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA));
  PetscCall(VecDestroy(&diagV));
  PetscCall(VecDestroy(&offdiagV));
  PetscCall(PetscFree2(diagIdx, offdiagIdx));
  PetscFunctionReturn(0);
}
PetscCall(VecGetArrayWrite(offdiagV, &offdiagA)); 2472 for (r = 0; r < m; r++) { 2473 ncols = bi[r+1] - bi[r]; 2474 if (ncols == A->cmap->N - n) { /* Brow is dense */ 2475 offdiagA[r] = *ba; offdiagIdx[r] = cmap[0]; 2476 } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */ 2477 offdiagA[r] = 0.0; 2478 2479 /* Find first hole in the cmap */ 2480 for (j=0; j<ncols; j++) { 2481 col = cmap[bj[j]]; /* global column number = cmap[B column number] */ 2482 if (col > j && j < cstart) { 2483 offdiagIdx[r] = j; /* global column number of first implicit 0.0 */ 2484 break; 2485 } else if (col > j + n && j >= cstart) { 2486 offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */ 2487 break; 2488 } 2489 } 2490 if (j == ncols && ncols < A->cmap->N - n) { 2491 /* a hole is outside compressed Bcols */ 2492 if (ncols == 0) { 2493 if (cstart) { 2494 offdiagIdx[r] = 0; 2495 } else offdiagIdx[r] = cend; 2496 } else { /* ncols > 0 */ 2497 offdiagIdx[r] = cmap[ncols-1] + 1; 2498 if (offdiagIdx[r] == cstart) offdiagIdx[r] += n; 2499 } 2500 } 2501 } 2502 2503 for (j=0; j<ncols; j++) { 2504 if (PetscRealPart(offdiagA[r]) < PetscRealPart(*ba)) {offdiagA[r] = *ba; offdiagIdx[r] = cmap[*bj];} 2505 ba++; bj++; 2506 } 2507 } 2508 2509 PetscCall(VecGetArrayWrite(v, &a)); 2510 PetscCall(VecGetArrayRead(diagV,(const PetscScalar**)&diagA)); 2511 for (r = 0; r < m; ++r) { 2512 if (PetscRealPart(diagA[r]) > PetscRealPart(offdiagA[r])) { 2513 a[r] = diagA[r]; 2514 if (idx) idx[r] = cstart + diagIdx[r]; 2515 } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) { 2516 a[r] = diagA[r]; 2517 if (idx) { 2518 if (cstart + diagIdx[r] <= offdiagIdx[r]) { 2519 idx[r] = cstart + diagIdx[r]; 2520 } else idx[r] = offdiagIdx[r]; 2521 } 2522 } else { 2523 a[r] = offdiagA[r]; 2524 if (idx) idx[r] = offdiagIdx[r]; 2525 } 2526 } 2527 PetscCall(MatSeqAIJRestoreArrayRead(B,&bav)); 2528 PetscCall(VecRestoreArrayWrite(v, &a)); 2529 PetscCall(VecRestoreArrayRead(diagV, (const 
PetscScalar**)&diagA)); 2530 PetscCall(VecRestoreArrayWrite(offdiagV,&offdiagA)); 2531 PetscCall(VecDestroy(&diagV)); 2532 PetscCall(VecDestroy(&offdiagV)); 2533 PetscCall(PetscFree2(diagIdx, offdiagIdx)); 2534 PetscFunctionReturn(0); 2535 } 2536 2537 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat) 2538 { 2539 Mat *dummy; 2540 2541 PetscFunctionBegin; 2542 PetscCall(MatCreateSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy)); 2543 *newmat = *dummy; 2544 PetscCall(PetscFree(dummy)); 2545 PetscFunctionReturn(0); 2546 } 2547 2548 PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values) 2549 { 2550 Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 2551 2552 PetscFunctionBegin; 2553 PetscCall(MatInvertBlockDiagonal(a->A,values)); 2554 A->factorerrortype = a->A->factorerrortype; 2555 PetscFunctionReturn(0); 2556 } 2557 2558 static PetscErrorCode MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx) 2559 { 2560 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)x->data; 2561 2562 PetscFunctionBegin; 2563 PetscCheck(x->assembled || x->preallocated,PetscObjectComm((PetscObject)x), PETSC_ERR_ARG_WRONGSTATE, "MatSetRandom on an unassembled and unpreallocated MATMPIAIJ is not allowed"); 2564 PetscCall(MatSetRandom(aij->A,rctx)); 2565 if (x->assembled) { 2566 PetscCall(MatSetRandom(aij->B,rctx)); 2567 } else { 2568 PetscCall(MatSetRandomSkipColumnRange_SeqAIJ_Private(aij->B,x->cmap->rstart,x->cmap->rend,rctx)); 2569 } 2570 PetscCall(MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY)); 2571 PetscCall(MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY)); 2572 PetscFunctionReturn(0); 2573 } 2574 2575 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A,PetscBool sc) 2576 { 2577 PetscFunctionBegin; 2578 if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable; 2579 else A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ; 2580 PetscFunctionReturn(0); 2581 } 2582 2583 /*@ 2584 MatMPIAIJSetUseScalableIncreaseOverlap - Determine if the matrix uses a scalable 
algorithm to compute the overlap 2585 2586 Collective on Mat 2587 2588 Input Parameters: 2589 + A - the matrix 2590 - sc - PETSC_TRUE indicates use the scalable algorithm (default is not to use the scalable algorithm) 2591 2592 Level: advanced 2593 2594 @*/ 2595 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A,PetscBool sc) 2596 { 2597 PetscFunctionBegin; 2598 PetscTryMethod(A,"MatMPIAIJSetUseScalableIncreaseOverlap_C",(Mat,PetscBool),(A,sc)); 2599 PetscFunctionReturn(0); 2600 } 2601 2602 PetscErrorCode MatSetFromOptions_MPIAIJ(PetscOptionItems *PetscOptionsObject,Mat A) 2603 { 2604 PetscBool sc = PETSC_FALSE,flg; 2605 2606 PetscFunctionBegin; 2607 PetscOptionsHeadBegin(PetscOptionsObject,"MPIAIJ options"); 2608 if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE; 2609 PetscCall(PetscOptionsBool("-mat_increase_overlap_scalable","Use a scalable algorithm to compute the overlap","MatIncreaseOverlap",sc,&sc,&flg)); 2610 if (flg) PetscCall(MatMPIAIJSetUseScalableIncreaseOverlap(A,sc)); 2611 PetscOptionsHeadEnd(); 2612 PetscFunctionReturn(0); 2613 } 2614 2615 PetscErrorCode MatShift_MPIAIJ(Mat Y,PetscScalar a) 2616 { 2617 Mat_MPIAIJ *maij = (Mat_MPIAIJ*)Y->data; 2618 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)maij->A->data; 2619 2620 PetscFunctionBegin; 2621 if (!Y->preallocated) { 2622 PetscCall(MatMPIAIJSetPreallocation(Y,1,NULL,0,NULL)); 2623 } else if (!aij->nz) { /* It does not matter if diagonals of Y only partially lie in maij->A. We just need an estimated preallocation. 
*/ 2624 PetscInt nonew = aij->nonew; 2625 PetscCall(MatSeqAIJSetPreallocation(maij->A,1,NULL)); 2626 aij->nonew = nonew; 2627 } 2628 PetscCall(MatShift_Basic(Y,a)); 2629 PetscFunctionReturn(0); 2630 } 2631 2632 PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A,PetscBool *missing,PetscInt *d) 2633 { 2634 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2635 2636 PetscFunctionBegin; 2637 PetscCheck(A->rmap->n == A->cmap->n,PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices"); 2638 PetscCall(MatMissingDiagonal(a->A,missing,d)); 2639 if (d) { 2640 PetscInt rstart; 2641 PetscCall(MatGetOwnershipRange(A,&rstart,NULL)); 2642 *d += rstart; 2643 2644 } 2645 PetscFunctionReturn(0); 2646 } 2647 2648 PetscErrorCode MatInvertVariableBlockDiagonal_MPIAIJ(Mat A,PetscInt nblocks,const PetscInt *bsizes,PetscScalar *diag) 2649 { 2650 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2651 2652 PetscFunctionBegin; 2653 PetscCall(MatInvertVariableBlockDiagonal(a->A,nblocks,bsizes,diag)); 2654 PetscFunctionReturn(0); 2655 } 2656 2657 /* -------------------------------------------------------------------*/ 2658 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 2659 MatGetRow_MPIAIJ, 2660 MatRestoreRow_MPIAIJ, 2661 MatMult_MPIAIJ, 2662 /* 4*/ MatMultAdd_MPIAIJ, 2663 MatMultTranspose_MPIAIJ, 2664 MatMultTransposeAdd_MPIAIJ, 2665 NULL, 2666 NULL, 2667 NULL, 2668 /*10*/ NULL, 2669 NULL, 2670 NULL, 2671 MatSOR_MPIAIJ, 2672 MatTranspose_MPIAIJ, 2673 /*15*/ MatGetInfo_MPIAIJ, 2674 MatEqual_MPIAIJ, 2675 MatGetDiagonal_MPIAIJ, 2676 MatDiagonalScale_MPIAIJ, 2677 MatNorm_MPIAIJ, 2678 /*20*/ MatAssemblyBegin_MPIAIJ, 2679 MatAssemblyEnd_MPIAIJ, 2680 MatSetOption_MPIAIJ, 2681 MatZeroEntries_MPIAIJ, 2682 /*24*/ MatZeroRows_MPIAIJ, 2683 NULL, 2684 NULL, 2685 NULL, 2686 NULL, 2687 /*29*/ MatSetUp_MPIAIJ, 2688 NULL, 2689 NULL, 2690 MatGetDiagonalBlock_MPIAIJ, 2691 NULL, 2692 /*34*/ MatDuplicate_MPIAIJ, 2693 NULL, 2694 NULL, 2695 NULL, 2696 NULL, 2697 /*39*/ MatAXPY_MPIAIJ, 2698 MatCreateSubMatrices_MPIAIJ, 
2699 MatIncreaseOverlap_MPIAIJ, 2700 MatGetValues_MPIAIJ, 2701 MatCopy_MPIAIJ, 2702 /*44*/ MatGetRowMax_MPIAIJ, 2703 MatScale_MPIAIJ, 2704 MatShift_MPIAIJ, 2705 MatDiagonalSet_MPIAIJ, 2706 MatZeroRowsColumns_MPIAIJ, 2707 /*49*/ MatSetRandom_MPIAIJ, 2708 MatGetRowIJ_MPIAIJ, 2709 MatRestoreRowIJ_MPIAIJ, 2710 NULL, 2711 NULL, 2712 /*54*/ MatFDColoringCreate_MPIXAIJ, 2713 NULL, 2714 MatSetUnfactored_MPIAIJ, 2715 MatPermute_MPIAIJ, 2716 NULL, 2717 /*59*/ MatCreateSubMatrix_MPIAIJ, 2718 MatDestroy_MPIAIJ, 2719 MatView_MPIAIJ, 2720 NULL, 2721 NULL, 2722 /*64*/ NULL, 2723 MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ, 2724 NULL, 2725 NULL, 2726 NULL, 2727 /*69*/ MatGetRowMaxAbs_MPIAIJ, 2728 MatGetRowMinAbs_MPIAIJ, 2729 NULL, 2730 NULL, 2731 NULL, 2732 NULL, 2733 /*75*/ MatFDColoringApply_AIJ, 2734 MatSetFromOptions_MPIAIJ, 2735 NULL, 2736 NULL, 2737 MatFindZeroDiagonals_MPIAIJ, 2738 /*80*/ NULL, 2739 NULL, 2740 NULL, 2741 /*83*/ MatLoad_MPIAIJ, 2742 MatIsSymmetric_MPIAIJ, 2743 NULL, 2744 NULL, 2745 NULL, 2746 NULL, 2747 /*89*/ NULL, 2748 NULL, 2749 MatMatMultNumeric_MPIAIJ_MPIAIJ, 2750 NULL, 2751 NULL, 2752 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ, 2753 NULL, 2754 NULL, 2755 NULL, 2756 MatBindToCPU_MPIAIJ, 2757 /*99*/ MatProductSetFromOptions_MPIAIJ, 2758 NULL, 2759 NULL, 2760 MatConjugate_MPIAIJ, 2761 NULL, 2762 /*104*/MatSetValuesRow_MPIAIJ, 2763 MatRealPart_MPIAIJ, 2764 MatImaginaryPart_MPIAIJ, 2765 NULL, 2766 NULL, 2767 /*109*/NULL, 2768 NULL, 2769 MatGetRowMin_MPIAIJ, 2770 NULL, 2771 MatMissingDiagonal_MPIAIJ, 2772 /*114*/MatGetSeqNonzeroStructure_MPIAIJ, 2773 NULL, 2774 MatGetGhosts_MPIAIJ, 2775 NULL, 2776 NULL, 2777 /*119*/MatMultDiagonalBlock_MPIAIJ, 2778 NULL, 2779 NULL, 2780 NULL, 2781 MatGetMultiProcBlock_MPIAIJ, 2782 /*124*/MatFindNonzeroRows_MPIAIJ, 2783 MatGetColumnReductions_MPIAIJ, 2784 MatInvertBlockDiagonal_MPIAIJ, 2785 MatInvertVariableBlockDiagonal_MPIAIJ, 2786 MatCreateSubMatricesMPI_MPIAIJ, 2787 /*129*/NULL, 2788 NULL, 2789 NULL, 2790 
MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ, 2791 NULL, 2792 /*134*/NULL, 2793 NULL, 2794 NULL, 2795 NULL, 2796 NULL, 2797 /*139*/MatSetBlockSizes_MPIAIJ, 2798 NULL, 2799 NULL, 2800 MatFDColoringSetUp_MPIXAIJ, 2801 MatFindOffBlockDiagonalEntries_MPIAIJ, 2802 MatCreateMPIMatConcatenateSeqMat_MPIAIJ, 2803 /*145*/NULL, 2804 NULL, 2805 NULL, 2806 MatCreateGraph_Simple_AIJ, 2807 MatFilter_AIJ 2808 }; 2809 2810 /* ----------------------------------------------------------------------------------------*/ 2811 2812 PetscErrorCode MatStoreValues_MPIAIJ(Mat mat) 2813 { 2814 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2815 2816 PetscFunctionBegin; 2817 PetscCall(MatStoreValues(aij->A)); 2818 PetscCall(MatStoreValues(aij->B)); 2819 PetscFunctionReturn(0); 2820 } 2821 2822 PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat) 2823 { 2824 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2825 2826 PetscFunctionBegin; 2827 PetscCall(MatRetrieveValues(aij->A)); 2828 PetscCall(MatRetrieveValues(aij->B)); 2829 PetscFunctionReturn(0); 2830 } 2831 2832 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 2833 { 2834 Mat_MPIAIJ *b; 2835 PetscMPIInt size; 2836 2837 PetscFunctionBegin; 2838 PetscCall(PetscLayoutSetUp(B->rmap)); 2839 PetscCall(PetscLayoutSetUp(B->cmap)); 2840 b = (Mat_MPIAIJ*)B->data; 2841 2842 #if defined(PETSC_USE_CTABLE) 2843 PetscCall(PetscTableDestroy(&b->colmap)); 2844 #else 2845 PetscCall(PetscFree(b->colmap)); 2846 #endif 2847 PetscCall(PetscFree(b->garray)); 2848 PetscCall(VecDestroy(&b->lvec)); 2849 PetscCall(VecScatterDestroy(&b->Mvctx)); 2850 2851 /* Because the B will have been resized we simply destroy it and create a new one each time */ 2852 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B),&size)); 2853 PetscCall(MatDestroy(&b->B)); 2854 PetscCall(MatCreate(PETSC_COMM_SELF,&b->B)); 2855 PetscCall(MatSetSizes(b->B,B->rmap->n,size > 1 ? B->cmap->N : 0,B->rmap->n,size > 1 ? 
B->cmap->N : 0)); 2856 PetscCall(MatSetBlockSizesFromMats(b->B,B,B)); 2857 PetscCall(MatSetType(b->B,MATSEQAIJ)); 2858 PetscCall(PetscLogObjectParent((PetscObject)B,(PetscObject)b->B)); 2859 2860 if (!B->preallocated) { 2861 PetscCall(MatCreate(PETSC_COMM_SELF,&b->A)); 2862 PetscCall(MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n)); 2863 PetscCall(MatSetBlockSizesFromMats(b->A,B,B)); 2864 PetscCall(MatSetType(b->A,MATSEQAIJ)); 2865 PetscCall(PetscLogObjectParent((PetscObject)B,(PetscObject)b->A)); 2866 } 2867 2868 PetscCall(MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz)); 2869 PetscCall(MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz)); 2870 B->preallocated = PETSC_TRUE; 2871 B->was_assembled = PETSC_FALSE; 2872 B->assembled = PETSC_FALSE; 2873 PetscFunctionReturn(0); 2874 } 2875 2876 PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B) 2877 { 2878 Mat_MPIAIJ *b; 2879 2880 PetscFunctionBegin; 2881 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 2882 PetscCall(PetscLayoutSetUp(B->rmap)); 2883 PetscCall(PetscLayoutSetUp(B->cmap)); 2884 b = (Mat_MPIAIJ*)B->data; 2885 2886 #if defined(PETSC_USE_CTABLE) 2887 PetscCall(PetscTableDestroy(&b->colmap)); 2888 #else 2889 PetscCall(PetscFree(b->colmap)); 2890 #endif 2891 PetscCall(PetscFree(b->garray)); 2892 PetscCall(VecDestroy(&b->lvec)); 2893 PetscCall(VecScatterDestroy(&b->Mvctx)); 2894 2895 PetscCall(MatResetPreallocation(b->A)); 2896 PetscCall(MatResetPreallocation(b->B)); 2897 B->preallocated = PETSC_TRUE; 2898 B->was_assembled = PETSC_FALSE; 2899 B->assembled = PETSC_FALSE; 2900 PetscFunctionReturn(0); 2901 } 2902 2903 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2904 { 2905 Mat mat; 2906 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 2907 2908 PetscFunctionBegin; 2909 *newmat = NULL; 2910 PetscCall(MatCreate(PetscObjectComm((PetscObject)matin),&mat)); 2911 PetscCall(MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N)); 2912 
PetscCall(MatSetBlockSizesFromMats(mat,matin,matin)); 2913 PetscCall(MatSetType(mat,((PetscObject)matin)->type_name)); 2914 a = (Mat_MPIAIJ*)mat->data; 2915 2916 mat->factortype = matin->factortype; 2917 mat->assembled = matin->assembled; 2918 mat->insertmode = NOT_SET_VALUES; 2919 mat->preallocated = matin->preallocated; 2920 2921 a->size = oldmat->size; 2922 a->rank = oldmat->rank; 2923 a->donotstash = oldmat->donotstash; 2924 a->roworiented = oldmat->roworiented; 2925 a->rowindices = NULL; 2926 a->rowvalues = NULL; 2927 a->getrowactive = PETSC_FALSE; 2928 2929 PetscCall(PetscLayoutReference(matin->rmap,&mat->rmap)); 2930 PetscCall(PetscLayoutReference(matin->cmap,&mat->cmap)); 2931 2932 if (oldmat->colmap) { 2933 #if defined(PETSC_USE_CTABLE) 2934 PetscCall(PetscTableCreateCopy(oldmat->colmap,&a->colmap)); 2935 #else 2936 PetscCall(PetscMalloc1(mat->cmap->N,&a->colmap)); 2937 PetscCall(PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt))); 2938 PetscCall(PetscArraycpy(a->colmap,oldmat->colmap,mat->cmap->N)); 2939 #endif 2940 } else a->colmap = NULL; 2941 if (oldmat->garray) { 2942 PetscInt len; 2943 len = oldmat->B->cmap->n; 2944 PetscCall(PetscMalloc1(len+1,&a->garray)); 2945 PetscCall(PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt))); 2946 if (len) PetscCall(PetscArraycpy(a->garray,oldmat->garray,len)); 2947 } else a->garray = NULL; 2948 2949 /* It may happen MatDuplicate is called with a non-assembled matrix 2950 In fact, MatDuplicate only requires the matrix to be preallocated 2951 This may happen inside a DMCreateMatrix_Shell */ 2952 if (oldmat->lvec) { 2953 PetscCall(VecDuplicate(oldmat->lvec,&a->lvec)); 2954 PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec)); 2955 } 2956 if (oldmat->Mvctx) { 2957 PetscCall(VecScatterCopy(oldmat->Mvctx,&a->Mvctx)); 2958 PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx)); 2959 } 2960 PetscCall(MatDuplicate(oldmat->A,cpvalues,&a->A)); 2961 
PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A)); 2962 PetscCall(MatDuplicate(oldmat->B,cpvalues,&a->B)); 2963 PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B)); 2964 PetscCall(PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist)); 2965 *newmat = mat; 2966 PetscFunctionReturn(0); 2967 } 2968 2969 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer) 2970 { 2971 PetscBool isbinary, ishdf5; 2972 2973 PetscFunctionBegin; 2974 PetscValidHeaderSpecific(newMat,MAT_CLASSID,1); 2975 PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2); 2976 /* force binary viewer to load .info file if it has not yet done so */ 2977 PetscCall(PetscViewerSetUp(viewer)); 2978 PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary)); 2979 PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERHDF5, &ishdf5)); 2980 if (isbinary) { 2981 PetscCall(MatLoad_MPIAIJ_Binary(newMat,viewer)); 2982 } else if (ishdf5) { 2983 #if defined(PETSC_HAVE_HDF5) 2984 PetscCall(MatLoad_AIJ_HDF5(newMat,viewer)); 2985 #else 2986 SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"HDF5 not supported in this build.\nPlease reconfigure using --download-hdf5"); 2987 #endif 2988 } else { 2989 SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newMat)->type_name); 2990 } 2991 PetscFunctionReturn(0); 2992 } 2993 2994 PetscErrorCode MatLoad_MPIAIJ_Binary(Mat mat, PetscViewer viewer) 2995 { 2996 PetscInt header[4],M,N,m,nz,rows,cols,sum,i; 2997 PetscInt *rowidxs,*colidxs; 2998 PetscScalar *matvals; 2999 3000 PetscFunctionBegin; 3001 PetscCall(PetscViewerSetUp(viewer)); 3002 3003 /* read in matrix header */ 3004 PetscCall(PetscViewerBinaryRead(viewer,header,4,NULL,PETSC_INT)); 3005 PetscCheck(header[0] == MAT_FILE_CLASSID,PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Not a 
matrix object in file"); 3006 M = header[1]; N = header[2]; nz = header[3]; 3007 PetscCheck(M >= 0,PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix row size (%" PetscInt_FMT ") in file is negative",M); 3008 PetscCheck(N >= 0,PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix column size (%" PetscInt_FMT ") in file is negative",N); 3009 PetscCheck(nz >= 0,PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk, cannot load as MPIAIJ"); 3010 3011 /* set block sizes from the viewer's .info file */ 3012 PetscCall(MatLoad_Binary_BlockSizes(mat,viewer)); 3013 /* set global sizes if not set already */ 3014 if (mat->rmap->N < 0) mat->rmap->N = M; 3015 if (mat->cmap->N < 0) mat->cmap->N = N; 3016 PetscCall(PetscLayoutSetUp(mat->rmap)); 3017 PetscCall(PetscLayoutSetUp(mat->cmap)); 3018 3019 /* check if the matrix sizes are correct */ 3020 PetscCall(MatGetSize(mat,&rows,&cols)); 3021 PetscCheck(M == rows && N == cols,PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%" PetscInt_FMT ", %" PetscInt_FMT ") than the input matrix (%" PetscInt_FMT ", %" PetscInt_FMT ")",M,N,rows,cols); 3022 3023 /* read in row lengths and build row indices */ 3024 PetscCall(MatGetLocalSize(mat,&m,NULL)); 3025 PetscCall(PetscMalloc1(m+1,&rowidxs)); 3026 PetscCall(PetscViewerBinaryReadAll(viewer,rowidxs+1,m,PETSC_DECIDE,M,PETSC_INT)); 3027 rowidxs[0] = 0; for (i=0; i<m; i++) rowidxs[i+1] += rowidxs[i]; 3028 PetscCall(MPIU_Allreduce(&rowidxs[m],&sum,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)viewer))); 3029 PetscCheck(sum == nz,PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Inconsistent matrix data in file: nonzeros = %" PetscInt_FMT ", sum-row-lengths = %" PetscInt_FMT,nz,sum); 3030 /* read in column indices and matrix values */ 3031 PetscCall(PetscMalloc2(rowidxs[m],&colidxs,rowidxs[m],&matvals)); 3032 
PetscCall(PetscViewerBinaryReadAll(viewer,colidxs,rowidxs[m],PETSC_DETERMINE,PETSC_DETERMINE,PETSC_INT)); 3033 PetscCall(PetscViewerBinaryReadAll(viewer,matvals,rowidxs[m],PETSC_DETERMINE,PETSC_DETERMINE,PETSC_SCALAR)); 3034 /* store matrix indices and values */ 3035 PetscCall(MatMPIAIJSetPreallocationCSR(mat,rowidxs,colidxs,matvals)); 3036 PetscCall(PetscFree(rowidxs)); 3037 PetscCall(PetscFree2(colidxs,matvals)); 3038 PetscFunctionReturn(0); 3039 } 3040 3041 /* Not scalable because of ISAllGather() unless getting all columns. */ 3042 PetscErrorCode ISGetSeqIS_Private(Mat mat,IS iscol,IS *isseq) 3043 { 3044 IS iscol_local; 3045 PetscBool isstride; 3046 PetscMPIInt lisstride=0,gisstride; 3047 3048 PetscFunctionBegin; 3049 /* check if we are grabbing all columns*/ 3050 PetscCall(PetscObjectTypeCompare((PetscObject)iscol,ISSTRIDE,&isstride)); 3051 3052 if (isstride) { 3053 PetscInt start,len,mstart,mlen; 3054 PetscCall(ISStrideGetInfo(iscol,&start,NULL)); 3055 PetscCall(ISGetLocalSize(iscol,&len)); 3056 PetscCall(MatGetOwnershipRangeColumn(mat,&mstart,&mlen)); 3057 if (mstart == start && mlen-mstart == len) lisstride = 1; 3058 } 3059 3060 PetscCall(MPIU_Allreduce(&lisstride,&gisstride,1,MPI_INT,MPI_MIN,PetscObjectComm((PetscObject)mat))); 3061 if (gisstride) { 3062 PetscInt N; 3063 PetscCall(MatGetSize(mat,NULL,&N)); 3064 PetscCall(ISCreateStride(PETSC_COMM_SELF,N,0,1,&iscol_local)); 3065 PetscCall(ISSetIdentity(iscol_local)); 3066 PetscCall(PetscInfo(mat,"Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n")); 3067 } else { 3068 PetscInt cbs; 3069 PetscCall(ISGetBlockSize(iscol,&cbs)); 3070 PetscCall(ISAllGather(iscol,&iscol_local)); 3071 PetscCall(ISSetBlockSize(iscol_local,cbs)); 3072 } 3073 3074 *isseq = iscol_local; 3075 PetscFunctionReturn(0); 3076 } 3077 3078 /* 3079 Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and global size of iscol_local 3080 (see MatCreateSubMatrix_MPIAIJ_nonscalable) 3081 3082 Input 
/*
 Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and global size of iscol_local
 (see MatCreateSubMatrix_MPIAIJ_nonscalable)

 Input Parameters:
   mat - matrix
   isrow - parallel row index set; its local indices are a subset of local rows of mat,
           i.e., mat->rstart <= isrow[i] < mat->rend
   iscol - parallel column index set; its local indices are a subset of local columns of mat,
           i.e., mat->cstart <= iscol[i] < mat->cend

 Output Parameters:
   isrow_d,iscol_d - sequential row and column index sets for retrieving mat->A
   iscol_o - sequential column index set for retrieving mat->B
   garray - column map; garray[i] indicates global location of iscol_o[i] in iscol.
            The array is allocated in this routine with PetscMalloc1() and handed to the caller,
            who must free it.

 Note:
   The routine scatters into the matrix's own local work vector (a->lvec), so the previous contents
   of that vector are overwritten.
 */
PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat,IS isrow,IS iscol,IS *isrow_d,IS *iscol_d,IS *iscol_o,const PetscInt *garray[])
{
  Vec            x,cmap;
  const PetscInt *is_idx;
  PetscScalar    *xarray,*cmaparray;
  PetscInt       ncols,isstart,*idx,m,rstart,*cmap1,count;
  Mat_MPIAIJ     *a=(Mat_MPIAIJ*)mat->data;
  Mat            B=a->B;
  Vec            lvec=a->lvec,lcmap;
  PetscInt       i,cstart,cend,Bn=B->cmap->N;
  MPI_Comm       comm;
  VecScatter     Mvctx=a->Mvctx;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)mat,&comm));
  PetscCall(ISGetLocalSize(iscol,&ncols));

  /* (1) iscol is a sub-column vector of mat, pad it with '-1.' to form a full vector x */
  /* x marks which columns are selected (value = global column, -1 = not selected);
     cmap carries, for each selected column, its position in the submatrix */
  PetscCall(MatCreateVecs(mat,&x,NULL));
  PetscCall(VecSet(x,-1.0));
  PetscCall(VecDuplicate(x,&cmap));
  PetscCall(VecSet(cmap,-1.0));

  /* Get start indices */
  /* MPI_Scan yields the inclusive prefix sum; subtracting ncols gives the offset of this
     process's first selected column among all selected columns */
  PetscCallMPI(MPI_Scan(&ncols,&isstart,1,MPIU_INT,MPI_SUM,comm));
  isstart -= ncols;
  PetscCall(MatGetOwnershipRangeColumn(mat,&cstart,&cend));

  PetscCall(ISGetIndices(iscol,&is_idx));
  PetscCall(VecGetArray(x,&xarray));
  PetscCall(VecGetArray(cmap,&cmaparray));
  PetscCall(PetscMalloc1(ncols,&idx));
  for (i=0; i<ncols; i++) {
    xarray[is_idx[i]-cstart]    = (PetscScalar)is_idx[i];
    cmaparray[is_idx[i]-cstart] = i + isstart;      /* global index of iscol[i] */
    idx[i]                      = is_idx[i]-cstart; /* local index of iscol[i]  */
  }
  PetscCall(VecRestoreArray(x,&xarray));
  PetscCall(VecRestoreArray(cmap,&cmaparray));
  PetscCall(ISRestoreIndices(iscol,&is_idx));

  /* Get iscol_d */
  /* PETSC_OWN_POINTER: idx now belongs to *iscol_d, so the variable can be reused below without freeing */
  PetscCall(ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,iscol_d));
  PetscCall(ISGetBlockSize(iscol,&i));
  PetscCall(ISSetBlockSize(*iscol_d,i));

  /* Get isrow_d */
  PetscCall(ISGetLocalSize(isrow,&m));
  rstart = mat->rmap->rstart;
  PetscCall(PetscMalloc1(m,&idx));
  PetscCall(ISGetIndices(isrow,&is_idx));
  /* shift global row numbers to the local numbering of the diagonal block */
  for (i=0; i<m; i++) idx[i] = is_idx[i]-rstart;
  PetscCall(ISRestoreIndices(isrow,&is_idx));

  PetscCall(ISCreateGeneral(PETSC_COMM_SELF,m,idx,PETSC_OWN_POINTER,isrow_d));
  PetscCall(ISGetBlockSize(isrow,&i));
  PetscCall(ISSetBlockSize(*isrow_d,i));

  /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */
  PetscCall(VecScatterBegin(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD));
  PetscCall(VecScatterEnd(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD));

  /* lcmap has the same layout as lvec and receives the submatrix column numbers */
  PetscCall(VecDuplicate(lvec,&lcmap));

  PetscCall(VecScatterBegin(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD));
  PetscCall(VecScatterEnd(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD));

  /* (3) create sequential iscol_o (a subset of iscol) and isgarray */
  /* off-process column indices */
  count = 0;
  PetscCall(PetscMalloc1(Bn,&idx));
  PetscCall(PetscMalloc1(Bn,&cmap1));

  PetscCall(VecGetArray(lvec,&xarray));
  PetscCall(VecGetArray(lcmap,&cmaparray));
  for (i=0; i<Bn; i++) {
    /* entries still at the -1 padding value were not selected by iscol on their owning process */
    if (PetscRealPart(xarray[i]) > -1.0) {
      idx[count]   = i;                                     /* local column index in off-diagonal part B */
      cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]); /* column index in submat */
      count++;
    }
  }
  PetscCall(VecRestoreArray(lvec,&xarray));
  PetscCall(VecRestoreArray(lcmap,&cmaparray));

  /* PETSC_COPY_VALUES: the IS keeps its own copy, so idx (sized Bn, only count entries used) is freed below */
  PetscCall(ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_COPY_VALUES,iscol_o));
  /* cannot ensure iscol_o has same blocksize as iscol! */

  PetscCall(PetscFree(idx));
  /* cmap1 is handed to the caller, who frees it */
  *garray = cmap1;

  PetscCall(VecDestroy(&x));
  PetscCall(VecDestroy(&cmap));
  PetscCall(VecDestroy(&lcmap));
  PetscFunctionReturn(0);
}
PetscCall(PetscObjectQuery((PetscObject)*submat,"iscol_d",(PetscObject*)&iscol_d)); 3209 PetscCheck(iscol_d,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_d passed in was not used before, cannot reuse"); 3210 3211 PetscCall(PetscObjectQuery((PetscObject)*submat,"iscol_o",(PetscObject*)&iscol_o)); 3212 PetscCheck(iscol_o,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_o passed in was not used before, cannot reuse"); 3213 3214 /* Update diagonal and off-diagonal portions of submat */ 3215 asub = (Mat_MPIAIJ*)(*submat)->data; 3216 PetscCall(MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->A)); 3217 PetscCall(ISGetLocalSize(iscol_o,&n)); 3218 if (n) { 3219 PetscCall(MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->B)); 3220 } 3221 PetscCall(MatAssemblyBegin(*submat,MAT_FINAL_ASSEMBLY)); 3222 PetscCall(MatAssemblyEnd(*submat,MAT_FINAL_ASSEMBLY)); 3223 3224 } else { /* call == MAT_INITIAL_MATRIX) */ 3225 const PetscInt *garray; 3226 PetscInt BsubN; 3227 3228 /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) 
*/ 3229 PetscCall(ISGetSeqIS_SameColDist_Private(mat,isrow,iscol,&isrow_d,&iscol_d,&iscol_o,&garray)); 3230 3231 /* Create local submatrices Asub and Bsub */ 3232 PetscCall(MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Asub)); 3233 PetscCall(MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Bsub)); 3234 3235 /* Create submatrix M */ 3236 PetscCall(MatCreateMPIAIJWithSeqAIJ(comm,Asub,Bsub,garray,&M)); 3237 3238 /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */ 3239 asub = (Mat_MPIAIJ*)M->data; 3240 3241 PetscCall(ISGetLocalSize(iscol_o,&BsubN)); 3242 n = asub->B->cmap->N; 3243 if (BsubN > n) { 3244 /* This case can be tested using ~petsc/src/tao/bound/tutorials/runplate2_3 */ 3245 const PetscInt *idx; 3246 PetscInt i,j,*idx_new,*subgarray = asub->garray; 3247 PetscCall(PetscInfo(M,"submatrix Bn %" PetscInt_FMT " != BsubN %" PetscInt_FMT ", update iscol_o\n",n,BsubN)); 3248 3249 PetscCall(PetscMalloc1(n,&idx_new)); 3250 j = 0; 3251 PetscCall(ISGetIndices(iscol_o,&idx)); 3252 for (i=0; i<n; i++) { 3253 if (j >= BsubN) break; 3254 while (subgarray[i] > garray[j]) j++; 3255 3256 if (subgarray[i] == garray[j]) { 3257 idx_new[i] = idx[j++]; 3258 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"subgarray[%" PetscInt_FMT "]=%" PetscInt_FMT " cannot < garray[%" PetscInt_FMT "]=%" PetscInt_FMT,i,subgarray[i],j,garray[j]); 3259 } 3260 PetscCall(ISRestoreIndices(iscol_o,&idx)); 3261 3262 PetscCall(ISDestroy(&iscol_o)); 3263 PetscCall(ISCreateGeneral(PETSC_COMM_SELF,n,idx_new,PETSC_OWN_POINTER,&iscol_o)); 3264 3265 } else if (BsubN < n) { 3266 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Columns of Bsub (%" PetscInt_FMT ") cannot be smaller than B's (%" PetscInt_FMT ")",BsubN,asub->B->cmap->N); 3267 } 3268 3269 PetscCall(PetscFree(garray)); 3270 *submat = M; 3271 3272 /* Save isrow_d, iscol_d and iscol_o used in processor for next request 
/*
   MatCreateSubMatrix_MPIAIJ - extracts a parallel submatrix of an MPIAIJ matrix, choosing among three
   implementations depending on how isrow and iscol are distributed relative to mat.

   Input Parameters:
+  mat   - the MPIAIJ matrix
.  isrow - parallel index set of the rows to extract
.  iscol - parallel index set of the columns to extract
-  call  - MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX

   Output Parameter:
.  newmat - the submatrix; with MAT_REUSE_MATRIX it must be a matrix previously produced by this routine

   Notes:
   With MAT_INITIAL_MATRIX the choice is made collectively (logical AND over all processes) from whether
   each process's isrow / iscol entries fall inside its own row / column ownership range. With
   MAT_REUSE_MATRIX the choice is recovered from the objects composed on *newmat ("isrow_d", "SubIScol",
   "ISAllGather").
*/
PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
{
  IS             iscol_local=NULL,isrow_d;
  PetscInt       csize;
  PetscInt       n,i,j,start,end;
  PetscBool      sameRowDist=PETSC_FALSE,sameDist[2],tsameDist[2];
  MPI_Comm       comm;

  PetscFunctionBegin;
  /* If isrow has same processor distribution as mat,
     call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */
  if (call == MAT_REUSE_MATRIX) {
    /* the composed objects tell which path created *newmat; tsameDist[1] is set only when sameRowDist
       is set, and it is read below only under that same condition */
    PetscCall(PetscObjectQuery((PetscObject)*newmat,"isrow_d",(PetscObject*)&isrow_d));
    if (isrow_d) {
      sameRowDist  = PETSC_TRUE;
      tsameDist[1] = PETSC_TRUE; /* sameColDist */
    } else {
      PetscCall(PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_local));
      if (iscol_local) {
        sameRowDist  = PETSC_TRUE;
        tsameDist[1] = PETSC_FALSE; /* !sameColDist */
      }
    }
  } else {
    /* Check if isrow has same processor distribution as mat */
    sameDist[0] = PETSC_FALSE;
    PetscCall(ISGetLocalSize(isrow,&n));
    if (!n) {
      /* an empty local part is compatible with any distribution */
      sameDist[0] = PETSC_TRUE;
    } else {
      PetscCall(ISGetMinMax(isrow,&i,&j));
      PetscCall(MatGetOwnershipRange(mat,&start,&end));
      if (i >= start && j < end) {
        sameDist[0] = PETSC_TRUE;
      }
    }

    /* Check if iscol has same processor distribution as mat */
    sameDist[1] = PETSC_FALSE;
    PetscCall(ISGetLocalSize(iscol,&n));
    if (!n) {
      sameDist[1] = PETSC_TRUE;
    } else {
      PetscCall(ISGetMinMax(iscol,&i,&j));
      PetscCall(MatGetOwnershipRangeColumn(mat,&start,&end));
      if (i >= start && j < end) sameDist[1] = PETSC_TRUE;
    }

    /* every process must agree, otherwise the processes would take different (collective) code paths */
    PetscCall(PetscObjectGetComm((PetscObject)mat,&comm));
    PetscCall(MPIU_Allreduce(&sameDist,&tsameDist,2,MPIU_BOOL,MPI_LAND,comm));
    sameRowDist = tsameDist[0];
  }

  if (sameRowDist) {
    if (tsameDist[1]) { /* sameRowDist & sameColDist */
      /* isrow and iscol have same processor distribution as mat */
      PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat,isrow,iscol,call,newmat));
      PetscFunctionReturn(0);
    } else { /* sameRowDist */
      /* isrow has same processor distribution as mat */
      if (call == MAT_INITIAL_MATRIX) {
        PetscBool sorted;
        PetscCall(ISGetSeqIS_Private(mat,iscol,&iscol_local));
        PetscCall(ISGetLocalSize(iscol_local,&n)); /* local size of iscol_local = global columns of newmat */
        PetscCall(ISGetSize(iscol,&i));
        PetscCheck(n == i,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"n %" PetscInt_FMT " != size of iscol %" PetscInt_FMT,n,i);

        PetscCall(ISSorted(iscol_local,&sorted));
        if (sorted) {
          /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local be sorted; it can have duplicate indices */
          PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,iscol_local,MAT_INITIAL_MATRIX,newmat));
          PetscFunctionReturn(0);
        }
        /* not sorted: fall through to the general case below, which reuses the iscol_local built here */
      } else { /* call == MAT_REUSE_MATRIX */
        IS iscol_sub;
        PetscCall(PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub));
        if (iscol_sub) {
          PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,NULL,call,newmat));
          PetscFunctionReturn(0);
        }
      }
    }
  }

  /* General case: iscol -> iscol_local which has global size of iscol */
  if (call == MAT_REUSE_MATRIX) {
    PetscCall(PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local));
    PetscCheck(iscol_local,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
  } else {
    /* iscol_local may already have been built above when the sorted test failed */
    if (!iscol_local) {
      PetscCall(ISGetSeqIS_Private(mat,iscol,&iscol_local));
    }
  }

  PetscCall(ISGetLocalSize(iscol,&csize));
  PetscCall(MatCreateSubMatrix_MPIAIJ_nonscalable(mat,isrow,iscol_local,csize,call,newmat));

  if (call == MAT_INITIAL_MATRIX) {
    /* attach the gathered IS to the new matrix so that a later MAT_REUSE_MATRIX call can find it,
       then drop this routine's own reference */
    PetscCall(PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local));
    PetscCall(ISDestroy(&iscol_local));
  }
  PetscFunctionReturn(0);
}

.seealso: `MatCreateMPIAIJWithSplitArrays()`
@*/
PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm,Mat A,Mat B,const PetscInt garray[],Mat *mat)
{
  Mat_MPIAIJ        *maij;
  Mat_SeqAIJ        *b=(Mat_SeqAIJ*)B->data,*bnew;
  PetscInt          *oi=b->i,*oj=b->j,i,nz,col;
  const PetscScalar *oa;
  Mat               Bnew;
  PetscInt          m,n,N;

  PetscFunctionBegin;
  PetscCall(MatCreate(comm,mat));
  PetscCall(MatGetSize(A,&m,&n));
  /* A and B must agree in number of rows and in row block size */
  PetscCheck(m == B->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Am %" PetscInt_FMT " != Bm %" PetscInt_FMT,m,B->rmap->N);
  PetscCheck(A->rmap->bs == B->rmap->bs,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A row bs %" PetscInt_FMT " != B row bs %" PetscInt_FMT,A->rmap->bs,B->rmap->bs);
  /* remove check below; When B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be same as A */
  /* PetscCheck(A->cmap->bs == B->cmap->bs,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %" PetscInt_FMT " != B column bs %" PetscInt_FMT,A->cmap->bs,B->cmap->bs); */

  /* Get global columns of mat: N is the sum over comm of the column counts of the A blocks */
  PetscCall(MPIU_Allreduce(&n,&N,1,MPIU_INT,MPI_SUM,comm));

  PetscCall(MatSetSizes(*mat,m,n,PETSC_DECIDE,N));
  PetscCall(MatSetType(*mat,MATMPIAIJ));
  PetscCall(MatSetBlockSizes(*mat,A->rmap->bs,A->cmap->bs));
  maij = (Mat_MPIAIJ*)(*mat)->data;

  /* mark as preallocated by hand: the two sequential blocks are installed directly below */
  (*mat)->preallocated = PETSC_TRUE;

  PetscCall(PetscLayoutSetUp((*mat)->rmap));
  PetscCall(PetscLayoutSetUp((*mat)->cmap));

  /* Set A as diagonal portion of *mat */
  maij->A = A;

  /* Rewrite B's column indices in place: every stored index col becomes garray[col] */
  nz = oi[m];
  for (i=0; i<nz; i++) {
    col   = oj[i];
    oj[i] = garray[col];
  }

  /* Set Bnew as off-diagonal portion of *mat; it is built on B's own i/j/value arrays and has N columns */
  PetscCall(MatSeqAIJGetArrayRead(B,&oa));
  PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,N,oi,oj,(PetscScalar*)oa,&Bnew));
  PetscCall(MatSeqAIJRestoreArrayRead(B,&oa));
  bnew        = (Mat_SeqAIJ*)Bnew->data;
  bnew->maxnz = b->maxnz; /* allocated nonzeros of B */
  maij->B     = Bnew;

  PetscCheck(B->rmap->N == Bnew->rmap->N,PETSC_COMM_SELF,PETSC_ERR_PLIB,"BN %" PetscInt_FMT " != BnewN %" PetscInt_FMT,B->rmap->N,Bnew->rmap->N);

  /* Clear B's ownership flags before destroying it so the arrays now used by Bnew are not freed with B */
  b->singlemalloc = PETSC_FALSE; /* B arrays are shared by Bnew */
  b->free_a       = PETSC_FALSE;
  b->free_ij      = PETSC_FALSE;
  PetscCall(MatDestroy(&B));

  /* Bnew takes over responsibility for releasing the arrays */
  bnew->singlemalloc = PETSC_TRUE; /* arrays will be freed by MatDestroy(&Bnew) */
  bnew->free_a       = PETSC_TRUE;
  bnew->free_ij      = PETSC_TRUE;

  /* condense columns of maij->B; MAT_NO_OFF_PROC_ENTRIES is enabled only for the duration of this assembly */
  PetscCall(MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE));
  PetscCall(MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY));
  PetscCall(MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE));
  PetscCall(MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE));
  PetscFunctionReturn(0);
}

extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool,Mat*);

/*
   MatCreateSubMatrix_MPIAIJ_SameRowDist - creates (MAT_INITIAL_MATRIX) or refills (MAT_REUSE_MATRIX) the
   parallel submatrix *newmat of mat, going through an intermediate sequential matrix Msub obtained from
   MatCreateSubMatrices_MPIAIJ_SingleIS_Local().

   Input Parameters:
+  mat         - the MPIAIJ matrix to extract from
.  isrow       - row index set, forwarded to MatCreateSubMatrices_MPIAIJ_SingleIS_Local()
.  iscol       - column index set; supplies the local/global column sizes and the column block size
.  iscol_local - sequential index set with the requested columns; unless every process takes all columns it
                 must be sorted (duplicates are allowed). Only dereferenced for MAT_INITIAL_MATRIX.
-  call        - MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX

   Output Parameter:
.  newmat - the submatrix

   Notes:
   For MAT_INITIAL_MATRIX the helper objects are composed on *newmat under the keys "SubMatrix", "SubIScol",
   "Subcmap" and, when iscol_local is non-NULL, "ISAllGather"; after composing, this routine releases its own
   reference to each of them, including iscol_local. MAT_REUSE_MATRIX queries the first three back and errors
   if one of them is missing.

   NOTE(review): the name suggests this path is meant for an isrow that follows mat's row distribution;
   that is decided by the caller and not visible here - confirm.
*/
PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat,IS isrow,IS iscol,IS iscol_local,MatReuse call,Mat *newmat)
{
  PetscInt       i,m,n,rstart,row,rend,nz,j,bs,cbs;
  PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
  Mat_MPIAIJ     *a=(Mat_MPIAIJ*)mat->data;
  Mat            M,Msub,B=a->B;
  MatScalar      *aa;
  Mat_SeqAIJ     *aij;
  PetscInt       *garray = a->garray,*colsub,Ncols;
  PetscInt       count,Bn=B->cmap->N,cstart=mat->cmap->rstart,cend=mat->cmap->rend;
  IS             iscol_sub,iscmap;
  const PetscInt *is_idx,*cmap;
  PetscBool      allcolumns=PETSC_FALSE;
  MPI_Comm       comm;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)mat,&comm));
  if (call == MAT_REUSE_MATRIX) {
    /* fetch the objects that the MAT_INITIAL_MATRIX call composed on *newmat */
    PetscCall(PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub));
    PetscCheck(iscol_sub,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"SubIScol passed in was not used before, cannot reuse");
    PetscCall(ISGetLocalSize(iscol_sub,&count));

    PetscCall(PetscObjectQuery((PetscObject)*newmat,"Subcmap",(PetscObject*)&iscmap));
    PetscCheck(iscmap,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Subcmap passed in was not used before, cannot reuse");

    PetscCall(PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Msub));
    PetscCheck(Msub,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");

    PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_REUSE_MATRIX,PETSC_FALSE,&Msub));

  } else { /* call == MAT_INITIAL_MATRIX */
    PetscBool flg;

    PetscCall(ISGetLocalSize(iscol,&n));
    PetscCall(ISGetSize(iscol,&Ncols));

    /* (1) iscol -> nonscalable iscol_local */
    /* Check for special case: each processor gets entire matrix columns */
    PetscCall(ISIdentity(iscol_local,&flg));
    if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE;
    /* allcolumns stays true only if it is true on every process of mat's communicator */
    PetscCall(MPIU_Allreduce(MPI_IN_PLACE,&allcolumns,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)mat)));
    if (allcolumns) {
      /* reuse iscol_local itself (extra reference taken) and use the identity 0..n-1 as the column map */
      iscol_sub = iscol_local;
      PetscCall(PetscObjectReference((PetscObject)iscol_local));
      PetscCall(ISCreateStride(PETSC_COMM_SELF,n,0,1,&iscmap));

    } else {
      /* (2) iscol_local -> iscol_sub and iscmap. Implementation below requires iscol_local be sorted, it can have duplicate indices */
      PetscInt *idx,*cmap1,k;
      PetscCall(PetscMalloc1(Ncols,&idx));
      PetscCall(PetscMalloc1(Ncols,&cmap1));
      PetscCall(ISGetIndices(iscol_local,&is_idx));
      count = 0;
      k     = 0; /* cursor into garray; it only moves forward, which is why sorted input is required */
      for (i=0; i<Ncols; i++) {
        j = is_idx[i];
        if (j >= cstart && j < cend) {
          /* diagonal part of mat */
          idx[count]     = j;
          cmap1[count++] = i; /* column index in submat */
        } else if (Bn) {
          /* off-diagonal part of mat: keep j only if it appears in garray */
          if (j == garray[k]) {
            idx[count]     = j;
            cmap1[count++] = i; /* column index in submat */
          } else if (j > garray[k]) {
            /* advance k while garray[k] is still smaller than j, never past the last entry */
            while (j > garray[k] && k < Bn-1) k++;
            if (j == garray[k]) {
              idx[count]     = j;
              cmap1[count++] = i; /* column index in submat */
            }
          }
        }
      }
      PetscCall(ISRestoreIndices(iscol_local,&is_idx));

      /* idx and cmap1 are handed over to the index sets (PETSC_OWN_POINTER), so they are not freed here */
      PetscCall(ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_OWN_POINTER,&iscol_sub));
      PetscCall(ISGetBlockSize(iscol,&cbs));
      PetscCall(ISSetBlockSize(iscol_sub,cbs));

      PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local),count,cmap1,PETSC_OWN_POINTER,&iscmap));
    }

    /* (3) Create sequential Msub */
    PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_INITIAL_MATRIX,allcolumns,&Msub));
  }

  PetscCall(ISGetLocalSize(iscol_sub,&count));
  aij  = (Mat_SeqAIJ*)(Msub)->data;
  ii   = aij->i;
  /* cmap[c] is the column of the submatrix that column c of Msub maps to */
  PetscCall(ISGetIndices(iscmap,&cmap));

  /*
      m - number of local rows
      Ncols - number of columns (same on all processors)
      rstart - first row in new global matrix generated
  */
  PetscCall(MatGetSize(Msub,&m,NULL));

  if (call == MAT_INITIAL_MATRIX) {
    /* (4) Create parallel newmat */
    PetscMPIInt    rank,size;
    PetscInt       csize;

    PetscCallMPI(MPI_Comm_size(comm,&size));
    PetscCallMPI(MPI_Comm_rank(comm,&rank));

    /*
        Determine the number of non-zeros in the diagonal and off-diagonal
        portions of the matrix in order to do correct preallocation
    */

    /* first get start and end of "diagonal" columns */
    PetscCall(ISGetLocalSize(iscol,&csize));
    if (csize == PETSC_DECIDE) {
      PetscCall(ISGetSize(isrow,&mglobal));
      if (mglobal == Ncols) { /* square matrix */
        nlocal = m;
      } else {
        nlocal = Ncols/size + ((Ncols % size) > rank);
      }
    } else {
      nlocal = csize;
    }
    /* inclusive prefix sum: [rstart,rend) is this process's column range in the new matrix */
    PetscCallMPI(MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm));
    rstart = rend - nlocal;
    PetscCheck(rank != size - 1 || rend == Ncols,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT,rend,Ncols);

    /* next, compute all the lengths; dlens and olens share one allocation */
    jj    = aij->j;
    PetscCall(PetscMalloc1(2*m+1,&dlens));
    olens = dlens + m;
    for (i=0; i<m; i++) {
      jend = ii[i+1] - ii[i];
      olen = 0;
      dlen = 0;
      for (j=0; j<jend; j++) {
        if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++;
        else dlen++;
        jj++;
      }
      olens[i] = olen;
      dlens[i] = dlen;
    }

    PetscCall(ISGetBlockSize(isrow,&bs));
    PetscCall(ISGetBlockSize(iscol,&cbs));

    PetscCall(MatCreate(comm,&M));
    PetscCall(MatSetSizes(M,m,nlocal,PETSC_DECIDE,Ncols));
    PetscCall(MatSetBlockSizes(M,bs,cbs));
    PetscCall(MatSetType(M,((PetscObject)mat)->type_name));
    PetscCall(MatMPIAIJSetPreallocation(M,0,dlens,0,olens));
    PetscCall(PetscFree(dlens));

  } else { /* call == MAT_REUSE_MATRIX */
    M = *newmat;
    PetscCall(MatGetLocalSize(M,&i,NULL));
    PetscCheck(i == m,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
    PetscCall(MatZeroEntries(M));
    /*
         The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
       rather than the slower MatSetValues().
    */
    M->was_assembled = PETSC_TRUE;
    M->assembled     = PETSC_FALSE;
  }

  /* (5) Set values of Msub to *newmat */
  PetscCall(PetscMalloc1(count,&colsub)); /* scratch for one row's mapped column indices */
  /* from here on rstart is the first locally owned row of M (it was a column offset in step (4)) */
  PetscCall(MatGetOwnershipRange(M,&rstart,NULL));

  jj   = aij->j;
  PetscCall(MatSeqAIJGetArrayRead(Msub,(const PetscScalar**)&aa));
  for (i=0; i<m; i++) {
    row = rstart + i;
    nz  = ii[i+1] - ii[i];
    for (j=0; j<nz; j++) colsub[j] = cmap[jj[j]];
    PetscCall(MatSetValues_MPIAIJ(M,1,&row,nz,colsub,aa,INSERT_VALUES));
    jj += nz; aa += nz;
  }
  PetscCall(MatSeqAIJRestoreArrayRead(Msub,(const PetscScalar**)&aa));
  PetscCall(ISRestoreIndices(iscmap,&cmap));

  PetscCall(MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY));

  PetscCall(PetscFree(colsub));

  /* save Msub, iscol_sub and iscmap used in processor for next request */
  if (call == MAT_INITIAL_MATRIX) {
    *newmat = M;
    /* each object is composed on *newmat and then this routine's own reference to it is released */
    PetscCall(PetscObjectCompose((PetscObject)(*newmat),"SubMatrix",(PetscObject)Msub));
    PetscCall(MatDestroy(&Msub));

    PetscCall(PetscObjectCompose((PetscObject)(*newmat),"SubIScol",(PetscObject)iscol_sub));
    PetscCall(ISDestroy(&iscol_sub));

    PetscCall(PetscObjectCompose((PetscObject)(*newmat),"Subcmap",(PetscObject)iscmap));
    PetscCall(ISDestroy(&iscmap));

    if (iscol_local) {
      PetscCall(PetscObjectCompose((PetscObject)(*newmat),"ISAllGather",(PetscObject)iscol_local));
      PetscCall(ISDestroy(&iscol_local));
    }
  }
  PetscFunctionReturn(0);
}

/*
    Not great since it makes two copies of the submatrix, first an SeqAIJ
  in local and then by concatenating the local matrices the end result.
  Writing it directly would be much like MatCreateSubMatrices_MPIAIJ()

  Note: This requires a sequential iscol with all indices.
3695 */ 3696 PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 3697 { 3698 PetscMPIInt rank,size; 3699 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs; 3700 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 3701 Mat M,Mreuse; 3702 MatScalar *aa,*vwork; 3703 MPI_Comm comm; 3704 Mat_SeqAIJ *aij; 3705 PetscBool colflag,allcolumns=PETSC_FALSE; 3706 3707 PetscFunctionBegin; 3708 PetscCall(PetscObjectGetComm((PetscObject)mat,&comm)); 3709 PetscCallMPI(MPI_Comm_rank(comm,&rank)); 3710 PetscCallMPI(MPI_Comm_size(comm,&size)); 3711 3712 /* Check for special case: each processor gets entire matrix columns */ 3713 PetscCall(ISIdentity(iscol,&colflag)); 3714 PetscCall(ISGetLocalSize(iscol,&n)); 3715 if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE; 3716 PetscCall(MPIU_Allreduce(MPI_IN_PLACE,&allcolumns,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)mat))); 3717 3718 if (call == MAT_REUSE_MATRIX) { 3719 PetscCall(PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse)); 3720 PetscCheck(Mreuse,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3721 PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,allcolumns,&Mreuse)); 3722 } else { 3723 PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,allcolumns,&Mreuse)); 3724 } 3725 3726 /* 3727 m - number of local rows 3728 n - number of columns (same on all processors) 3729 rstart - first row in new global matrix generated 3730 */ 3731 PetscCall(MatGetSize(Mreuse,&m,&n)); 3732 PetscCall(MatGetBlockSizes(Mreuse,&bs,&cbs)); 3733 if (call == MAT_INITIAL_MATRIX) { 3734 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3735 ii = aij->i; 3736 jj = aij->j; 3737 3738 /* 3739 Determine the number of non-zeros in the diagonal and off-diagonal 3740 portions of the matrix in order to do correct preallocation 3741 */ 3742 3743 
/* first get start and end of "diagonal" columns */ 3744 if (csize == PETSC_DECIDE) { 3745 PetscCall(ISGetSize(isrow,&mglobal)); 3746 if (mglobal == n) { /* square matrix */ 3747 nlocal = m; 3748 } else { 3749 nlocal = n/size + ((n % size) > rank); 3750 } 3751 } else { 3752 nlocal = csize; 3753 } 3754 PetscCallMPI(MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm)); 3755 rstart = rend - nlocal; 3756 PetscCheck(rank != size - 1 || rend == n,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT,rend,n); 3757 3758 /* next, compute all the lengths */ 3759 PetscCall(PetscMalloc1(2*m+1,&dlens)); 3760 olens = dlens + m; 3761 for (i=0; i<m; i++) { 3762 jend = ii[i+1] - ii[i]; 3763 olen = 0; 3764 dlen = 0; 3765 for (j=0; j<jend; j++) { 3766 if (*jj < rstart || *jj >= rend) olen++; 3767 else dlen++; 3768 jj++; 3769 } 3770 olens[i] = olen; 3771 dlens[i] = dlen; 3772 } 3773 PetscCall(MatCreate(comm,&M)); 3774 PetscCall(MatSetSizes(M,m,nlocal,PETSC_DECIDE,n)); 3775 PetscCall(MatSetBlockSizes(M,bs,cbs)); 3776 PetscCall(MatSetType(M,((PetscObject)mat)->type_name)); 3777 PetscCall(MatMPIAIJSetPreallocation(M,0,dlens,0,olens)); 3778 PetscCall(PetscFree(dlens)); 3779 } else { 3780 PetscInt ml,nl; 3781 3782 M = *newmat; 3783 PetscCall(MatGetLocalSize(M,&ml,&nl)); 3784 PetscCheck(ml == m,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3785 PetscCall(MatZeroEntries(M)); 3786 /* 3787 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3788 rather than the slower MatSetValues(). 
3789 */ 3790 M->was_assembled = PETSC_TRUE; 3791 M->assembled = PETSC_FALSE; 3792 } 3793 PetscCall(MatGetOwnershipRange(M,&rstart,&rend)); 3794 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3795 ii = aij->i; 3796 jj = aij->j; 3797 3798 /* trigger copy to CPU if needed */ 3799 PetscCall(MatSeqAIJGetArrayRead(Mreuse,(const PetscScalar**)&aa)); 3800 for (i=0; i<m; i++) { 3801 row = rstart + i; 3802 nz = ii[i+1] - ii[i]; 3803 cwork = jj; jj += nz; 3804 vwork = aa; aa += nz; 3805 PetscCall(MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES)); 3806 } 3807 PetscCall(MatSeqAIJRestoreArrayRead(Mreuse,(const PetscScalar**)&aa)); 3808 3809 PetscCall(MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY)); 3810 PetscCall(MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY)); 3811 *newmat = M; 3812 3813 /* save submatrix used in processor for next request */ 3814 if (call == MAT_INITIAL_MATRIX) { 3815 PetscCall(PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse)); 3816 PetscCall(MatDestroy(&Mreuse)); 3817 } 3818 PetscFunctionReturn(0); 3819 } 3820 3821 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[]) 3822 { 3823 PetscInt m,cstart, cend,j,nnz,i,d; 3824 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart,ii; 3825 const PetscInt *JJ; 3826 PetscBool nooffprocentries; 3827 3828 PetscFunctionBegin; 3829 PetscCheck(Ii[0] == 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %" PetscInt_FMT,Ii[0]); 3830 3831 PetscCall(PetscLayoutSetUp(B->rmap)); 3832 PetscCall(PetscLayoutSetUp(B->cmap)); 3833 m = B->rmap->n; 3834 cstart = B->cmap->rstart; 3835 cend = B->cmap->rend; 3836 rstart = B->rmap->rstart; 3837 3838 PetscCall(PetscCalloc2(m,&d_nnz,m,&o_nnz)); 3839 3840 if (PetscDefined(USE_DEBUG)) { 3841 for (i=0; i<m; i++) { 3842 nnz = Ii[i+1]- Ii[i]; 3843 JJ = J + Ii[i]; 3844 PetscCheck(nnz >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %" PetscInt_FMT " has a negative %" PetscInt_FMT " number of columns",i,nnz); 3845 
PetscCheck(!nnz || !(JJ[0] < 0),PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %" PetscInt_FMT " starts with negative column index %" PetscInt_FMT,i,JJ[0]); 3846 PetscCheck(!nnz || !(JJ[nnz-1] >= B->cmap->N),PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %" PetscInt_FMT " ends with too large a column index %" PetscInt_FMT " (max allowed %" PetscInt_FMT ")",i,JJ[nnz-1],B->cmap->N); 3847 } 3848 } 3849 3850 for (i=0; i<m; i++) { 3851 nnz = Ii[i+1]- Ii[i]; 3852 JJ = J + Ii[i]; 3853 nnz_max = PetscMax(nnz_max,nnz); 3854 d = 0; 3855 for (j=0; j<nnz; j++) { 3856 if (cstart <= JJ[j] && JJ[j] < cend) d++; 3857 } 3858 d_nnz[i] = d; 3859 o_nnz[i] = nnz - d; 3860 } 3861 PetscCall(MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz)); 3862 PetscCall(PetscFree2(d_nnz,o_nnz)); 3863 3864 for (i=0; i<m; i++) { 3865 ii = i + rstart; 3866 PetscCall(MatSetValues_MPIAIJ(B,1,&ii,Ii[i+1] - Ii[i],J+Ii[i], v ? v + Ii[i] : NULL,INSERT_VALUES)); 3867 } 3868 nooffprocentries = B->nooffprocentries; 3869 B->nooffprocentries = PETSC_TRUE; 3870 PetscCall(MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY)); 3871 PetscCall(MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY)); 3872 B->nooffprocentries = nooffprocentries; 3873 3874 PetscCall(MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE)); 3875 PetscFunctionReturn(0); 3876 } 3877 3878 /*@ 3879 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 3880 (the default parallel PETSc format). 3881 3882 Collective 3883 3884 Input Parameters: 3885 + B - the matrix 3886 . i - the indices into j for the start of each local row (starts with zero) 3887 . j - the column indices for each local row (starts with zero) 3888 - v - optional values in the matrix 3889 3890 Level: developer 3891 3892 Notes: 3893 The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc; 3894 thus you CANNOT change the matrix entries by changing the values of v[] after you have 3895 called this routine. 
Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3896 3897 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 3898 3899 The format which is used for the sparse matrix input, is equivalent to a 3900 row-major ordering.. i.e for the following matrix, the input data expected is 3901 as shown 3902 3903 $ 1 0 0 3904 $ 2 0 3 P0 3905 $ ------- 3906 $ 4 5 6 P1 3907 $ 3908 $ Process0 [P0]: rows_owned=[0,1] 3909 $ i = {0,1,3} [size = nrow+1 = 2+1] 3910 $ j = {0,0,2} [size = 3] 3911 $ v = {1,2,3} [size = 3] 3912 $ 3913 $ Process1 [P1]: rows_owned=[2] 3914 $ i = {0,3} [size = nrow+1 = 1+1] 3915 $ j = {0,1,2} [size = 3] 3916 $ v = {4,5,6} [size = 3] 3917 3918 .seealso: `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatCreateAIJ()`, `MATMPIAIJ`, 3919 `MatCreateSeqAIJWithArrays()`, `MatCreateMPIAIJWithSplitArrays()` 3920 @*/ 3921 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3922 { 3923 PetscFunctionBegin; 3924 PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v)); 3925 PetscFunctionReturn(0); 3926 } 3927 3928 /*@C 3929 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format 3930 (the default parallel PETSc format). For good matrix assembly performance 3931 the user should preallocate the matrix storage by setting the parameters 3932 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3933 performance can be increased by more than a factor of 50. 3934 3935 Collective 3936 3937 Input Parameters: 3938 + B - the matrix 3939 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 3940 (same value is used for all local rows) 3941 . 
d_nnz - array containing the number of nonzeros in the various rows of the
           DIAGONAL portion of the local submatrix (possibly different for each row)
           or NULL (PETSC_NULL_INTEGER in Fortran), if d_nz is used to specify the nonzero structure.
           The size of this array is equal to the number of local rows, i.e 'm'.
           For matrices that will be factored, you must leave room for (and set)
           the diagonal entry even if it is zero.
.  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
           submatrix (same value is used for all local rows).
-  o_nnz - array containing the number of nonzeros in the various rows of the
           OFF-DIAGONAL portion of the local submatrix (possibly different for
           each row) or NULL (PETSC_NULL_INTEGER in Fortran), if o_nz is used to specify the nonzero
           structure. The size of this array is equal to the number
           of local rows, i.e 'm'.

   If the *_nnz parameter is given then the *_nz parameter is ignored

   The AIJ format (also called the Yale sparse matrix format or
   compressed row storage (CSR)), is fully compatible with standard Fortran 77
   storage.  The stored row and column indices begin with zero.
   See Users-Manual: ch_mat for details.

   The parallel matrix is partitioned such that the first m0 rows belong to
   process 0, the next m1 rows belong to process 1, the next m2 rows belong
   to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.

   The DIAGONAL portion of the local submatrix of a processor can be defined
   as the submatrix which is obtained by extracting the part corresponding to
   the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
   first row that belongs to the processor, r2 is the last row belonging to
   this processor, and c1-c2 is the range of indices of the local part of a
   vector suitable for applying the matrix to.  This is an mxn matrix.
In the 3972 common case of a square matrix, the row and column ranges are the same and 3973 the DIAGONAL part is also square. The remaining portion of the local 3974 submatrix (mxN) constitute the OFF-DIAGONAL portion. 3975 3976 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 3977 3978 You can call MatGetInfo() to get information on how effective the preallocation was; 3979 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3980 You can also run with the option -info and look for messages with the string 3981 malloc in them to see if additional memory allocation was needed. 3982 3983 Example usage: 3984 3985 Consider the following 8x8 matrix with 34 non-zero values, that is 3986 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 3987 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 3988 as follows: 3989 3990 .vb 3991 1 2 0 | 0 3 0 | 0 4 3992 Proc0 0 5 6 | 7 0 0 | 8 0 3993 9 0 10 | 11 0 0 | 12 0 3994 ------------------------------------- 3995 13 0 14 | 15 16 17 | 0 0 3996 Proc1 0 18 0 | 19 20 21 | 0 0 3997 0 0 0 | 22 23 0 | 24 0 3998 ------------------------------------- 3999 Proc2 25 26 27 | 0 0 28 | 29 0 4000 30 0 0 | 31 32 33 | 0 34 4001 .ve 4002 4003 This can be represented as a collection of submatrices as: 4004 4005 .vb 4006 A B C 4007 D E F 4008 G H I 4009 .ve 4010 4011 Where the submatrices A,B,C are owned by proc0, D,E,F are 4012 owned by proc1, G,H,I are owned by proc2. 4013 4014 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4015 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4016 The 'M','N' parameters are 8,8, and have the same values on all procs. 4017 4018 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 4019 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 4020 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 
   Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
   part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.

   When d_nz, o_nz parameters are specified, d_nz storage elements are
   allocated for every row of the local diagonal submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submat.
   One way to choose d_nz and o_nz is to use the max nonzeros per local
   rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
   In this case, the values of d_nz,o_nz are:
.vb
     proc0 : d_nz = 2, o_nz = 2
     proc1 : d_nz = 3, o_nz = 2
     proc2 : d_nz = 1, o_nz = 4
.ve
   We are allocating m*(d_nz+o_nz) storage locations for every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc2, i.e. we are using 12+15+10=37 storage locations to store
   34 values.

   When d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
   In the above case the values for d_nnz,o_nnz are:
.vb
     proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2: d_nnz = [1,1]   and o_nnz = [4,4]
.ve
   Here the space allocated is sum of all the above values i.e 34, and
   hence pre-allocation is perfect.

   Level: intermediate

.seealso: `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatCreateAIJ()`, `MatMPIAIJSetPreallocationCSR()`,
          `MATMPIAIJ`, `MatGetInfo()`, `PetscSplitOwnership()`
@*/
PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
{
  PetscFunctionBegin;
  /* B must be a valid Mat whose type has been set */
  PetscValidHeaderSpecific(B,MAT_CLASSID,1);
  PetscValidType(B,1);
  /* forwards to the implementation composed on B as "MatMPIAIJSetPreallocation_C" */
  PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));
  PetscFunctionReturn(0);
}

/*@
     MatCreateMPIAIJWithArrays - creates an MPIAIJ matrix from arrays that hold the local rows
         in standard CSR format.

   Collective

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (Cannot be PETSC_DECIDE)
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
       calculated if N is given) For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.   i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
.   j - column indices
-   a - matrix values

   Output Parameter:
.   mat - the matrix

   Level: intermediate

   Notes:
       The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
     thus you CANNOT change the matrix entries by changing the values of a[] after you have
     called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.

       The i and j indices are 0 based, and i indices are indices corresponding to the local j array.

       Once you have created the matrix you can update it with new numerical values using MatUpdateMPIAIJWithArrays()

       The format which is used for the sparse matrix input is equivalent to a
    row-major ordering, i.e. for the following matrix, the input data expected is
    as shown

$        1 0 0
$        2 0 3     P0
$       -------
$        4 5 6     P1
$
$     Process0 [P0]: rows_owned=[0,1]
$        i =  {0,1,3}  [size = nrow+1  = 2+1]
$        j =  {0,0,2}  [size = 3]
$        v =  {1,2,3}  [size = 3]
$
$     Process1 [P1]: rows_owned=[2]
$        i =  {0,3}    [size = nrow+1  = 1+1]
$        j =  {0,1,2}  [size = 3]
$        v =  {4,5,6}  [size = 3]

.seealso: `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
          `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArrays()`
@*/
PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
{
  PetscFunctionBegin;
  /* the arguments are checked before anything is created; a NULL i passes this first check */
  PetscCheck(!i || !i[0],PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
  PetscCheck(m >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
  PetscCall(MatCreate(comm,mat));
  PetscCall(MatSetSizes(*mat,m,n,M,N));
  PetscCall(MatSetType(*mat,MATMPIAIJ));
  /* the CSR preallocation routine performs the preallocation on the new matrix from i, j and a */
  PetscCall(MatMPIAIJSetPreallocationCSR(*mat,i,j,a));
  PetscFunctionReturn(0);
}

/*@
     MatUpdateMPIAIJWithArrays - updates an MPIAIJ matrix from arrays that hold the local rows
         in standard CSR format. Only the numerical values are updated; the other arrays must be identical

   Collective

   Input Parameters:
+  mat - the matrix
.  m - number of local rows (Cannot be PETSC_DECIDE)
. 
n - This value should be the same as the local size used in creating the 4143 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4144 calculated if N is given) For square matrices n is almost always m. 4145 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4146 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4147 . Ii - row indices; that is Ii[0] = 0, Ii[row] = Ii[row-1] + number of elements in that row of the matrix 4148 . J - column indices 4149 - v - matrix values 4150 4151 Level: intermediate 4152 4153 .seealso: `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 4154 `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArrays()` 4155 @*/ 4156 PetscErrorCode MatUpdateMPIAIJWithArrays(Mat mat,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[]) 4157 { 4158 PetscInt cstart,nnz,i,j; 4159 PetscInt *ld; 4160 PetscBool nooffprocentries; 4161 Mat_MPIAIJ *Aij = (Mat_MPIAIJ*)mat->data; 4162 Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)Aij->A->data; 4163 PetscScalar *ad,*ao; 4164 const PetscInt *Adi = Ad->i; 4165 PetscInt ldi,Iii,md; 4166 4167 PetscFunctionBegin; 4168 PetscCheck(Ii[0] == 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 4169 PetscCheck(m >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4170 PetscCheck(m == mat->rmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Local number of rows cannot change from call to MatUpdateMPIAIJWithArrays()"); 4171 PetscCheck(n == mat->cmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Local number of columns cannot change from call to MatUpdateMPIAIJWithArrays()"); 4172 4173 PetscCall(MatSeqAIJGetArrayWrite(Aij->A,&ad)); 4174 PetscCall(MatSeqAIJGetArrayWrite(Aij->B,&ao)); 4175 cstart = mat->cmap->rstart; 4176 if 
(!Aij->ld) { 4177 /* count number of entries below block diagonal */ 4178 PetscCall(PetscCalloc1(m,&ld)); 4179 Aij->ld = ld; 4180 for (i=0; i<m; i++) { 4181 nnz = Ii[i+1]- Ii[i]; 4182 j = 0; 4183 while (J[j] < cstart && j < nnz) {j++;} 4184 J += nnz; 4185 ld[i] = j; 4186 } 4187 } else { 4188 ld = Aij->ld; 4189 } 4190 4191 for (i=0; i<m; i++) { 4192 nnz = Ii[i+1]- Ii[i]; 4193 Iii = Ii[i]; 4194 ldi = ld[i]; 4195 md = Adi[i+1]-Adi[i]; 4196 PetscCall(PetscArraycpy(ao,v + Iii,ldi)); 4197 PetscCall(PetscArraycpy(ad,v + Iii + ldi,md)); 4198 PetscCall(PetscArraycpy(ao + ldi,v + Iii + ldi + md,nnz - ldi - md)); 4199 ad += md; 4200 ao += nnz - md; 4201 } 4202 nooffprocentries = mat->nooffprocentries; 4203 mat->nooffprocentries = PETSC_TRUE; 4204 PetscCall(MatSeqAIJRestoreArrayWrite(Aij->A,&ad)); 4205 PetscCall(MatSeqAIJRestoreArrayWrite(Aij->B,&ao)); 4206 PetscCall(PetscObjectStateIncrease((PetscObject)Aij->A)); 4207 PetscCall(PetscObjectStateIncrease((PetscObject)Aij->B)); 4208 PetscCall(PetscObjectStateIncrease((PetscObject)mat)); 4209 PetscCall(MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY)); 4210 PetscCall(MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY)); 4211 mat->nooffprocentries = nooffprocentries; 4212 PetscFunctionReturn(0); 4213 } 4214 4215 /*@C 4216 MatCreateAIJ - Creates a sparse parallel matrix in AIJ format 4217 (the default parallel PETSc format). For good matrix assembly performance 4218 the user should preallocate the matrix storage by setting the parameters 4219 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 4220 performance can be increased by more than a factor of 50. 4221 4222 Collective 4223 4224 Input Parameters: 4225 + comm - MPI communicator 4226 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 4227 This value should be the same as the local size used in creating the 4228 y vector for the matrix-vector product y = Ax. 4229 . 
n - This value should be the same as the local size used in creating the 4230 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4231 calculated if N is given) For square matrices n is almost always m. 4232 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4233 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4234 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 4235 (same value is used for all local rows) 4236 . d_nnz - array containing the number of nonzeros in the various rows of the 4237 DIAGONAL portion of the local submatrix (possibly different for each row) 4238 or NULL, if d_nz is used to specify the nonzero structure. 4239 The size of this array is equal to the number of local rows, i.e 'm'. 4240 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 4241 submatrix (same value is used for all local rows). 4242 - o_nnz - array containing the number of nonzeros in the various rows of the 4243 OFF-DIAGONAL portion of the local submatrix (possibly different for 4244 each row) or NULL, if o_nz is used to specify the nonzero 4245 structure. The size of this array is equal to the number 4246 of local rows, i.e 'm'. 4247 4248 Output Parameter: 4249 . A - the matrix 4250 4251 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 4252 MatXXXXSetPreallocation() paradigm instead of this routine directly. 4253 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 4254 4255 Notes: 4256 If the *_nnz parameter is given then the *_nz parameter is ignored 4257 4258 m,n,M,N parameters specify the size of the matrix, and its partitioning across 4259 processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 4260 storage requirements for this matrix. 

   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
   processor then it must be used on all processors that share the object for
   that argument.

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   The parallel matrix is partitioned across processors such that the
   first m0 rows belong to process 0, the next m1 rows belong to
   process 1, the next m2 rows belong to process 2 etc., where
   m0,m1,m2,... are the input parameter 'm', i.e. each processor stores
   values corresponding to an [m x N] submatrix.

   The columns are logically partitioned with the n0 columns belonging
   to 0th partition, the next n1 columns belonging to the next
   partition etc., where n0,n1,n2,... are the input parameter 'n'.

   The DIAGONAL portion of the local submatrix on any given processor
   is the submatrix corresponding to the rows and columns m,n
   corresponding to the given processor, i.e. the diagonal matrix on
   process 0 is [m0 x n0], the diagonal matrix on process 1 is [m1 x n1]
   etc. The remaining portion of the local submatrix [m x (N-n)]
   constitutes the OFF-DIAGONAL portion. The example below better
   illustrates this concept.

   For a square global matrix we define each processor's diagonal portion
   to be its local rows and the corresponding columns (a square submatrix);
   each processor's off-diagonal portion encompasses the remainder of the
   local matrix (a rectangular submatrix).

   If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.

   When calling this routine with a single process communicator, a matrix of
   type SEQAIJ is returned.
If a matrix of type MPIAIJ is desired for this 4296 type of communicator, use the construction mechanism 4297 .vb 4298 MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...); 4299 .ve 4300 4301 $ MatCreate(...,&A); 4302 $ MatSetType(A,MATMPIAIJ); 4303 $ MatSetSizes(A, m,n,M,N); 4304 $ MatMPIAIJSetPreallocation(A,...); 4305 4306 By default, this format uses inodes (identical nodes) when possible. 4307 We search for consecutive rows with the same nonzero structure, thereby 4308 reusing matrix information to achieve increased efficiency. 4309 4310 Options Database Keys: 4311 + -mat_no_inode - Do not use inodes 4312 . -mat_inode_limit <limit> - Sets inode limit (max limit=5) 4313 - -matmult_vecscatter_view <viewer> - View the vecscatter (i.e., communication pattern) used in MatMult() of sparse parallel matrices. 4314 See viewer types in manual of MatView(). Of them, ascii_matlab, draw or binary cause the vecscatter be viewed as a matrix. 4315 Entry (i,j) is the size of message (in bytes) rank i sends to rank j in one MatMult() call. 4316 4317 Example usage: 4318 4319 Consider the following 8x8 matrix with 34 non-zero values, that is 4320 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 4321 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 4322 as follows 4323 4324 .vb 4325 1 2 0 | 0 3 0 | 0 4 4326 Proc0 0 5 6 | 7 0 0 | 8 0 4327 9 0 10 | 11 0 0 | 12 0 4328 ------------------------------------- 4329 13 0 14 | 15 16 17 | 0 0 4330 Proc1 0 18 0 | 19 20 21 | 0 0 4331 0 0 0 | 22 23 0 | 24 0 4332 ------------------------------------- 4333 Proc2 25 26 27 | 0 0 28 | 29 0 4334 30 0 0 | 31 32 33 | 0 34 4335 .ve 4336 4337 This can be represented as a collection of submatrices as 4338 4339 .vb 4340 A B C 4341 D E F 4342 G H I 4343 .ve 4344 4345 Where the submatrices A,B,C are owned by proc0, D,E,F are 4346 owned by proc1, G,H,I are owned by proc2. 

   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'M','N' parameters are 8,8, and have the same values on all procs.

   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
   Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
   part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.

   When d_nz, o_nz parameters are specified, d_nz storage elements are
   allocated for every row of the local diagonal submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submat.
   One way to choose d_nz and o_nz is to use the max nonzeros per local
   rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
   In this case, the values of d_nz,o_nz are
.vb
     proc0 : d_nz = 2, o_nz = 2
     proc1 : d_nz = 3, o_nz = 2
     proc2 : d_nz = 1, o_nz = 4
.ve
   We are allocating m*(d_nz+o_nz) storage locations for every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc2, i.e. we are using 12+15+10=37 storage locations to store
   34 values.

   When d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
   In the above case the values for d_nnz,o_nnz are
.vb
     proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2: d_nnz = [1,1]   and o_nnz = [4,4]
.ve
   Here the space allocated is sum of all the above values i.e 34, and
   hence pre-allocation is perfect.
4385 4386 Level: intermediate 4387 4388 .seealso: `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 4389 `MATMPIAIJ`, `MatCreateMPIAIJWithArrays()` 4390 @*/ 4391 PetscErrorCode MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 4392 { 4393 PetscMPIInt size; 4394 4395 PetscFunctionBegin; 4396 PetscCall(MatCreate(comm,A)); 4397 PetscCall(MatSetSizes(*A,m,n,M,N)); 4398 PetscCallMPI(MPI_Comm_size(comm,&size)); 4399 if (size > 1) { 4400 PetscCall(MatSetType(*A,MATMPIAIJ)); 4401 PetscCall(MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz)); 4402 } else { 4403 PetscCall(MatSetType(*A,MATSEQAIJ)); 4404 PetscCall(MatSeqAIJSetPreallocation(*A,d_nz,d_nnz)); 4405 } 4406 PetscFunctionReturn(0); 4407 } 4408 4409 /*@C 4410 MatMPIAIJGetSeqAIJ - Returns the local piece of this distributed matrix 4411 4412 Not collective 4413 4414 Input Parameter: 4415 . A - The MPIAIJ matrix 4416 4417 Output Parameters: 4418 + Ad - The local diagonal block as a SeqAIJ matrix 4419 . Ao - The local off-diagonal block as a SeqAIJ matrix 4420 - colmap - An array mapping local column numbers of Ao to global column numbers of the parallel matrix 4421 4422 Note: The rows in Ad and Ao are in [0, Nr), where Nr is the number of local rows on this process. The columns 4423 in Ad are in [0, Nc) where Nc is the number of local columns. The columns are Ao are in [0, Nco), where Nco is 4424 the number of nonzero columns in the local off-diagonal piece of the matrix A. The array colmap maps these 4425 local column numbers to global column numbers in the original matrix. 
4426 4427 Level: intermediate 4428 4429 .seealso: `MatMPIAIJGetLocalMat()`, `MatMPIAIJGetLocalMatCondensed()`, `MatCreateAIJ()`, `MATMPIAIJ`, `MATSEQAIJ` 4430 @*/ 4431 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[]) 4432 { 4433 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4434 PetscBool flg; 4435 4436 PetscFunctionBegin; 4437 PetscCall(PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&flg)); 4438 PetscCheck(flg,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MATMPIAIJ matrix as input"); 4439 if (Ad) *Ad = a->A; 4440 if (Ao) *Ao = a->B; 4441 if (colmap) *colmap = a->garray; 4442 PetscFunctionReturn(0); 4443 } 4444 4445 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 4446 { 4447 PetscInt m,N,i,rstart,nnz,Ii; 4448 PetscInt *indx; 4449 PetscScalar *values; 4450 MatType rootType; 4451 4452 PetscFunctionBegin; 4453 PetscCall(MatGetSize(inmat,&m,&N)); 4454 if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */ 4455 PetscInt *dnz,*onz,sum,bs,cbs; 4456 4457 if (n == PETSC_DECIDE) { 4458 PetscCall(PetscSplitOwnership(comm,&n,&N)); 4459 } 4460 /* Check sum(n) = N */ 4461 PetscCall(MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm)); 4462 PetscCheck(sum == N,PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %" PetscInt_FMT " != global columns %" PetscInt_FMT,sum,N); 4463 4464 PetscCallMPI(MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm)); 4465 rstart -= m; 4466 4467 MatPreallocateBegin(comm,m,n,dnz,onz); 4468 for (i=0; i<m; i++) { 4469 PetscCall(MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL)); 4470 PetscCall(MatPreallocateSet(i+rstart,nnz,indx,dnz,onz)); 4471 PetscCall(MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL)); 4472 } 4473 4474 PetscCall(MatCreate(comm,outmat)); 4475 PetscCall(MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE)); 4476 PetscCall(MatGetBlockSizes(inmat,&bs,&cbs)); 4477 PetscCall(MatSetBlockSizes(*outmat,bs,cbs)); 4478 
PetscCall(MatGetRootType_Private(inmat,&rootType)); 4479 PetscCall(MatSetType(*outmat,rootType)); 4480 PetscCall(MatSeqAIJSetPreallocation(*outmat,0,dnz)); 4481 PetscCall(MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz)); 4482 MatPreallocateEnd(dnz,onz); 4483 PetscCall(MatSetOption(*outmat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE)); 4484 } 4485 4486 /* numeric phase */ 4487 PetscCall(MatGetOwnershipRange(*outmat,&rstart,NULL)); 4488 for (i=0; i<m; i++) { 4489 PetscCall(MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values)); 4490 Ii = i + rstart; 4491 PetscCall(MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES)); 4492 PetscCall(MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values)); 4493 } 4494 PetscCall(MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY)); 4495 PetscCall(MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY)); 4496 PetscFunctionReturn(0); 4497 } 4498 4499 PetscErrorCode MatFileSplit(Mat A,char *outfile) 4500 { 4501 PetscMPIInt rank; 4502 PetscInt m,N,i,rstart,nnz; 4503 size_t len; 4504 const PetscInt *indx; 4505 PetscViewer out; 4506 char *name; 4507 Mat B; 4508 const PetscScalar *values; 4509 4510 PetscFunctionBegin; 4511 PetscCall(MatGetLocalSize(A,&m,NULL)); 4512 PetscCall(MatGetSize(A,NULL,&N)); 4513 /* Should this be the type of the diagonal block of A? 
*/ 4514 PetscCall(MatCreate(PETSC_COMM_SELF,&B)); 4515 PetscCall(MatSetSizes(B,m,N,m,N)); 4516 PetscCall(MatSetBlockSizesFromMats(B,A,A)); 4517 PetscCall(MatSetType(B,MATSEQAIJ)); 4518 PetscCall(MatSeqAIJSetPreallocation(B,0,NULL)); 4519 PetscCall(MatGetOwnershipRange(A,&rstart,NULL)); 4520 for (i=0; i<m; i++) { 4521 PetscCall(MatGetRow(A,i+rstart,&nnz,&indx,&values)); 4522 PetscCall(MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES)); 4523 PetscCall(MatRestoreRow(A,i+rstart,&nnz,&indx,&values)); 4524 } 4525 PetscCall(MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY)); 4526 PetscCall(MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY)); 4527 4528 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank)); 4529 PetscCall(PetscStrlen(outfile,&len)); 4530 PetscCall(PetscMalloc1(len+6,&name)); 4531 PetscCall(PetscSNPrintf(name,len+6,"%s.%d",outfile,rank)); 4532 PetscCall(PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out)); 4533 PetscCall(PetscFree(name)); 4534 PetscCall(MatView(B,out)); 4535 PetscCall(PetscViewerDestroy(&out)); 4536 PetscCall(MatDestroy(&B)); 4537 PetscFunctionReturn(0); 4538 } 4539 4540 static PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(void *data) 4541 { 4542 Mat_Merge_SeqsToMPI *merge = (Mat_Merge_SeqsToMPI *)data; 4543 4544 PetscFunctionBegin; 4545 if (!merge) PetscFunctionReturn(0); 4546 PetscCall(PetscFree(merge->id_r)); 4547 PetscCall(PetscFree(merge->len_s)); 4548 PetscCall(PetscFree(merge->len_r)); 4549 PetscCall(PetscFree(merge->bi)); 4550 PetscCall(PetscFree(merge->bj)); 4551 PetscCall(PetscFree(merge->buf_ri[0])); 4552 PetscCall(PetscFree(merge->buf_ri)); 4553 PetscCall(PetscFree(merge->buf_rj[0])); 4554 PetscCall(PetscFree(merge->buf_rj)); 4555 PetscCall(PetscFree(merge->coi)); 4556 PetscCall(PetscFree(merge->coj)); 4557 PetscCall(PetscFree(merge->owners_co)); 4558 PetscCall(PetscLayoutDestroy(&merge->rowmap)); 4559 PetscCall(PetscFree(merge)); 4560 PetscFunctionReturn(0); 4561 } 4562 4563 #include <../src/mat/utils/freespace.h> 4564 #include 
<petscbt.h> 4565 4566 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat) 4567 { 4568 MPI_Comm comm; 4569 Mat_SeqAIJ *a =(Mat_SeqAIJ*)seqmat->data; 4570 PetscMPIInt size,rank,taga,*len_s; 4571 PetscInt N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj; 4572 PetscInt proc,m; 4573 PetscInt **buf_ri,**buf_rj; 4574 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 4575 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 4576 MPI_Request *s_waits,*r_waits; 4577 MPI_Status *status; 4578 const MatScalar *aa,*a_a; 4579 MatScalar **abuf_r,*ba_i; 4580 Mat_Merge_SeqsToMPI *merge; 4581 PetscContainer container; 4582 4583 PetscFunctionBegin; 4584 PetscCall(PetscObjectGetComm((PetscObject)mpimat,&comm)); 4585 PetscCall(PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0)); 4586 4587 PetscCallMPI(MPI_Comm_size(comm,&size)); 4588 PetscCallMPI(MPI_Comm_rank(comm,&rank)); 4589 4590 PetscCall(PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container)); 4591 PetscCheck(container,PetscObjectComm((PetscObject)mpimat),PETSC_ERR_PLIB,"Mat not created from MatCreateMPIAIJSumSeqAIJSymbolic"); 4592 PetscCall(PetscContainerGetPointer(container,(void**)&merge)); 4593 PetscCall(MatSeqAIJGetArrayRead(seqmat,&a_a)); 4594 aa = a_a; 4595 4596 bi = merge->bi; 4597 bj = merge->bj; 4598 buf_ri = merge->buf_ri; 4599 buf_rj = merge->buf_rj; 4600 4601 PetscCall(PetscMalloc1(size,&status)); 4602 owners = merge->rowmap->range; 4603 len_s = merge->len_s; 4604 4605 /* send and recv matrix values */ 4606 /*-----------------------------*/ 4607 PetscCall(PetscObjectGetNewTag((PetscObject)mpimat,&taga)); 4608 PetscCall(PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits)); 4609 4610 PetscCall(PetscMalloc1(merge->nsend+1,&s_waits)); 4611 for (proc=0,k=0; proc<size; proc++) { 4612 if (!len_s[proc]) continue; 4613 i = owners[proc]; 4614 PetscCallMPI(MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k)); 4615 k++; 4616 } 4617 4618 if 
(merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv,r_waits,status)); 4619 if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend,s_waits,status)); 4620 PetscCall(PetscFree(status)); 4621 4622 PetscCall(PetscFree(s_waits)); 4623 PetscCall(PetscFree(r_waits)); 4624 4625 /* insert mat values of mpimat */ 4626 /*----------------------------*/ 4627 PetscCall(PetscMalloc1(N,&ba_i)); 4628 PetscCall(PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai)); 4629 4630 for (k=0; k<merge->nrecv; k++) { 4631 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4632 nrows = *(buf_ri_k[k]); 4633 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */ 4634 nextai[k] = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure */ 4635 } 4636 4637 /* set values of ba */ 4638 m = merge->rowmap->n; 4639 for (i=0; i<m; i++) { 4640 arow = owners[rank] + i; 4641 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 4642 bnzi = bi[i+1] - bi[i]; 4643 PetscCall(PetscArrayzero(ba_i,bnzi)); 4644 4645 /* add local non-zero vals of this proc's seqmat into ba */ 4646 anzi = ai[arow+1] - ai[arow]; 4647 aj = a->j + ai[arow]; 4648 aa = a_a + ai[arow]; 4649 nextaj = 0; 4650 for (j=0; nextaj<anzi; j++) { 4651 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4652 ba_i[j] += aa[nextaj++]; 4653 } 4654 } 4655 4656 /* add received vals into ba */ 4657 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 4658 /* i-th row */ 4659 if (i == *nextrow[k]) { 4660 anzi = *(nextai[k]+1) - *nextai[k]; 4661 aj = buf_rj[k] + *(nextai[k]); 4662 aa = abuf_r[k] + *(nextai[k]); 4663 nextaj = 0; 4664 for (j=0; nextaj<anzi; j++) { 4665 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4666 ba_i[j] += aa[nextaj++]; 4667 } 4668 } 4669 nextrow[k]++; nextai[k]++; 4670 } 4671 } 4672 PetscCall(MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES)); 4673 } 4674 PetscCall(MatSeqAIJRestoreArrayRead(seqmat,&a_a)); 4675 
PetscCall(MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY)); 4676 PetscCall(MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY)); 4677 4678 PetscCall(PetscFree(abuf_r[0])); 4679 PetscCall(PetscFree(abuf_r)); 4680 PetscCall(PetscFree(ba_i)); 4681 PetscCall(PetscFree3(buf_ri_k,nextrow,nextai)); 4682 PetscCall(PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0)); 4683 PetscFunctionReturn(0); 4684 } 4685 4686 PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 4687 { 4688 Mat B_mpi; 4689 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4690 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 4691 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 4692 PetscInt M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j; 4693 PetscInt len,proc,*dnz,*onz,bs,cbs; 4694 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 4695 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 4696 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 4697 MPI_Status *status; 4698 PetscFreeSpaceList free_space=NULL,current_space=NULL; 4699 PetscBT lnkbt; 4700 Mat_Merge_SeqsToMPI *merge; 4701 PetscContainer container; 4702 4703 PetscFunctionBegin; 4704 PetscCall(PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0)); 4705 4706 /* make sure it is a PETSc comm */ 4707 PetscCall(PetscCommDuplicate(comm,&comm,NULL)); 4708 PetscCallMPI(MPI_Comm_size(comm,&size)); 4709 PetscCallMPI(MPI_Comm_rank(comm,&rank)); 4710 4711 PetscCall(PetscNew(&merge)); 4712 PetscCall(PetscMalloc1(size,&status)); 4713 4714 /* determine row ownership */ 4715 /*---------------------------------------------------------*/ 4716 PetscCall(PetscLayoutCreate(comm,&merge->rowmap)); 4717 PetscCall(PetscLayoutSetLocalSize(merge->rowmap,m)); 4718 PetscCall(PetscLayoutSetSize(merge->rowmap,M)); 4719 PetscCall(PetscLayoutSetBlockSize(merge->rowmap,1)); 4720 PetscCall(PetscLayoutSetUp(merge->rowmap)); 4721 PetscCall(PetscMalloc1(size,&len_si)); 4722 PetscCall(PetscMalloc1(size,&merge->len_s)); 4723 
4724 m = merge->rowmap->n; 4725 owners = merge->rowmap->range; 4726 4727 /* determine the number of messages to send, their lengths */ 4728 /*---------------------------------------------------------*/ 4729 len_s = merge->len_s; 4730 4731 len = 0; /* length of buf_si[] */ 4732 merge->nsend = 0; 4733 for (proc=0; proc<size; proc++) { 4734 len_si[proc] = 0; 4735 if (proc == rank) { 4736 len_s[proc] = 0; 4737 } else { 4738 len_si[proc] = owners[proc+1] - owners[proc] + 1; 4739 len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 4740 } 4741 if (len_s[proc]) { 4742 merge->nsend++; 4743 nrows = 0; 4744 for (i=owners[proc]; i<owners[proc+1]; i++) { 4745 if (ai[i+1] > ai[i]) nrows++; 4746 } 4747 len_si[proc] = 2*(nrows+1); 4748 len += len_si[proc]; 4749 } 4750 } 4751 4752 /* determine the number and length of messages to receive for ij-structure */ 4753 /*-------------------------------------------------------------------------*/ 4754 PetscCall(PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv)); 4755 PetscCall(PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri)); 4756 4757 /* post the Irecv of j-structure */ 4758 /*-------------------------------*/ 4759 PetscCall(PetscCommGetNewTag(comm,&tagj)); 4760 PetscCall(PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits)); 4761 4762 /* post the Isend of j-structure */ 4763 /*--------------------------------*/ 4764 PetscCall(PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits)); 4765 4766 for (proc=0, k=0; proc<size; proc++) { 4767 if (!len_s[proc]) continue; 4768 i = owners[proc]; 4769 PetscCallMPI(MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k)); 4770 k++; 4771 } 4772 4773 /* receives and sends of j-structure are complete */ 4774 /*------------------------------------------------*/ 4775 if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv,rj_waits,status)); 4776 if 
(merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend,sj_waits,status)); 4777 4778 /* send and recv i-structure */ 4779 /*---------------------------*/ 4780 PetscCall(PetscCommGetNewTag(comm,&tagi)); 4781 PetscCall(PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits)); 4782 4783 PetscCall(PetscMalloc1(len+1,&buf_s)); 4784 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4785 for (proc=0,k=0; proc<size; proc++) { 4786 if (!len_s[proc]) continue; 4787 /* form outgoing message for i-structure: 4788 buf_si[0]: nrows to be sent 4789 [1:nrows]: row index (global) 4790 [nrows+1:2*nrows+1]: i-structure index 4791 */ 4792 /*-------------------------------------------*/ 4793 nrows = len_si[proc]/2 - 1; 4794 buf_si_i = buf_si + nrows+1; 4795 buf_si[0] = nrows; 4796 buf_si_i[0] = 0; 4797 nrows = 0; 4798 for (i=owners[proc]; i<owners[proc+1]; i++) { 4799 anzi = ai[i+1] - ai[i]; 4800 if (anzi) { 4801 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 4802 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 4803 nrows++; 4804 } 4805 } 4806 PetscCallMPI(MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k)); 4807 k++; 4808 buf_si += len_si[proc]; 4809 } 4810 4811 if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv,ri_waits,status)); 4812 if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend,si_waits,status)); 4813 4814 PetscCall(PetscInfo(seqmat,"nsend: %d, nrecv: %d\n",merge->nsend,merge->nrecv)); 4815 for (i=0; i<merge->nrecv; i++) { 4816 PetscCall(PetscInfo(seqmat,"recv len_ri=%d, len_rj=%d from [%d]\n",len_ri[i],merge->len_r[i],merge->id_r[i])); 4817 } 4818 4819 PetscCall(PetscFree(len_si)); 4820 PetscCall(PetscFree(len_ri)); 4821 PetscCall(PetscFree(rj_waits)); 4822 PetscCall(PetscFree2(si_waits,sj_waits)); 4823 PetscCall(PetscFree(ri_waits)); 4824 PetscCall(PetscFree(buf_s)); 4825 PetscCall(PetscFree(status)); 4826 4827 /* compute a local seq matrix in each processor */ 4828 
/*----------------------------------------------*/ 4829 /* allocate bi array and free space for accumulating nonzero column info */ 4830 PetscCall(PetscMalloc1(m+1,&bi)); 4831 bi[0] = 0; 4832 4833 /* create and initialize a linked list */ 4834 nlnk = N+1; 4835 PetscCall(PetscLLCreate(N,N,nlnk,lnk,lnkbt)); 4836 4837 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 4838 len = ai[owners[rank+1]] - ai[owners[rank]]; 4839 PetscCall(PetscFreeSpaceGet(PetscIntMultTruncate(2,len)+1,&free_space)); 4840 4841 current_space = free_space; 4842 4843 /* determine symbolic info for each local row */ 4844 PetscCall(PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai)); 4845 4846 for (k=0; k<merge->nrecv; k++) { 4847 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4848 nrows = *buf_ri_k[k]; 4849 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 4850 nextai[k] = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure */ 4851 } 4852 4853 MatPreallocateBegin(comm,m,n,dnz,onz); 4854 len = 0; 4855 for (i=0; i<m; i++) { 4856 bnzi = 0; 4857 /* add local non-zero cols of this proc's seqmat into lnk */ 4858 arow = owners[rank] + i; 4859 anzi = ai[arow+1] - ai[arow]; 4860 aj = a->j + ai[arow]; 4861 PetscCall(PetscLLAddSorted(anzi,aj,N,&nlnk,lnk,lnkbt)); 4862 bnzi += nlnk; 4863 /* add received col data into lnk */ 4864 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 4865 if (i == *nextrow[k]) { /* i-th row */ 4866 anzi = *(nextai[k]+1) - *nextai[k]; 4867 aj = buf_rj[k] + *nextai[k]; 4868 PetscCall(PetscLLAddSorted(anzi,aj,N,&nlnk,lnk,lnkbt)); 4869 bnzi += nlnk; 4870 nextrow[k]++; nextai[k]++; 4871 } 4872 } 4873 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 4874 4875 /* if free space is not available, make more free space */ 4876 if (current_space->local_remaining<bnzi) { 4877 
PetscCall(PetscFreeSpaceGet(PetscIntSumTruncate(bnzi,current_space->total_array_size),¤t_space)); 4878 nspacedouble++; 4879 } 4880 /* copy data into free space, then initialize lnk */ 4881 PetscCall(PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt)); 4882 PetscCall(MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz)); 4883 4884 current_space->array += bnzi; 4885 current_space->local_used += bnzi; 4886 current_space->local_remaining -= bnzi; 4887 4888 bi[i+1] = bi[i] + bnzi; 4889 } 4890 4891 PetscCall(PetscFree3(buf_ri_k,nextrow,nextai)); 4892 4893 PetscCall(PetscMalloc1(bi[m]+1,&bj)); 4894 PetscCall(PetscFreeSpaceContiguous(&free_space,bj)); 4895 PetscCall(PetscLLDestroy(lnk,lnkbt)); 4896 4897 /* create symbolic parallel matrix B_mpi */ 4898 /*---------------------------------------*/ 4899 PetscCall(MatGetBlockSizes(seqmat,&bs,&cbs)); 4900 PetscCall(MatCreate(comm,&B_mpi)); 4901 if (n==PETSC_DECIDE) { 4902 PetscCall(MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N)); 4903 } else { 4904 PetscCall(MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE)); 4905 } 4906 PetscCall(MatSetBlockSizes(B_mpi,bs,cbs)); 4907 PetscCall(MatSetType(B_mpi,MATMPIAIJ)); 4908 PetscCall(MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz)); 4909 MatPreallocateEnd(dnz,onz); 4910 PetscCall(MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE)); 4911 4912 /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */ 4913 B_mpi->assembled = PETSC_FALSE; 4914 merge->bi = bi; 4915 merge->bj = bj; 4916 merge->buf_ri = buf_ri; 4917 merge->buf_rj = buf_rj; 4918 merge->coi = NULL; 4919 merge->coj = NULL; 4920 merge->owners_co = NULL; 4921 4922 PetscCall(PetscCommDestroy(&comm)); 4923 4924 /* attach the supporting struct to B_mpi for reuse */ 4925 PetscCall(PetscContainerCreate(PETSC_COMM_SELF,&container)); 4926 PetscCall(PetscContainerSetPointer(container,merge)); 4927 PetscCall(PetscContainerSetUserDestroy(container,MatDestroy_MPIAIJ_SeqsToMPI)); 4928 
PetscCall(PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container)); 4929 PetscCall(PetscContainerDestroy(&container)); 4930 *mpimat = B_mpi; 4931 4932 PetscCall(PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0)); 4933 PetscFunctionReturn(0); 4934 } 4935 4936 /*@C 4937 MatCreateMPIAIJSumSeqAIJ - Creates a MATMPIAIJ matrix by adding sequential 4938 matrices from each processor 4939 4940 Collective 4941 4942 Input Parameters: 4943 + comm - the communicators the parallel matrix will live on 4944 . seqmat - the input sequential matrices 4945 . m - number of local rows (or PETSC_DECIDE) 4946 . n - number of local columns (or PETSC_DECIDE) 4947 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4948 4949 Output Parameter: 4950 . mpimat - the parallel matrix generated 4951 4952 Level: advanced 4953 4954 Notes: 4955 The dimensions of the sequential matrix in each processor MUST be the same. 4956 The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be 4957 destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat. 
4958 @*/ 4959 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat) 4960 { 4961 PetscMPIInt size; 4962 4963 PetscFunctionBegin; 4964 PetscCallMPI(MPI_Comm_size(comm,&size)); 4965 if (size == 1) { 4966 PetscCall(PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0)); 4967 if (scall == MAT_INITIAL_MATRIX) { 4968 PetscCall(MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat)); 4969 } else { 4970 PetscCall(MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN)); 4971 } 4972 PetscCall(PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0)); 4973 PetscFunctionReturn(0); 4974 } 4975 PetscCall(PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0)); 4976 if (scall == MAT_INITIAL_MATRIX) { 4977 PetscCall(MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat)); 4978 } 4979 PetscCall(MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat)); 4980 PetscCall(PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0)); 4981 PetscFunctionReturn(0); 4982 } 4983 4984 /*@ 4985 MatAIJGetLocalMat - Creates a SeqAIJ from a MATAIJ matrix by taking all its local rows and putting them into a sequential matrix with 4986 mlocal rows and n columns. Where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained 4987 with MatGetSize() 4988 4989 Not Collective 4990 4991 Input Parameters: 4992 + A - the matrix 4993 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4994 4995 Output Parameter: 4996 . A_loc - the local sequential matrix generated 4997 4998 Level: developer 4999 5000 Notes: 5001 In other words combines the two parts of a parallel MPIAIJ matrix on each process to a single matrix. 
5002 5003 Destroy the matrix with MatDestroy() 5004 5005 .seealso: MatMPIAIJGetLocalMat() 5006 5007 @*/ 5008 PetscErrorCode MatAIJGetLocalMat(Mat A,Mat *A_loc) 5009 { 5010 PetscBool mpi; 5011 5012 PetscFunctionBegin; 5013 PetscCall(PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&mpi)); 5014 if (mpi) { 5015 PetscCall(MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,A_loc)); 5016 } else { 5017 *A_loc = A; 5018 PetscCall(PetscObjectReference((PetscObject)*A_loc)); 5019 } 5020 PetscFunctionReturn(0); 5021 } 5022 5023 /*@ 5024 MatMPIAIJGetLocalMat - Creates a SeqAIJ from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with 5025 mlocal rows and n columns. Where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained 5026 with MatGetSize() 5027 5028 Not Collective 5029 5030 Input Parameters: 5031 + A - the matrix 5032 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5033 5034 Output Parameter: 5035 . A_loc - the local sequential matrix generated 5036 5037 Level: developer 5038 5039 Notes: 5040 In other words combines the two parts of a parallel MPIAIJ matrix on each process to a single matrix. 5041 5042 When the communicator associated with A has size 1 and MAT_INITIAL_MATRIX is requested, the matrix returned is the diagonal part of A. 5043 If MAT_REUSE_MATRIX is requested with comm size 1, MatCopy(Adiag,*A_loc,SAME_NONZERO_PATTERN) is called. 5044 This means that one can preallocate the proper sequential matrix first and then call this routine with MAT_REUSE_MATRIX to safely 5045 modify the values of the returned A_loc. 
5046 5047 .seealso: `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMatCondensed()`, `MatMPIAIJGetLocalMatMerge()` 5048 @*/ 5049 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc) 5050 { 5051 Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data; 5052 Mat_SeqAIJ *mat,*a,*b; 5053 PetscInt *ai,*aj,*bi,*bj,*cmap=mpimat->garray; 5054 const PetscScalar *aa,*ba,*aav,*bav; 5055 PetscScalar *ca,*cam; 5056 PetscMPIInt size; 5057 PetscInt am=A->rmap->n,i,j,k,cstart=A->cmap->rstart; 5058 PetscInt *ci,*cj,col,ncols_d,ncols_o,jo; 5059 PetscBool match; 5060 5061 PetscFunctionBegin; 5062 PetscCall(PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&match)); 5063 PetscCheck(match,PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input"); 5064 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A),&size)); 5065 if (size == 1) { 5066 if (scall == MAT_INITIAL_MATRIX) { 5067 PetscCall(PetscObjectReference((PetscObject)mpimat->A)); 5068 *A_loc = mpimat->A; 5069 } else if (scall == MAT_REUSE_MATRIX) { 5070 PetscCall(MatCopy(mpimat->A,*A_loc,SAME_NONZERO_PATTERN)); 5071 } 5072 PetscFunctionReturn(0); 5073 } 5074 5075 PetscCall(PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0)); 5076 a = (Mat_SeqAIJ*)(mpimat->A)->data; 5077 b = (Mat_SeqAIJ*)(mpimat->B)->data; 5078 ai = a->i; aj = a->j; bi = b->i; bj = b->j; 5079 PetscCall(MatSeqAIJGetArrayRead(mpimat->A,&aav)); 5080 PetscCall(MatSeqAIJGetArrayRead(mpimat->B,&bav)); 5081 aa = aav; 5082 ba = bav; 5083 if (scall == MAT_INITIAL_MATRIX) { 5084 PetscCall(PetscMalloc1(1+am,&ci)); 5085 ci[0] = 0; 5086 for (i=0; i<am; i++) { 5087 ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]); 5088 } 5089 PetscCall(PetscMalloc1(1+ci[am],&cj)); 5090 PetscCall(PetscMalloc1(1+ci[am],&ca)); 5091 k = 0; 5092 for (i=0; i<am; i++) { 5093 ncols_o = bi[i+1] - bi[i]; 5094 ncols_d = ai[i+1] - ai[i]; 5095 /* off-diagonal portion of A */ 5096 for (jo=0; jo<ncols_o; jo++) { 5097 col = cmap[*bj]; 5098 if (col >= cstart) break; 5099 cj[k] = 
col; bj++; 5100 ca[k++] = *ba++; 5101 } 5102 /* diagonal portion of A */ 5103 for (j=0; j<ncols_d; j++) { 5104 cj[k] = cstart + *aj++; 5105 ca[k++] = *aa++; 5106 } 5107 /* off-diagonal portion of A */ 5108 for (j=jo; j<ncols_o; j++) { 5109 cj[k] = cmap[*bj++]; 5110 ca[k++] = *ba++; 5111 } 5112 } 5113 /* put together the new matrix */ 5114 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc)); 5115 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5116 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5117 mat = (Mat_SeqAIJ*)(*A_loc)->data; 5118 mat->free_a = PETSC_TRUE; 5119 mat->free_ij = PETSC_TRUE; 5120 mat->nonew = 0; 5121 } else if (scall == MAT_REUSE_MATRIX) { 5122 mat =(Mat_SeqAIJ*)(*A_loc)->data; 5123 ci = mat->i; 5124 cj = mat->j; 5125 PetscCall(MatSeqAIJGetArrayWrite(*A_loc,&cam)); 5126 for (i=0; i<am; i++) { 5127 /* off-diagonal portion of A */ 5128 ncols_o = bi[i+1] - bi[i]; 5129 for (jo=0; jo<ncols_o; jo++) { 5130 col = cmap[*bj]; 5131 if (col >= cstart) break; 5132 *cam++ = *ba++; bj++; 5133 } 5134 /* diagonal portion of A */ 5135 ncols_d = ai[i+1] - ai[i]; 5136 for (j=0; j<ncols_d; j++) *cam++ = *aa++; 5137 /* off-diagonal portion of A */ 5138 for (j=jo; j<ncols_o; j++) { 5139 *cam++ = *ba++; bj++; 5140 } 5141 } 5142 PetscCall(MatSeqAIJRestoreArrayWrite(*A_loc,&cam)); 5143 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 5144 PetscCall(MatSeqAIJRestoreArrayRead(mpimat->A,&aav)); 5145 PetscCall(MatSeqAIJRestoreArrayRead(mpimat->B,&bav)); 5146 PetscCall(PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0)); 5147 PetscFunctionReturn(0); 5148 } 5149 5150 /*@ 5151 MatMPIAIJGetLocalMatMerge - Creates a SeqAIJ from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with 5152 mlocal rows and n columns. 
Where n is the sum of the number of columns of the diagonal and offdiagonal part 5153 5154 Not Collective 5155 5156 Input Parameters: 5157 + A - the matrix 5158 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5159 5160 Output Parameters: 5161 + glob - sequential IS with global indices associated with the columns of the local sequential matrix generated (can be NULL) 5162 - A_loc - the local sequential matrix generated 5163 5164 Level: developer 5165 5166 Notes: 5167 This is different from MatMPIAIJGetLocalMat() since the first columns in the returning matrix are those associated with the diagonal part, then those associated with the offdiagonal part (in its local ordering) 5168 5169 .seealso: `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMat()`, `MatMPIAIJGetLocalMatCondensed()` 5170 5171 @*/ 5172 PetscErrorCode MatMPIAIJGetLocalMatMerge(Mat A,MatReuse scall,IS *glob,Mat *A_loc) 5173 { 5174 Mat Ao,Ad; 5175 const PetscInt *cmap; 5176 PetscMPIInt size; 5177 PetscErrorCode (*f)(Mat,MatReuse,IS*,Mat*); 5178 5179 PetscFunctionBegin; 5180 PetscCall(MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&cmap)); 5181 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A),&size)); 5182 if (size == 1) { 5183 if (scall == MAT_INITIAL_MATRIX) { 5184 PetscCall(PetscObjectReference((PetscObject)Ad)); 5185 *A_loc = Ad; 5186 } else if (scall == MAT_REUSE_MATRIX) { 5187 PetscCall(MatCopy(Ad,*A_loc,SAME_NONZERO_PATTERN)); 5188 } 5189 if (glob) PetscCall(ISCreateStride(PetscObjectComm((PetscObject)Ad),Ad->cmap->n,Ad->cmap->rstart,1,glob)); 5190 PetscFunctionReturn(0); 5191 } 5192 PetscCall(PetscObjectQueryFunction((PetscObject)A,"MatMPIAIJGetLocalMatMerge_C",&f)); 5193 PetscCall(PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0)); 5194 if (f) { 5195 PetscCall((*f)(A,scall,glob,A_loc)); 5196 } else { 5197 Mat_SeqAIJ *a = (Mat_SeqAIJ*)Ad->data; 5198 Mat_SeqAIJ *b = (Mat_SeqAIJ*)Ao->data; 5199 Mat_SeqAIJ *c; 5200 PetscInt *ai = a->i, *aj = a->j; 5201 PetscInt *bi = b->i, *bj = b->j; 5202 PetscInt *ci,*cj; 
5203 const PetscScalar *aa,*ba; 5204 PetscScalar *ca; 5205 PetscInt i,j,am,dn,on; 5206 5207 PetscCall(MatGetLocalSize(Ad,&am,&dn)); 5208 PetscCall(MatGetLocalSize(Ao,NULL,&on)); 5209 PetscCall(MatSeqAIJGetArrayRead(Ad,&aa)); 5210 PetscCall(MatSeqAIJGetArrayRead(Ao,&ba)); 5211 if (scall == MAT_INITIAL_MATRIX) { 5212 PetscInt k; 5213 PetscCall(PetscMalloc1(1+am,&ci)); 5214 PetscCall(PetscMalloc1(ai[am]+bi[am],&cj)); 5215 PetscCall(PetscMalloc1(ai[am]+bi[am],&ca)); 5216 ci[0] = 0; 5217 for (i=0,k=0; i<am; i++) { 5218 const PetscInt ncols_o = bi[i+1] - bi[i]; 5219 const PetscInt ncols_d = ai[i+1] - ai[i]; 5220 ci[i+1] = ci[i] + ncols_o + ncols_d; 5221 /* diagonal portion of A */ 5222 for (j=0; j<ncols_d; j++,k++) { 5223 cj[k] = *aj++; 5224 ca[k] = *aa++; 5225 } 5226 /* off-diagonal portion of A */ 5227 for (j=0; j<ncols_o; j++,k++) { 5228 cj[k] = dn + *bj++; 5229 ca[k] = *ba++; 5230 } 5231 } 5232 /* put together the new matrix */ 5233 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,dn+on,ci,cj,ca,A_loc)); 5234 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5235 /* Since these are PETSc arrays, change flags to free them as necessary. 
*/ 5236 c = (Mat_SeqAIJ*)(*A_loc)->data; 5237 c->free_a = PETSC_TRUE; 5238 c->free_ij = PETSC_TRUE; 5239 c->nonew = 0; 5240 PetscCall(MatSetType(*A_loc,((PetscObject)Ad)->type_name)); 5241 } else if (scall == MAT_REUSE_MATRIX) { 5242 PetscCall(MatSeqAIJGetArrayWrite(*A_loc,&ca)); 5243 for (i=0; i<am; i++) { 5244 const PetscInt ncols_d = ai[i+1] - ai[i]; 5245 const PetscInt ncols_o = bi[i+1] - bi[i]; 5246 /* diagonal portion of A */ 5247 for (j=0; j<ncols_d; j++) *ca++ = *aa++; 5248 /* off-diagonal portion of A */ 5249 for (j=0; j<ncols_o; j++) *ca++ = *ba++; 5250 } 5251 PetscCall(MatSeqAIJRestoreArrayWrite(*A_loc,&ca)); 5252 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 5253 PetscCall(MatSeqAIJRestoreArrayRead(Ad,&aa)); 5254 PetscCall(MatSeqAIJRestoreArrayRead(Ao,&aa)); 5255 if (glob) { 5256 PetscInt cst, *gidx; 5257 5258 PetscCall(MatGetOwnershipRangeColumn(A,&cst,NULL)); 5259 PetscCall(PetscMalloc1(dn+on,&gidx)); 5260 for (i=0; i<dn; i++) gidx[i] = cst + i; 5261 for (i=0; i<on; i++) gidx[i+dn] = cmap[i]; 5262 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)Ad),dn+on,gidx,PETSC_OWN_POINTER,glob)); 5263 } 5264 } 5265 PetscCall(PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0)); 5266 PetscFunctionReturn(0); 5267 } 5268 5269 /*@C 5270 MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MATMPIAIJ matrix by taking all its local rows and NON-ZERO columns 5271 5272 Not Collective 5273 5274 Input Parameters: 5275 + A - the matrix 5276 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5277 - row, col - index sets of rows and columns to extract (or NULL) 5278 5279 Output Parameter: 5280 . 
A_loc - the local sequential matrix generated 5281 5282 Level: developer 5283 5284 .seealso: `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMat()` 5285 5286 @*/ 5287 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc) 5288 { 5289 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5290 PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx; 5291 IS isrowa,iscola; 5292 Mat *aloc; 5293 PetscBool match; 5294 5295 PetscFunctionBegin; 5296 PetscCall(PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match)); 5297 PetscCheck(match,PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input"); 5298 PetscCall(PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0)); 5299 if (!row) { 5300 start = A->rmap->rstart; end = A->rmap->rend; 5301 PetscCall(ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa)); 5302 } else { 5303 isrowa = *row; 5304 } 5305 if (!col) { 5306 start = A->cmap->rstart; 5307 cmap = a->garray; 5308 nzA = a->A->cmap->n; 5309 nzB = a->B->cmap->n; 5310 PetscCall(PetscMalloc1(nzA+nzB, &idx)); 5311 ncols = 0; 5312 for (i=0; i<nzB; i++) { 5313 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5314 else break; 5315 } 5316 imark = i; 5317 for (i=0; i<nzA; i++) idx[ncols++] = start + i; 5318 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; 5319 PetscCall(ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola)); 5320 } else { 5321 iscola = *col; 5322 } 5323 if (scall != MAT_INITIAL_MATRIX) { 5324 PetscCall(PetscMalloc1(1,&aloc)); 5325 aloc[0] = *A_loc; 5326 } 5327 PetscCall(MatCreateSubMatrices(A,1,&isrowa,&iscola,scall,&aloc)); 5328 if (!col) { /* attach global id of condensed columns */ 5329 PetscCall(PetscObjectCompose((PetscObject)aloc[0],"_petsc_GetLocalMatCondensed_iscol",(PetscObject)iscola)); 5330 } 5331 *A_loc = aloc[0]; 5332 PetscCall(PetscFree(aloc)); 5333 if (!row) { 5334 PetscCall(ISDestroy(&isrowa)); 5335 } 5336 if (!col) { 5337 PetscCall(ISDestroy(&iscola)); 5338 } 5339 
PetscCall(PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0)); 5340 PetscFunctionReturn(0); 5341 } 5342 5343 /* 5344 * Create a sequential AIJ matrix based on row indices. a whole column is extracted once a row is matched. 5345 * Row could be local or remote.The routine is designed to be scalable in memory so that nothing is based 5346 * on a global size. 5347 * */ 5348 PetscErrorCode MatCreateSeqSubMatrixWithRows_Private(Mat P,IS rows,Mat *P_oth) 5349 { 5350 Mat_MPIAIJ *p=(Mat_MPIAIJ*)P->data; 5351 Mat_SeqAIJ *pd=(Mat_SeqAIJ*)(p->A)->data,*po=(Mat_SeqAIJ*)(p->B)->data,*p_oth; 5352 PetscInt plocalsize,nrows,*ilocal,*oilocal,i,lidx,*nrcols,*nlcols,ncol; 5353 PetscMPIInt owner; 5354 PetscSFNode *iremote,*oiremote; 5355 const PetscInt *lrowindices; 5356 PetscSF sf,osf; 5357 PetscInt pcstart,*roffsets,*loffsets,*pnnz,j; 5358 PetscInt ontotalcols,dntotalcols,ntotalcols,nout; 5359 MPI_Comm comm; 5360 ISLocalToGlobalMapping mapping; 5361 const PetscScalar *pd_a,*po_a; 5362 5363 PetscFunctionBegin; 5364 PetscCall(PetscObjectGetComm((PetscObject)P,&comm)); 5365 /* plocalsize is the number of roots 5366 * nrows is the number of leaves 5367 * */ 5368 PetscCall(MatGetLocalSize(P,&plocalsize,NULL)); 5369 PetscCall(ISGetLocalSize(rows,&nrows)); 5370 PetscCall(PetscCalloc1(nrows,&iremote)); 5371 PetscCall(ISGetIndices(rows,&lrowindices)); 5372 for (i=0;i<nrows;i++) { 5373 /* Find a remote index and an owner for a row 5374 * The row could be local or remote 5375 * */ 5376 owner = 0; 5377 lidx = 0; 5378 PetscCall(PetscLayoutFindOwnerIndex(P->rmap,lrowindices[i],&owner,&lidx)); 5379 iremote[i].index = lidx; 5380 iremote[i].rank = owner; 5381 } 5382 /* Create SF to communicate how many nonzero columns for each row */ 5383 PetscCall(PetscSFCreate(comm,&sf)); 5384 /* SF will figure out the number of nonzero colunms for each row, and their 5385 * offsets 5386 * */ 5387 PetscCall(PetscSFSetGraph(sf,plocalsize,nrows,NULL,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER)); 5388 
PetscCall(PetscSFSetFromOptions(sf)); 5389 PetscCall(PetscSFSetUp(sf)); 5390 5391 PetscCall(PetscCalloc1(2*(plocalsize+1),&roffsets)); 5392 PetscCall(PetscCalloc1(2*plocalsize,&nrcols)); 5393 PetscCall(PetscCalloc1(nrows,&pnnz)); 5394 roffsets[0] = 0; 5395 roffsets[1] = 0; 5396 for (i=0;i<plocalsize;i++) { 5397 /* diag */ 5398 nrcols[i*2+0] = pd->i[i+1] - pd->i[i]; 5399 /* off diag */ 5400 nrcols[i*2+1] = po->i[i+1] - po->i[i]; 5401 /* compute offsets so that we relative location for each row */ 5402 roffsets[(i+1)*2+0] = roffsets[i*2+0] + nrcols[i*2+0]; 5403 roffsets[(i+1)*2+1] = roffsets[i*2+1] + nrcols[i*2+1]; 5404 } 5405 PetscCall(PetscCalloc1(2*nrows,&nlcols)); 5406 PetscCall(PetscCalloc1(2*nrows,&loffsets)); 5407 /* 'r' means root, and 'l' means leaf */ 5408 PetscCall(PetscSFBcastBegin(sf,MPIU_2INT,nrcols,nlcols,MPI_REPLACE)); 5409 PetscCall(PetscSFBcastBegin(sf,MPIU_2INT,roffsets,loffsets,MPI_REPLACE)); 5410 PetscCall(PetscSFBcastEnd(sf,MPIU_2INT,nrcols,nlcols,MPI_REPLACE)); 5411 PetscCall(PetscSFBcastEnd(sf,MPIU_2INT,roffsets,loffsets,MPI_REPLACE)); 5412 PetscCall(PetscSFDestroy(&sf)); 5413 PetscCall(PetscFree(roffsets)); 5414 PetscCall(PetscFree(nrcols)); 5415 dntotalcols = 0; 5416 ontotalcols = 0; 5417 ncol = 0; 5418 for (i=0;i<nrows;i++) { 5419 pnnz[i] = nlcols[i*2+0] + nlcols[i*2+1]; 5420 ncol = PetscMax(pnnz[i],ncol); 5421 /* diag */ 5422 dntotalcols += nlcols[i*2+0]; 5423 /* off diag */ 5424 ontotalcols += nlcols[i*2+1]; 5425 } 5426 /* We do not need to figure the right number of columns 5427 * since all the calculations will be done by going through the raw data 5428 * */ 5429 PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF,nrows,ncol,0,pnnz,P_oth)); 5430 PetscCall(MatSetUp(*P_oth)); 5431 PetscCall(PetscFree(pnnz)); 5432 p_oth = (Mat_SeqAIJ*) (*P_oth)->data; 5433 /* diag */ 5434 PetscCall(PetscCalloc1(dntotalcols,&iremote)); 5435 /* off diag */ 5436 PetscCall(PetscCalloc1(ontotalcols,&oiremote)); 5437 /* diag */ 5438 
PetscCall(PetscCalloc1(dntotalcols,&ilocal)); 5439 /* off diag */ 5440 PetscCall(PetscCalloc1(ontotalcols,&oilocal)); 5441 dntotalcols = 0; 5442 ontotalcols = 0; 5443 ntotalcols = 0; 5444 for (i=0;i<nrows;i++) { 5445 owner = 0; 5446 PetscCall(PetscLayoutFindOwnerIndex(P->rmap,lrowindices[i],&owner,NULL)); 5447 /* Set iremote for diag matrix */ 5448 for (j=0;j<nlcols[i*2+0];j++) { 5449 iremote[dntotalcols].index = loffsets[i*2+0] + j; 5450 iremote[dntotalcols].rank = owner; 5451 /* P_oth is seqAIJ so that ilocal need to point to the first part of memory */ 5452 ilocal[dntotalcols++] = ntotalcols++; 5453 } 5454 /* off diag */ 5455 for (j=0;j<nlcols[i*2+1];j++) { 5456 oiremote[ontotalcols].index = loffsets[i*2+1] + j; 5457 oiremote[ontotalcols].rank = owner; 5458 oilocal[ontotalcols++] = ntotalcols++; 5459 } 5460 } 5461 PetscCall(ISRestoreIndices(rows,&lrowindices)); 5462 PetscCall(PetscFree(loffsets)); 5463 PetscCall(PetscFree(nlcols)); 5464 PetscCall(PetscSFCreate(comm,&sf)); 5465 /* P serves as roots and P_oth is leaves 5466 * Diag matrix 5467 * */ 5468 PetscCall(PetscSFSetGraph(sf,pd->i[plocalsize],dntotalcols,ilocal,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER)); 5469 PetscCall(PetscSFSetFromOptions(sf)); 5470 PetscCall(PetscSFSetUp(sf)); 5471 5472 PetscCall(PetscSFCreate(comm,&osf)); 5473 /* Off diag */ 5474 PetscCall(PetscSFSetGraph(osf,po->i[plocalsize],ontotalcols,oilocal,PETSC_OWN_POINTER,oiremote,PETSC_OWN_POINTER)); 5475 PetscCall(PetscSFSetFromOptions(osf)); 5476 PetscCall(PetscSFSetUp(osf)); 5477 PetscCall(MatSeqAIJGetArrayRead(p->A,&pd_a)); 5478 PetscCall(MatSeqAIJGetArrayRead(p->B,&po_a)); 5479 /* We operate on the matrix internal data for saving memory */ 5480 PetscCall(PetscSFBcastBegin(sf,MPIU_SCALAR,pd_a,p_oth->a,MPI_REPLACE)); 5481 PetscCall(PetscSFBcastBegin(osf,MPIU_SCALAR,po_a,p_oth->a,MPI_REPLACE)); 5482 PetscCall(MatGetOwnershipRangeColumn(P,&pcstart,NULL)); 5483 /* Convert to global indices for diag matrix */ 5484 for 
(i=0;i<pd->i[plocalsize];i++) pd->j[i] += pcstart; 5485 PetscCall(PetscSFBcastBegin(sf,MPIU_INT,pd->j,p_oth->j,MPI_REPLACE)); 5486 /* We want P_oth store global indices */ 5487 PetscCall(ISLocalToGlobalMappingCreate(comm,1,p->B->cmap->n,p->garray,PETSC_COPY_VALUES,&mapping)); 5488 /* Use memory scalable approach */ 5489 PetscCall(ISLocalToGlobalMappingSetType(mapping,ISLOCALTOGLOBALMAPPINGHASH)); 5490 PetscCall(ISLocalToGlobalMappingApply(mapping,po->i[plocalsize],po->j,po->j)); 5491 PetscCall(PetscSFBcastBegin(osf,MPIU_INT,po->j,p_oth->j,MPI_REPLACE)); 5492 PetscCall(PetscSFBcastEnd(sf,MPIU_INT,pd->j,p_oth->j,MPI_REPLACE)); 5493 /* Convert back to local indices */ 5494 for (i=0;i<pd->i[plocalsize];i++) pd->j[i] -= pcstart; 5495 PetscCall(PetscSFBcastEnd(osf,MPIU_INT,po->j,p_oth->j,MPI_REPLACE)); 5496 nout = 0; 5497 PetscCall(ISGlobalToLocalMappingApply(mapping,IS_GTOLM_DROP,po->i[plocalsize],po->j,&nout,po->j)); 5498 PetscCheck(nout == po->i[plocalsize],comm,PETSC_ERR_ARG_INCOMP,"n %" PetscInt_FMT " does not equal to nout %" PetscInt_FMT " ",po->i[plocalsize],nout); 5499 PetscCall(ISLocalToGlobalMappingDestroy(&mapping)); 5500 /* Exchange values */ 5501 PetscCall(PetscSFBcastEnd(sf,MPIU_SCALAR,pd_a,p_oth->a,MPI_REPLACE)); 5502 PetscCall(PetscSFBcastEnd(osf,MPIU_SCALAR,po_a,p_oth->a,MPI_REPLACE)); 5503 PetscCall(MatSeqAIJRestoreArrayRead(p->A,&pd_a)); 5504 PetscCall(MatSeqAIJRestoreArrayRead(p->B,&po_a)); 5505 /* Stop PETSc from shrinking memory */ 5506 for (i=0;i<nrows;i++) p_oth->ilen[i] = p_oth->imax[i]; 5507 PetscCall(MatAssemblyBegin(*P_oth,MAT_FINAL_ASSEMBLY)); 5508 PetscCall(MatAssemblyEnd(*P_oth,MAT_FINAL_ASSEMBLY)); 5509 /* Attach PetscSF objects to P_oth so that we can reuse it later */ 5510 PetscCall(PetscObjectCompose((PetscObject)*P_oth,"diagsf",(PetscObject)sf)); 5511 PetscCall(PetscObjectCompose((PetscObject)*P_oth,"offdiagsf",(PetscObject)osf)); 5512 PetscCall(PetscSFDestroy(&sf)); 5513 PetscCall(PetscSFDestroy(&osf)); 5514 PetscFunctionReturn(0); 
5515 } 5516 5517 /* 5518 * Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 5519 * This supports MPIAIJ and MAIJ 5520 * */ 5521 PetscErrorCode MatGetBrowsOfAcols_MPIXAIJ(Mat A,Mat P,PetscInt dof,MatReuse reuse,Mat *P_oth) 5522 { 5523 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data,*p=(Mat_MPIAIJ*)P->data; 5524 Mat_SeqAIJ *p_oth; 5525 IS rows,map; 5526 PetscHMapI hamp; 5527 PetscInt i,htsize,*rowindices,off,*mapping,key,count; 5528 MPI_Comm comm; 5529 PetscSF sf,osf; 5530 PetscBool has; 5531 5532 PetscFunctionBegin; 5533 PetscCall(PetscObjectGetComm((PetscObject)A,&comm)); 5534 PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAocols,A,P,0,0)); 5535 /* If it is the first time, create an index set of off-diag nonzero columns of A, 5536 * and then create a submatrix (that often is an overlapping matrix) 5537 * */ 5538 if (reuse == MAT_INITIAL_MATRIX) { 5539 /* Use a hash table to figure out unique keys */ 5540 PetscCall(PetscHMapICreate(&hamp)); 5541 PetscCall(PetscHMapIResize(hamp,a->B->cmap->n)); 5542 PetscCall(PetscCalloc1(a->B->cmap->n,&mapping)); 5543 count = 0; 5544 /* Assume that a->g is sorted, otherwise the following does not make sense */ 5545 for (i=0;i<a->B->cmap->n;i++) { 5546 key = a->garray[i]/dof; 5547 PetscCall(PetscHMapIHas(hamp,key,&has)); 5548 if (!has) { 5549 mapping[i] = count; 5550 PetscCall(PetscHMapISet(hamp,key,count++)); 5551 } else { 5552 /* Current 'i' has the same value the previous step */ 5553 mapping[i] = count-1; 5554 } 5555 } 5556 PetscCall(ISCreateGeneral(comm,a->B->cmap->n,mapping,PETSC_OWN_POINTER,&map)); 5557 PetscCall(PetscHMapIGetSize(hamp,&htsize)); 5558 PetscCheck(htsize==count,comm,PETSC_ERR_ARG_INCOMP," Size of hash map %" PetscInt_FMT " is inconsistent with count %" PetscInt_FMT " ",htsize,count); 5559 PetscCall(PetscCalloc1(htsize,&rowindices)); 5560 off = 0; 5561 PetscCall(PetscHMapIGetKeys(hamp,&off,rowindices)); 5562 PetscCall(PetscHMapIDestroy(&hamp)); 5563 PetscCall(PetscSortInt(htsize,rowindices)); 
5564 PetscCall(ISCreateGeneral(comm,htsize,rowindices,PETSC_OWN_POINTER,&rows)); 5565 /* In case, the matrix was already created but users want to recreate the matrix */ 5566 PetscCall(MatDestroy(P_oth)); 5567 PetscCall(MatCreateSeqSubMatrixWithRows_Private(P,rows,P_oth)); 5568 PetscCall(PetscObjectCompose((PetscObject)*P_oth,"aoffdiagtopothmapping",(PetscObject)map)); 5569 PetscCall(ISDestroy(&map)); 5570 PetscCall(ISDestroy(&rows)); 5571 } else if (reuse == MAT_REUSE_MATRIX) { 5572 /* If matrix was already created, we simply update values using SF objects 5573 * that as attached to the matrix ealier. 5574 */ 5575 const PetscScalar *pd_a,*po_a; 5576 5577 PetscCall(PetscObjectQuery((PetscObject)*P_oth,"diagsf",(PetscObject*)&sf)); 5578 PetscCall(PetscObjectQuery((PetscObject)*P_oth,"offdiagsf",(PetscObject*)&osf)); 5579 PetscCheck(sf && osf,comm,PETSC_ERR_ARG_NULL,"Matrix is not initialized yet"); 5580 p_oth = (Mat_SeqAIJ*) (*P_oth)->data; 5581 /* Update values in place */ 5582 PetscCall(MatSeqAIJGetArrayRead(p->A,&pd_a)); 5583 PetscCall(MatSeqAIJGetArrayRead(p->B,&po_a)); 5584 PetscCall(PetscSFBcastBegin(sf,MPIU_SCALAR,pd_a,p_oth->a,MPI_REPLACE)); 5585 PetscCall(PetscSFBcastBegin(osf,MPIU_SCALAR,po_a,p_oth->a,MPI_REPLACE)); 5586 PetscCall(PetscSFBcastEnd(sf,MPIU_SCALAR,pd_a,p_oth->a,MPI_REPLACE)); 5587 PetscCall(PetscSFBcastEnd(osf,MPIU_SCALAR,po_a,p_oth->a,MPI_REPLACE)); 5588 PetscCall(MatSeqAIJRestoreArrayRead(p->A,&pd_a)); 5589 PetscCall(MatSeqAIJRestoreArrayRead(p->B,&po_a)); 5590 } else SETERRQ(comm,PETSC_ERR_ARG_UNKNOWN_TYPE,"Unknown reuse type"); 5591 PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAocols,A,P,0,0)); 5592 PetscFunctionReturn(0); 5593 } 5594 5595 /*@C 5596 MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 5597 5598 Collective on Mat 5599 5600 Input Parameters: 5601 + A - the first matrix in mpiaij format 5602 . 
B - the second matrix in mpiaij format
-    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX

   Output Parameters:
+    rowb - On input index sets of rows of B to extract (or NULL), modified on output
.    colb - On input index sets of columns of B to extract (or NULL), modified on output
-    B_seq - the sequential matrix generated

    Level: developer

@*/
PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
  IS         rowis,colis;
  Mat        *sub = NULL;

  PetscFunctionBegin;
  PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
  PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0));

  if (scall == MAT_INITIAL_MATRIX) {
    /* rows of B to fetch: garray entries below cstart, the owned range, then the remaining garray entries */
    const PetscInt cstart  = A->cmap->rstart;
    const PetscInt ndiag   = a->A->cmap->n;
    const PetscInt noff    = a->B->cmap->n;
    const PetscInt *garray = a->garray;
    PetscInt       *browidx,n = 0,lo,t;

    PetscCall(PetscMalloc1(ndiag+noff,&browidx));
    for (lo=0; lo<noff && garray[lo] < cstart; lo++) browidx[n++] = garray[lo]; /* row < local row index */
    for (t=0; t<ndiag; t++) browidx[n++] = cstart + t;                          /* local rows */
    for (t=lo; t<noff; t++) browidx[n++] = garray[t];                           /* row > local row index */
    PetscCall(ISCreateGeneral(PETSC_COMM_SELF,n,browidx,PETSC_OWN_POINTER,&rowis));
    /* all columns of B */
    PetscCall(ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&colis));
  } else {
    PetscCheck(rowb && colb,PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
    rowis = *rowb;
    colis = *colb;
    /* MatCreateSubMatrices() takes an array of matrices; wrap the caller's matrix in a one-entry array */
    PetscCall(PetscMalloc1(1,&sub));
    sub[0] = *B_seq;
  }
  PetscCall(MatCreateSubMatrices(B,1,&rowis,&colis,scall,&sub));
  *B_seq = sub[0];
  PetscCall(PetscFree(sub));
  /* hand each index set back to the caller when a slot was provided, otherwise release it */
  if (rowb) {
    *rowb = rowis;
  } else {
    PetscCall(ISDestroy(&rowis));
  }
  if (colb) {
    *colb = colis;
  } else {
    PetscCall(ISDestroy(&colis));
  }
  PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0));
  PetscFunctionReturn(0);
}

/*
    MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns
    of the OFF-DIAGONAL portion of local A

    Collective on Mat

   Input Parameters:
+    A,B - the matrices in mpiaij format
-    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX

   Output Parameter:
+    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
.    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
.    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
-    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N

    Developer Notes: This directly accesses information inside the VecScatter associated with the matrix-vector product
     for this matrix. This is not desirable..
5683 5684 Level: developer 5685 5686 */ 5687 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth) 5688 { 5689 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5690 Mat_SeqAIJ *b_oth; 5691 VecScatter ctx; 5692 MPI_Comm comm; 5693 const PetscMPIInt *rprocs,*sprocs; 5694 const PetscInt *srow,*rstarts,*sstarts; 5695 PetscInt *rowlen,*bufj,*bufJ,ncols = 0,aBn=a->B->cmap->n,row,*b_othi,*b_othj,*rvalues=NULL,*svalues=NULL,*cols,sbs,rbs; 5696 PetscInt i,j,k=0,l,ll,nrecvs,nsends,nrows,*rstartsj = NULL,*sstartsj,len; 5697 PetscScalar *b_otha,*bufa,*bufA,*vals = NULL; 5698 MPI_Request *reqs = NULL,*rwaits = NULL,*swaits = NULL; 5699 PetscMPIInt size,tag,rank,nreqs; 5700 5701 PetscFunctionBegin; 5702 PetscCall(PetscObjectGetComm((PetscObject)A,&comm)); 5703 PetscCallMPI(MPI_Comm_size(comm,&size)); 5704 5705 if (PetscUnlikely(A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend)) { 5706 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5707 } 5708 PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0)); 5709 PetscCallMPI(MPI_Comm_rank(comm,&rank)); 5710 5711 if (size == 1) { 5712 startsj_s = NULL; 5713 bufa_ptr = NULL; 5714 *B_oth = NULL; 5715 PetscFunctionReturn(0); 5716 } 5717 5718 ctx = a->Mvctx; 5719 tag = ((PetscObject)ctx)->tag; 5720 5721 PetscCall(VecScatterGetRemote_Private(ctx,PETSC_TRUE/*send*/,&nsends,&sstarts,&srow,&sprocs,&sbs)); 5722 /* rprocs[] must be ordered so that indices received from them are ordered in rvalues[], which is key to algorithms used in this subroutine */ 5723 PetscCall(VecScatterGetRemoteOrdered_Private(ctx,PETSC_FALSE/*recv*/,&nrecvs,&rstarts,NULL/*indices not needed*/,&rprocs,&rbs)); 5724 PetscCall(PetscMPIIntCast(nsends+nrecvs,&nreqs)); 5725 
PetscCall(PetscMalloc1(nreqs,&reqs)); 5726 rwaits = reqs; 5727 swaits = reqs + nrecvs; 5728 5729 if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 5730 if (scall == MAT_INITIAL_MATRIX) { 5731 /* i-array */ 5732 /*---------*/ 5733 /* post receives */ 5734 if (nrecvs) PetscCall(PetscMalloc1(rbs*(rstarts[nrecvs] - rstarts[0]),&rvalues)); /* rstarts can be NULL when nrecvs=0 */ 5735 for (i=0; i<nrecvs; i++) { 5736 rowlen = rvalues + rstarts[i]*rbs; 5737 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */ 5738 PetscCallMPI(MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i)); 5739 } 5740 5741 /* pack the outgoing message */ 5742 PetscCall(PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj)); 5743 5744 sstartsj[0] = 0; 5745 rstartsj[0] = 0; 5746 len = 0; /* total length of j or a array to be sent */ 5747 if (nsends) { 5748 k = sstarts[0]; /* ATTENTION: sstarts[0] and rstarts[0] are not necessarily zero */ 5749 PetscCall(PetscMalloc1(sbs*(sstarts[nsends]-sstarts[0]),&svalues)); 5750 } 5751 for (i=0; i<nsends; i++) { 5752 rowlen = svalues + (sstarts[i]-sstarts[0])*sbs; 5753 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5754 for (j=0; j<nrows; j++) { 5755 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 5756 for (l=0; l<sbs; l++) { 5757 PetscCall(MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL)); /* rowlength */ 5758 5759 rowlen[j*sbs+l] = ncols; 5760 5761 len += ncols; 5762 PetscCall(MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL)); 5763 } 5764 k++; 5765 } 5766 PetscCallMPI(MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i)); 5767 5768 sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */ 5769 } 5770 /* recvs and sends of i-array are completed */ 5771 if (nreqs) PetscCallMPI(MPI_Waitall(nreqs,reqs,MPI_STATUSES_IGNORE)); 5772 PetscCall(PetscFree(svalues)); 5773 5774 /* allocate buffers for sending j and a arrays */ 5775 PetscCall(PetscMalloc1(len+1,&bufj)); 5776 
PetscCall(PetscMalloc1(len+1,&bufa)); 5777 5778 /* create i-array of B_oth */ 5779 PetscCall(PetscMalloc1(aBn+2,&b_othi)); 5780 5781 b_othi[0] = 0; 5782 len = 0; /* total length of j or a array to be received */ 5783 k = 0; 5784 for (i=0; i<nrecvs; i++) { 5785 rowlen = rvalues + (rstarts[i]-rstarts[0])*rbs; 5786 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of rows to be received */ 5787 for (j=0; j<nrows; j++) { 5788 b_othi[k+1] = b_othi[k] + rowlen[j]; 5789 PetscCall(PetscIntSumError(rowlen[j],len,&len)); 5790 k++; 5791 } 5792 rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 5793 } 5794 PetscCall(PetscFree(rvalues)); 5795 5796 /* allocate space for j and a arrays of B_oth */ 5797 PetscCall(PetscMalloc1(b_othi[aBn]+1,&b_othj)); 5798 PetscCall(PetscMalloc1(b_othi[aBn]+1,&b_otha)); 5799 5800 /* j-array */ 5801 /*---------*/ 5802 /* post receives of j-array */ 5803 for (i=0; i<nrecvs; i++) { 5804 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5805 PetscCallMPI(MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i)); 5806 } 5807 5808 /* pack the outgoing message j-array */ 5809 if (nsends) k = sstarts[0]; 5810 for (i=0; i<nsends; i++) { 5811 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5812 bufJ = bufj+sstartsj[i]; 5813 for (j=0; j<nrows; j++) { 5814 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5815 for (ll=0; ll<sbs; ll++) { 5816 PetscCall(MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL)); 5817 for (l=0; l<ncols; l++) { 5818 *bufJ++ = cols[l]; 5819 } 5820 PetscCall(MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL)); 5821 } 5822 } 5823 PetscCallMPI(MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i)); 5824 } 5825 5826 /* recvs and sends of j-array are completed */ 5827 if (nreqs) PetscCallMPI(MPI_Waitall(nreqs,reqs,MPI_STATUSES_IGNORE)); 5828 } else if (scall == MAT_REUSE_MATRIX) { 5829 sstartsj = *startsj_s; 5830 rstartsj = 
*startsj_r; 5831 bufa = *bufa_ptr; 5832 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5833 PetscCall(MatSeqAIJGetArrayWrite(*B_oth,&b_otha)); 5834 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not possess an object container"); 5835 5836 /* a-array */ 5837 /*---------*/ 5838 /* post receives of a-array */ 5839 for (i=0; i<nrecvs; i++) { 5840 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5841 PetscCallMPI(MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i)); 5842 } 5843 5844 /* pack the outgoing message a-array */ 5845 if (nsends) k = sstarts[0]; 5846 for (i=0; i<nsends; i++) { 5847 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5848 bufA = bufa+sstartsj[i]; 5849 for (j=0; j<nrows; j++) { 5850 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5851 for (ll=0; ll<sbs; ll++) { 5852 PetscCall(MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals)); 5853 for (l=0; l<ncols; l++) { 5854 *bufA++ = vals[l]; 5855 } 5856 PetscCall(MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals)); 5857 } 5858 } 5859 PetscCallMPI(MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i)); 5860 } 5861 /* recvs and sends of a-array are completed */ 5862 if (nreqs) PetscCallMPI(MPI_Waitall(nreqs,reqs,MPI_STATUSES_IGNORE)); 5863 PetscCall(PetscFree(reqs)); 5864 5865 if (scall == MAT_INITIAL_MATRIX) { 5866 /* put together the new matrix */ 5867 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth)); 5868 5869 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5870 /* Since these are PETSc arrays, change flags to free them as necessary. 
*/ 5871 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5872 b_oth->free_a = PETSC_TRUE; 5873 b_oth->free_ij = PETSC_TRUE; 5874 b_oth->nonew = 0; 5875 5876 PetscCall(PetscFree(bufj)); 5877 if (!startsj_s || !bufa_ptr) { 5878 PetscCall(PetscFree2(sstartsj,rstartsj)); 5879 PetscCall(PetscFree(bufa_ptr)); 5880 } else { 5881 *startsj_s = sstartsj; 5882 *startsj_r = rstartsj; 5883 *bufa_ptr = bufa; 5884 } 5885 } else if (scall == MAT_REUSE_MATRIX) { 5886 PetscCall(MatSeqAIJRestoreArrayWrite(*B_oth,&b_otha)); 5887 } 5888 5889 PetscCall(VecScatterRestoreRemote_Private(ctx,PETSC_TRUE,&nsends,&sstarts,&srow,&sprocs,&sbs)); 5890 PetscCall(VecScatterRestoreRemoteOrdered_Private(ctx,PETSC_FALSE,&nrecvs,&rstarts,NULL,&rprocs,&rbs)); 5891 PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0)); 5892 PetscFunctionReturn(0); 5893 } 5894 5895 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*); 5896 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*); 5897 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJSELL(Mat,MatType,MatReuse,Mat*); 5898 #if defined(PETSC_HAVE_MKL_SPARSE) 5899 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat,MatType,MatReuse,Mat*); 5900 #endif 5901 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIBAIJ(Mat,MatType,MatReuse,Mat*); 5902 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*); 5903 #if defined(PETSC_HAVE_ELEMENTAL) 5904 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat,MatType,MatReuse,Mat*); 5905 #endif 5906 #if defined(PETSC_HAVE_SCALAPACK) 5907 PETSC_INTERN PetscErrorCode MatConvert_AIJ_ScaLAPACK(Mat,MatType,MatReuse,Mat*); 5908 #endif 5909 #if defined(PETSC_HAVE_HYPRE) 5910 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*); 5911 #endif 5912 #if defined(PETSC_HAVE_CUDA) 5913 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCUSPARSE(Mat,MatType,MatReuse,Mat*); 5914 #endif 5915 #if defined(PETSC_HAVE_KOKKOS_KERNELS) 
5916 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJKokkos(Mat,MatType,MatReuse,Mat*); 5917 #endif 5918 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat,MatType,MatReuse,Mat*); 5919 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat,MatType,MatReuse,Mat*); 5920 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_IS_XAIJ(Mat); 5921 5922 /* 5923 Computes (B'*A')' since computing B*A directly is untenable 5924 5925 n p p 5926 [ ] [ ] [ ] 5927 m [ A ] * n [ B ] = m [ C ] 5928 [ ] [ ] [ ] 5929 5930 */ 5931 static PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C) 5932 { 5933 Mat At,Bt,Ct; 5934 5935 PetscFunctionBegin; 5936 PetscCall(MatTranspose(A,MAT_INITIAL_MATRIX,&At)); 5937 PetscCall(MatTranspose(B,MAT_INITIAL_MATRIX,&Bt)); 5938 PetscCall(MatMatMult(Bt,At,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&Ct)); 5939 PetscCall(MatDestroy(&At)); 5940 PetscCall(MatDestroy(&Bt)); 5941 PetscCall(MatTranspose(Ct,MAT_REUSE_MATRIX,&C)); 5942 PetscCall(MatDestroy(&Ct)); 5943 PetscFunctionReturn(0); 5944 } 5945 5946 static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat C) 5947 { 5948 PetscBool cisdense; 5949 5950 PetscFunctionBegin; 5951 PetscCheck(A->cmap->n == B->rmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %" PetscInt_FMT " != B->rmap->n %" PetscInt_FMT,A->cmap->n,B->rmap->n); 5952 PetscCall(MatSetSizes(C,A->rmap->n,B->cmap->n,A->rmap->N,B->cmap->N)); 5953 PetscCall(MatSetBlockSizesFromMats(C,A,B)); 5954 PetscCall(PetscObjectTypeCompareAny((PetscObject)C,&cisdense,MATMPIDENSE,MATMPIDENSECUDA,"")); 5955 if (!cisdense) { 5956 PetscCall(MatSetType(C,((PetscObject)A)->type_name)); 5957 } 5958 PetscCall(MatSetUp(C)); 5959 5960 C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ; 5961 PetscFunctionReturn(0); 5962 } 5963 5964 /* ----------------------------------------------------------------*/ 5965 static PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ_AB(Mat C) 5966 { 5967 Mat_Product *product = 
C->product; 5968 Mat A = product->A,B=product->B; 5969 5970 PetscFunctionBegin; 5971 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) 5972 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5973 5974 C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIAIJ; 5975 C->ops->productsymbolic = MatProductSymbolic_AB; 5976 PetscFunctionReturn(0); 5977 } 5978 5979 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ(Mat C) 5980 { 5981 Mat_Product *product = C->product; 5982 5983 PetscFunctionBegin; 5984 if (product->type == MATPRODUCT_AB) { 5985 PetscCall(MatProductSetFromOptions_MPIDense_MPIAIJ_AB(C)); 5986 } 5987 PetscFunctionReturn(0); 5988 } 5989 5990 /* Merge two sets of sorted nonzeros and return a CSR for the merged (sequential) matrix 5991 5992 Input Parameters: 5993 5994 j1,rowBegin1,rowEnd1,perm1,jmap1: describe the first set of nonzeros (Set1) 5995 j2,rowBegin2,rowEnd2,perm2,jmap2: describe the second set of nonzeros (Set2) 5996 5997 mat: both sets' nonzeros are on m rows, where m is the number of local rows of the matrix mat 5998 5999 For Set1, j1[] contains column indices of the nonzeros. 6000 For the k-th row (0<=k<m), [rowBegin1[k],rowEnd1[k]) index into j1[] and point to the begin/end nonzero in row k 6001 respectively (note rowEnd1[k] is not necessarily equal to rwoBegin1[k+1]). Indices in this range of j1[] are sorted, 6002 but might have repeats. jmap1[t+1] - jmap1[t] is the number of repeats for the t-th unique nonzero in Set1. 6003 6004 Similar for Set2. 6005 6006 This routine merges the two sets of nonzeros row by row and removes repeats. 6007 6008 Output Parameters: (memory is allocated by the caller) 6009 6010 i[],j[]: the CSR of the merged matrix, which has m rows. 6011 imap1[]: the k-th unique nonzero in Set1 (k=0,1,...) 
corresponds to imap1[k]-th unique nonzero in the merged matrix. 6012 imap2[]: similar to imap1[], but for Set2. 6013 Note we order nonzeros row-by-row and from left to right. 6014 */ 6015 static PetscErrorCode MatMergeEntries_Internal(Mat mat,const PetscInt j1[],const PetscInt j2[],const PetscCount rowBegin1[],const PetscCount rowEnd1[], 6016 const PetscCount rowBegin2[],const PetscCount rowEnd2[],const PetscCount jmap1[],const PetscCount jmap2[], 6017 PetscCount imap1[],PetscCount imap2[],PetscInt i[],PetscInt j[]) 6018 { 6019 PetscInt r,m; /* Row index of mat */ 6020 PetscCount t,t1,t2,b1,e1,b2,e2; 6021 6022 PetscFunctionBegin; 6023 PetscCall(MatGetLocalSize(mat,&m,NULL)); 6024 t1 = t2 = t = 0; /* Count unique nonzeros of in Set1, Set1 and the merged respectively */ 6025 i[0] = 0; 6026 for (r=0; r<m; r++) { /* Do row by row merging */ 6027 b1 = rowBegin1[r]; 6028 e1 = rowEnd1[r]; 6029 b2 = rowBegin2[r]; 6030 e2 = rowEnd2[r]; 6031 while (b1 < e1 && b2 < e2) { 6032 if (j1[b1] == j2[b2]) { /* Same column index and hence same nonzero */ 6033 j[t] = j1[b1]; 6034 imap1[t1] = t; 6035 imap2[t2] = t; 6036 b1 += jmap1[t1+1] - jmap1[t1]; /* Jump to next unique local nonzero */ 6037 b2 += jmap2[t2+1] - jmap2[t2]; /* Jump to next unique remote nonzero */ 6038 t1++; t2++; t++; 6039 } else if (j1[b1] < j2[b2]) { 6040 j[t] = j1[b1]; 6041 imap1[t1] = t; 6042 b1 += jmap1[t1+1] - jmap1[t1]; 6043 t1++; t++; 6044 } else { 6045 j[t] = j2[b2]; 6046 imap2[t2] = t; 6047 b2 += jmap2[t2+1] - jmap2[t2]; 6048 t2++; t++; 6049 } 6050 } 6051 /* Merge the remaining in either j1[] or j2[] */ 6052 while (b1 < e1) { 6053 j[t] = j1[b1]; 6054 imap1[t1] = t; 6055 b1 += jmap1[t1+1] - jmap1[t1]; 6056 t1++; t++; 6057 } 6058 while (b2 < e2) { 6059 j[t] = j2[b2]; 6060 imap2[t2] = t; 6061 b2 += jmap2[t2+1] - jmap2[t2]; 6062 t2++; t++; 6063 } 6064 i[r+1] = t; 6065 } 6066 PetscFunctionReturn(0); 6067 } 6068 6069 /* Split nonzeros in a block of local rows into two subsets: those in the diagonal block and those 
in the off-diagonal block 6070 6071 Input Parameters: 6072 mat: an MPI matrix that provides row and column layout information for splitting. Let's say its number of local rows is m. 6073 n,i[],j[],perm[]: there are n input entries, belonging to m rows. Row/col indices of the entries are stored in i[] and j[] 6074 respectively, along with a permutation array perm[]. Length of the i[],j[],perm[] arrays is n. 6075 6076 i[] is already sorted, but within a row, j[] is not sorted and might have repeats. 6077 i[] might contain negative indices at the beginning, which means the corresponding entries should be ignored in the splitting. 6078 6079 Output Parameters: 6080 j[],perm[]: the routine needs to sort j[] within each row along with perm[]. 6081 rowBegin[],rowMid[],rowEnd[]: of length m, and the memory is preallocated and zeroed by the caller. 6082 They contain indices pointing to j[]. For 0<=r<m, [rowBegin[r],rowMid[r]) point to begin/end entries of row r of the diagonal block, 6083 and [rowMid[r],rowEnd[r]) point to begin/end entries of row r of the off-diagonal block. 6084 6085 Aperm[],Ajmap[],Atot,Annz: Arrays are allocated by this routine. 6086 Atot: number of entries belonging to the diagonal block. 6087 Annz: number of unique nonzeros belonging to the diagonal block. 6088 Aperm[Atot] stores values from perm[] for entries belonging to the diagonal block. Length of Aperm[] is Atot, though it may also count 6089 repeats (i.e., same 'i,j' pair). 6090 Ajmap[Annz+1] stores the number of repeats of each unique entry belonging to the diagonal block. More precisely, Ajmap[t+1] - Ajmap[t] 6091 is the number of repeats for the t-th unique entry in the diagonal block. Ajmap[0] is always 0. 6092 6093 Atot: number of entries belonging to the diagonal block 6094 Annz: number of unique nonzeros belonging to the diagonal block. 6095 6096 Bperm[], Bjmap[], Btot, Bnnz are similar but for the off-diagonal block. 
6097 6098 Aperm[],Bperm[],Ajmap[] and Bjmap[] are allocated separately by this routine with PetscMalloc1(). 6099 */ 6100 static PetscErrorCode MatSplitEntries_Internal(Mat mat,PetscCount n,const PetscInt i[],PetscInt j[], 6101 PetscCount perm[],PetscCount rowBegin[],PetscCount rowMid[],PetscCount rowEnd[], 6102 PetscCount *Atot_,PetscCount **Aperm_,PetscCount *Annz_,PetscCount **Ajmap_, 6103 PetscCount *Btot_,PetscCount **Bperm_,PetscCount *Bnnz_,PetscCount **Bjmap_) 6104 { 6105 PetscInt cstart,cend,rstart,rend,row,col; 6106 PetscCount Atot=0,Btot=0; /* Total number of nonzeros in the diagonal and off-diagonal blocks */ 6107 PetscCount Annz=0,Bnnz=0; /* Number of unique nonzeros in the diagonal and off-diagonal blocks */ 6108 PetscCount k,m,p,q,r,s,mid; 6109 PetscCount *Aperm,*Bperm,*Ajmap,*Bjmap; 6110 6111 PetscFunctionBegin; 6112 PetscCall(PetscLayoutGetRange(mat->rmap,&rstart,&rend)); 6113 PetscCall(PetscLayoutGetRange(mat->cmap,&cstart,&cend)); 6114 m = rend - rstart; 6115 6116 for (k=0; k<n; k++) {if (i[k]>=0) break;} /* Skip negative rows */ 6117 6118 /* Process [k,n): sort and partition each local row into diag and offdiag portions, 6119 fill rowBegin[], rowMid[], rowEnd[], and count Atot, Btot, Annz, Bnnz. 6120 */ 6121 while (k<n) { 6122 row = i[k]; 6123 /* Entries in [k,s) are in one row. 
Shift diagonal block col indices so that diag is ahead of offdiag after sorting the row */ 6124 for (s=k; s<n; s++) if (i[s] != row) break; 6125 for (p=k; p<s; p++) { 6126 if (j[p] >= cstart && j[p] < cend) j[p] -= PETSC_MAX_INT; /* Shift diag columns to range of [-PETSC_MAX_INT, -1] */ 6127 else PetscAssert((j[p] >= 0) && (j[p] <= mat->cmap->N),PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column index %" PetscInt_FMT " is out of range",j[p]); 6128 } 6129 PetscCall(PetscSortIntWithCountArray(s-k,j+k,perm+k)); 6130 PetscCall(PetscSortedIntUpperBound(j,k,s,-1,&mid)); /* Separate [k,s) into [k,mid) for diag and [mid,s) for offdiag */ 6131 rowBegin[row-rstart] = k; 6132 rowMid[row-rstart] = mid; 6133 rowEnd[row-rstart] = s; 6134 6135 /* Count nonzeros of this diag/offdiag row, which might have repeats */ 6136 Atot += mid - k; 6137 Btot += s - mid; 6138 6139 /* Count unique nonzeros of this diag/offdiag row */ 6140 for (p=k; p<mid;) { 6141 col = j[p]; 6142 do {j[p] += PETSC_MAX_INT; p++;} while (p<mid && j[p] == col); /* Revert the modified diagonal indices */ 6143 Annz++; 6144 } 6145 6146 for (p=mid; p<s;) { 6147 col = j[p]; 6148 do {p++;} while (p<s && j[p] == col); 6149 Bnnz++; 6150 } 6151 k = s; 6152 } 6153 6154 /* Allocation according to Atot, Btot, Annz, Bnnz */ 6155 PetscCall(PetscMalloc1(Atot,&Aperm)); 6156 PetscCall(PetscMalloc1(Btot,&Bperm)); 6157 PetscCall(PetscMalloc1(Annz+1,&Ajmap)); 6158 PetscCall(PetscMalloc1(Bnnz+1,&Bjmap)); 6159 6160 /* Re-scan indices and copy diag/offdiag permutation indices to Aperm, Bperm and also fill Ajmap and Bjmap */ 6161 Ajmap[0] = Bjmap[0] = Atot = Btot = Annz = Bnnz = 0; 6162 for (r=0; r<m; r++) { 6163 k = rowBegin[r]; 6164 mid = rowMid[r]; 6165 s = rowEnd[r]; 6166 PetscCall(PetscArraycpy(Aperm+Atot,perm+k, mid-k)); 6167 PetscCall(PetscArraycpy(Bperm+Btot,perm+mid,s-mid)); 6168 Atot += mid - k; 6169 Btot += s - mid; 6170 6171 /* Scan column indices in this row and find out how many repeats each unique nonzero has */ 6172 for 
(p=k; p<mid;) { 6173 col = j[p]; 6174 q = p; 6175 do {p++;} while (p<mid && j[p] == col); 6176 Ajmap[Annz+1] = Ajmap[Annz] + (p - q); 6177 Annz++; 6178 } 6179 6180 for (p=mid; p<s;) { 6181 col = j[p]; 6182 q = p; 6183 do {p++;} while (p<s && j[p] == col); 6184 Bjmap[Bnnz+1] = Bjmap[Bnnz] + (p - q); 6185 Bnnz++; 6186 } 6187 } 6188 /* Output */ 6189 *Aperm_ = Aperm; 6190 *Annz_ = Annz; 6191 *Atot_ = Atot; 6192 *Ajmap_ = Ajmap; 6193 *Bperm_ = Bperm; 6194 *Bnnz_ = Bnnz; 6195 *Btot_ = Btot; 6196 *Bjmap_ = Bjmap; 6197 PetscFunctionReturn(0); 6198 } 6199 6200 /* Expand the jmap[] array to make a new one in view of nonzeros in the merged matrix 6201 6202 Input Parameters: 6203 nnz1: number of unique nonzeros in a set that was used to produce imap[], jmap[] 6204 nnz: number of unique nonzeros in the merged matrix 6205 imap[nnz1]: i-th nonzero in the set is the imap[i]-th nonzero in the merged matrix 6206 jmap[nnz1+1]: i-th nonzeron in the set has jmap[i+1] - jmap[i] repeats in the set 6207 6208 Output Parameter: (memory is allocated by the caller) 6209 jmap_new[nnz+1]: i-th nonzero in the merged matrix has jmap_new[i+1] - jmap_new[i] repeats in the set 6210 6211 Example: 6212 nnz1 = 4 6213 nnz = 6 6214 imap = [1,3,4,5] 6215 jmap = [0,3,5,6,7] 6216 then, 6217 jmap_new = [0,0,3,3,5,6,7] 6218 */ 6219 static PetscErrorCode ExpandJmap_Internal(PetscCount nnz1,PetscCount nnz,const PetscCount imap[],const PetscCount jmap[],PetscCount jmap_new[]) 6220 { 6221 PetscCount k,p; 6222 6223 PetscFunctionBegin; 6224 jmap_new[0] = 0; 6225 p = nnz; /* p loops over jmap_new[] backwards */ 6226 for (k=nnz1-1; k>=0; k--) { /* k loops over imap[] */ 6227 for (; p > imap[k]; p--) jmap_new[p] = jmap[k+1]; 6228 } 6229 for (; p >= 0; p--) jmap_new[p] = jmap[0]; 6230 PetscFunctionReturn(0); 6231 } 6232 6233 PetscErrorCode MatSetPreallocationCOO_MPIAIJ(Mat mat, PetscCount coo_n, const PetscInt coo_i[], const PetscInt coo_j[]) 6234 { 6235 MPI_Comm comm; 6236 PetscMPIInt rank,size; 6237 PetscInt 
m,n,M,N,rstart,rend,cstart,cend; /* Sizes, indices of row/col, therefore with type PetscInt */ 6238 PetscCount k,p,q,rem; /* Loop variables over coo arrays */ 6239 Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ*)mat->data; 6240 6241 PetscFunctionBegin; 6242 PetscCall(PetscFree(mpiaij->garray)); 6243 PetscCall(VecDestroy(&mpiaij->lvec)); 6244 #if defined(PETSC_USE_CTABLE) 6245 PetscCall(PetscTableDestroy(&mpiaij->colmap)); 6246 #else 6247 PetscCall(PetscFree(mpiaij->colmap)); 6248 #endif 6249 PetscCall(VecScatterDestroy(&mpiaij->Mvctx)); 6250 mat->assembled = PETSC_FALSE; 6251 mat->was_assembled = PETSC_FALSE; 6252 PetscCall(MatResetPreallocationCOO_MPIAIJ(mat)); 6253 6254 PetscCall(PetscObjectGetComm((PetscObject)mat,&comm)); 6255 PetscCallMPI(MPI_Comm_size(comm,&size)); 6256 PetscCallMPI(MPI_Comm_rank(comm,&rank)); 6257 PetscCall(PetscLayoutSetUp(mat->rmap)); 6258 PetscCall(PetscLayoutSetUp(mat->cmap)); 6259 PetscCall(PetscLayoutGetRange(mat->rmap,&rstart,&rend)); 6260 PetscCall(PetscLayoutGetRange(mat->cmap,&cstart,&cend)); 6261 PetscCall(MatGetLocalSize(mat,&m,&n)); 6262 PetscCall(MatGetSize(mat,&M,&N)); 6263 6264 /* ---------------------------------------------------------------------------*/ 6265 /* Sort (i,j) by row along with a permutation array, so that the to-be-ignored */ 6266 /* entries come first, then local rows, then remote rows. 
*/ 6267 /* ---------------------------------------------------------------------------*/ 6268 PetscCount n1 = coo_n,*perm1; 6269 PetscInt *i1,*j1; /* Copies of input COOs along with a permutation array */ 6270 PetscCall(PetscMalloc3(n1,&i1,n1,&j1,n1,&perm1)); 6271 PetscCall(PetscArraycpy(i1,coo_i,n1)); /* Make a copy since we'll modify it */ 6272 PetscCall(PetscArraycpy(j1,coo_j,n1)); 6273 for (k=0; k<n1; k++) perm1[k] = k; 6274 6275 /* Manipulate indices so that entries with negative row or col indices will have smallest 6276 row indices, local entries will have greater but negative row indices, and remote entries 6277 will have positive row indices. 6278 */ 6279 for (k=0; k<n1; k++) { 6280 if (i1[k] < 0 || j1[k] < 0) i1[k] = PETSC_MIN_INT; /* e.g., -2^31, minimal to move them ahead */ 6281 else if (i1[k] >= rstart && i1[k] < rend) i1[k] -= PETSC_MAX_INT; /* e.g., minus 2^31-1 to shift local rows to range of [-PETSC_MAX_INT, -1] */ 6282 else PetscCheck(!mat->nooffprocentries,PETSC_COMM_SELF,PETSC_ERR_USER_INPUT,"MAT_NO_OFF_PROC_ENTRIES is set but insert to remote rows"); 6283 else if (mpiaij->donotstash) i1[k] = PETSC_MIN_INT; /* Ignore offproc entries as if they had negative indices */ 6284 } 6285 6286 /* Sort by row; after that, [0,k) have ignored entires, [k,rem) have local rows and [rem,n1) have remote rows */ 6287 PetscCall(PetscSortIntWithIntCountArrayPair(n1,i1,j1,perm1)); 6288 for (k=0; k<n1; k++) {if (i1[k] > PETSC_MIN_INT) break;} /* Advance k to the first entry we need to take care of */ 6289 PetscCall(PetscSortedIntUpperBound(i1,k,n1,rend-1-PETSC_MAX_INT,&rem)); /* rem is upper bound of the last local row */ 6290 for (; k<rem; k++) i1[k] += PETSC_MAX_INT; /* Revert row indices of local rows*/ 6291 6292 /* ---------------------------------------------------------------------------*/ 6293 /* Split local rows into diag/offdiag portions */ 6294 /* ---------------------------------------------------------------------------*/ 6295 PetscCount 
*rowBegin1,*rowMid1,*rowEnd1; 6296 PetscCount *Ajmap1,*Aperm1,*Bjmap1,*Bperm1,*Cperm1; 6297 PetscCount Annz1,Bnnz1,Atot1,Btot1; 6298 6299 PetscCall(PetscCalloc3(m,&rowBegin1,m,&rowMid1,m,&rowEnd1)); 6300 PetscCall(PetscMalloc1(n1-rem,&Cperm1)); 6301 PetscCall(MatSplitEntries_Internal(mat,rem,i1,j1,perm1,rowBegin1,rowMid1,rowEnd1,&Atot1,&Aperm1,&Annz1,&Ajmap1,&Btot1,&Bperm1,&Bnnz1,&Bjmap1)); 6302 6303 /* ---------------------------------------------------------------------------*/ 6304 /* Send remote rows to their owner */ 6305 /* ---------------------------------------------------------------------------*/ 6306 /* Find which rows should be sent to which remote ranks*/ 6307 PetscInt nsend = 0; /* Number of MPI ranks to send data to */ 6308 PetscMPIInt *sendto; /* [nsend], storing remote ranks */ 6309 PetscInt *nentries; /* [nsend], storing number of entries sent to remote ranks; Assume PetscInt is big enough for this count, and error if not */ 6310 const PetscInt *ranges; 6311 PetscInt maxNsend = size >= 128? 128 : size; /* Assume max 128 neighbors; realloc when needed */ 6312 6313 PetscCall(PetscLayoutGetRanges(mat->rmap,&ranges)); 6314 PetscCall(PetscMalloc2(maxNsend,&sendto,maxNsend,&nentries)); 6315 for (k=rem; k<n1;) { 6316 PetscMPIInt owner; 6317 PetscInt firstRow,lastRow; 6318 6319 /* Locate a row range */ 6320 firstRow = i1[k]; /* first row of this owner */ 6321 PetscCall(PetscLayoutFindOwner(mat->rmap,firstRow,&owner)); 6322 lastRow = ranges[owner+1]-1; /* last row of this owner */ 6323 6324 /* Find the first index 'p' in [k,n) with i[p] belonging to next owner */ 6325 PetscCall(PetscSortedIntUpperBound(i1,k,n1,lastRow,&p)); 6326 6327 /* All entries in [k,p) belong to this remote owner */ 6328 if (nsend >= maxNsend) { /* Double the remote ranks arrays if not long enough */ 6329 PetscMPIInt *sendto2; 6330 PetscInt *nentries2; 6331 PetscInt maxNsend2 = (maxNsend <= size/2) ? 
maxNsend*2 : size; 6332 6333 PetscCall(PetscMalloc2(maxNsend2,&sendto2,maxNsend2,&nentries2)); 6334 PetscCall(PetscArraycpy(sendto2,sendto,maxNsend)); 6335 PetscCall(PetscArraycpy(nentries2,nentries2,maxNsend+1)); 6336 PetscCall(PetscFree2(sendto,nentries2)); 6337 sendto = sendto2; 6338 nentries = nentries2; 6339 maxNsend = maxNsend2; 6340 } 6341 sendto[nsend] = owner; 6342 nentries[nsend] = p - k; 6343 PetscCall(PetscCountCast(p-k,&nentries[nsend])); 6344 nsend++; 6345 k = p; 6346 } 6347 6348 /* Build 1st SF to know offsets on remote to send data */ 6349 PetscSF sf1; 6350 PetscInt nroots = 1,nroots2 = 0; 6351 PetscInt nleaves = nsend,nleaves2 = 0; 6352 PetscInt *offsets; 6353 PetscSFNode *iremote; 6354 6355 PetscCall(PetscSFCreate(comm,&sf1)); 6356 PetscCall(PetscMalloc1(nsend,&iremote)); 6357 PetscCall(PetscMalloc1(nsend,&offsets)); 6358 for (k=0; k<nsend; k++) { 6359 iremote[k].rank = sendto[k]; 6360 iremote[k].index = 0; 6361 nleaves2 += nentries[k]; 6362 PetscCheck(nleaves2 >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Number of SF leaves is too large for PetscInt"); 6363 } 6364 PetscCall(PetscSFSetGraph(sf1,nroots,nleaves,NULL,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER)); 6365 PetscCall(PetscSFFetchAndOpWithMemTypeBegin(sf1,MPIU_INT,PETSC_MEMTYPE_HOST,&nroots2/*rootdata*/,PETSC_MEMTYPE_HOST,nentries/*leafdata*/,PETSC_MEMTYPE_HOST,offsets/*leafupdate*/,MPI_SUM)); 6366 PetscCall(PetscSFFetchAndOpEnd(sf1,MPIU_INT,&nroots2,nentries,offsets,MPI_SUM)); /* Would nroots2 overflow, we check offsets[] below */ 6367 PetscCall(PetscSFDestroy(&sf1)); 6368 PetscAssert(nleaves2 == n1-rem,PETSC_COMM_SELF,PETSC_ERR_PLIB,"nleaves2 %" PetscInt_FMT " != number of remote entries %" PetscCount_FMT "",nleaves2,n1-rem); 6369 6370 /* Build 2nd SF to send remote COOs to their owner */ 6371 PetscSF sf2; 6372 nroots = nroots2; 6373 nleaves = nleaves2; 6374 PetscCall(PetscSFCreate(comm,&sf2)); 6375 PetscCall(PetscSFSetFromOptions(sf2)); 6376 PetscCall(PetscMalloc1(nleaves,&iremote)); 
6377 p = 0; 6378 for (k=0; k<nsend; k++) { 6379 PetscCheck(offsets[k] >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Number of SF roots is too large for PetscInt"); 6380 for (q=0; q<nentries[k]; q++,p++) { 6381 iremote[p].rank = sendto[k]; 6382 iremote[p].index = offsets[k] + q; 6383 } 6384 } 6385 PetscCall(PetscSFSetGraph(sf2,nroots,nleaves,NULL,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER)); 6386 6387 /* sf2 only sends contiguous leafdata to contiguous rootdata. We record the permutation which will be used to fill leafdata */ 6388 PetscCall(PetscArraycpy(Cperm1,perm1+rem,n1-rem)); 6389 6390 /* Send the remote COOs to their owner */ 6391 PetscInt n2 = nroots,*i2,*j2; /* Buffers for received COOs from other ranks, along with a permutation array */ 6392 PetscCount *perm2; /* Though PetscInt is enough for remote entries, we use PetscCount here as we want to reuse MatSplitEntries_Internal() */ 6393 PetscCall(PetscMalloc3(n2,&i2,n2,&j2,n2,&perm2)); 6394 PetscCall(PetscSFReduceWithMemTypeBegin(sf2,MPIU_INT,PETSC_MEMTYPE_HOST,i1+rem,PETSC_MEMTYPE_HOST,i2,MPI_REPLACE)); 6395 PetscCall(PetscSFReduceEnd(sf2,MPIU_INT,i1+rem,i2,MPI_REPLACE)); 6396 PetscCall(PetscSFReduceWithMemTypeBegin(sf2,MPIU_INT,PETSC_MEMTYPE_HOST,j1+rem,PETSC_MEMTYPE_HOST,j2,MPI_REPLACE)); 6397 PetscCall(PetscSFReduceEnd(sf2,MPIU_INT,j1+rem,j2,MPI_REPLACE)); 6398 6399 PetscCall(PetscFree(offsets)); 6400 PetscCall(PetscFree2(sendto,nentries)); 6401 6402 /* ---------------------------------------------------------------*/ 6403 /* Sort received COOs by row along with the permutation array */ 6404 /* ---------------------------------------------------------------*/ 6405 for (k=0; k<n2; k++) perm2[k] = k; 6406 PetscCall(PetscSortIntWithIntCountArrayPair(n2,i2,j2,perm2)); 6407 6408 /* ---------------------------------------------------------------*/ 6409 /* Split received COOs into diag/offdiag portions */ 6410 /* ---------------------------------------------------------------*/ 6411 PetscCount 
*rowBegin2,*rowMid2,*rowEnd2; 6412 PetscCount *Ajmap2,*Aperm2,*Bjmap2,*Bperm2; 6413 PetscCount Annz2,Bnnz2,Atot2,Btot2; 6414 6415 PetscCall(PetscCalloc3(m,&rowBegin2,m,&rowMid2,m,&rowEnd2)); 6416 PetscCall(MatSplitEntries_Internal(mat,n2,i2,j2,perm2,rowBegin2,rowMid2,rowEnd2,&Atot2,&Aperm2,&Annz2,&Ajmap2,&Btot2,&Bperm2,&Bnnz2,&Bjmap2)); 6417 6418 /* --------------------------------------------------------------------------*/ 6419 /* Merge local COOs with received COOs: diag with diag, offdiag with offdiag */ 6420 /* --------------------------------------------------------------------------*/ 6421 PetscInt *Ai,*Bi; 6422 PetscInt *Aj,*Bj; 6423 6424 PetscCall(PetscMalloc1(m+1,&Ai)); 6425 PetscCall(PetscMalloc1(m+1,&Bi)); 6426 PetscCall(PetscMalloc1(Annz1+Annz2,&Aj)); /* Since local and remote entries might have dups, we might allocate excess memory */ 6427 PetscCall(PetscMalloc1(Bnnz1+Bnnz2,&Bj)); 6428 6429 PetscCount *Aimap1,*Bimap1,*Aimap2,*Bimap2; 6430 PetscCall(PetscMalloc1(Annz1,&Aimap1)); 6431 PetscCall(PetscMalloc1(Bnnz1,&Bimap1)); 6432 PetscCall(PetscMalloc1(Annz2,&Aimap2)); 6433 PetscCall(PetscMalloc1(Bnnz2,&Bimap2)); 6434 6435 PetscCall(MatMergeEntries_Internal(mat,j1,j2,rowBegin1,rowMid1,rowBegin2,rowMid2,Ajmap1,Ajmap2,Aimap1,Aimap2,Ai,Aj)); 6436 PetscCall(MatMergeEntries_Internal(mat,j1,j2,rowMid1, rowEnd1,rowMid2, rowEnd2,Bjmap1,Bjmap2,Bimap1,Bimap2,Bi,Bj)); 6437 6438 /* --------------------------------------------------------------------------*/ 6439 /* Expand Ajmap1/Bjmap1 to make them based off nonzeros in A/B, since we */ 6440 /* expect nonzeros in A/B most likely have local contributing entries */ 6441 /* --------------------------------------------------------------------------*/ 6442 PetscInt Annz = Ai[m]; 6443 PetscInt Bnnz = Bi[m]; 6444 PetscCount *Ajmap1_new,*Bjmap1_new; 6445 6446 PetscCall(PetscMalloc1(Annz+1,&Ajmap1_new)); 6447 PetscCall(PetscMalloc1(Bnnz+1,&Bjmap1_new)); 6448 6449 
PetscCall(ExpandJmap_Internal(Annz1,Annz,Aimap1,Ajmap1,Ajmap1_new)); 6450 PetscCall(ExpandJmap_Internal(Bnnz1,Bnnz,Bimap1,Bjmap1,Bjmap1_new)); 6451 6452 PetscCall(PetscFree(Aimap1)); 6453 PetscCall(PetscFree(Ajmap1)); 6454 PetscCall(PetscFree(Bimap1)); 6455 PetscCall(PetscFree(Bjmap1)); 6456 PetscCall(PetscFree3(rowBegin1,rowMid1,rowEnd1)); 6457 PetscCall(PetscFree3(rowBegin2,rowMid2,rowEnd2)); 6458 PetscCall(PetscFree3(i1,j1,perm1)); 6459 PetscCall(PetscFree3(i2,j2,perm2)); 6460 6461 Ajmap1 = Ajmap1_new; 6462 Bjmap1 = Bjmap1_new; 6463 6464 /* Reallocate Aj, Bj once we know actual numbers of unique nonzeros in A and B */ 6465 if (Annz < Annz1 + Annz2) { 6466 PetscInt *Aj_new; 6467 PetscCall(PetscMalloc1(Annz,&Aj_new)); 6468 PetscCall(PetscArraycpy(Aj_new,Aj,Annz)); 6469 PetscCall(PetscFree(Aj)); 6470 Aj = Aj_new; 6471 } 6472 6473 if (Bnnz < Bnnz1 + Bnnz2) { 6474 PetscInt *Bj_new; 6475 PetscCall(PetscMalloc1(Bnnz,&Bj_new)); 6476 PetscCall(PetscArraycpy(Bj_new,Bj,Bnnz)); 6477 PetscCall(PetscFree(Bj)); 6478 Bj = Bj_new; 6479 } 6480 6481 /* --------------------------------------------------------------------------------*/ 6482 /* Create new submatrices for on-process and off-process coupling */ 6483 /* --------------------------------------------------------------------------------*/ 6484 PetscScalar *Aa,*Ba; 6485 MatType rtype; 6486 Mat_SeqAIJ *a,*b; 6487 PetscCall(PetscCalloc1(Annz,&Aa)); /* Zero matrix on device */ 6488 PetscCall(PetscCalloc1(Bnnz,&Ba)); 6489 /* make Aj[] local, i.e, based off the start column of the diagonal portion */ 6490 if (cstart) {for (k=0; k<Annz; k++) Aj[k] -= cstart;} 6491 PetscCall(MatDestroy(&mpiaij->A)); 6492 PetscCall(MatDestroy(&mpiaij->B)); 6493 PetscCall(MatGetRootType_Private(mat,&rtype)); 6494 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,Ai,Aj,Aa,&mpiaij->A)); 6495 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,mat->cmap->N,Bi,Bj,Ba,&mpiaij->B)); 6496 PetscCall(MatSetUpMultiply_MPIAIJ(mat)); 6497 6498 a = 
(Mat_SeqAIJ*)mpiaij->A->data; 6499 b = (Mat_SeqAIJ*)mpiaij->B->data; 6500 a->singlemalloc = b->singlemalloc = PETSC_FALSE; /* Let newmat own Ai,Aj,Aa,Bi,Bj,Ba */ 6501 a->free_a = b->free_a = PETSC_TRUE; 6502 a->free_ij = b->free_ij = PETSC_TRUE; 6503 6504 /* conversion must happen AFTER multiply setup */ 6505 PetscCall(MatConvert(mpiaij->A,rtype,MAT_INPLACE_MATRIX,&mpiaij->A)); 6506 PetscCall(MatConvert(mpiaij->B,rtype,MAT_INPLACE_MATRIX,&mpiaij->B)); 6507 PetscCall(VecDestroy(&mpiaij->lvec)); 6508 PetscCall(MatCreateVecs(mpiaij->B,&mpiaij->lvec,NULL)); 6509 PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)mpiaij->lvec)); 6510 6511 mpiaij->coo_n = coo_n; 6512 mpiaij->coo_sf = sf2; 6513 mpiaij->sendlen = nleaves; 6514 mpiaij->recvlen = nroots; 6515 6516 mpiaij->Annz = Annz; 6517 mpiaij->Bnnz = Bnnz; 6518 6519 mpiaij->Annz2 = Annz2; 6520 mpiaij->Bnnz2 = Bnnz2; 6521 6522 mpiaij->Atot1 = Atot1; 6523 mpiaij->Atot2 = Atot2; 6524 mpiaij->Btot1 = Btot1; 6525 mpiaij->Btot2 = Btot2; 6526 6527 mpiaij->Ajmap1 = Ajmap1; 6528 mpiaij->Aperm1 = Aperm1; 6529 6530 mpiaij->Bjmap1 = Bjmap1; 6531 mpiaij->Bperm1 = Bperm1; 6532 6533 mpiaij->Aimap2 = Aimap2; 6534 mpiaij->Ajmap2 = Ajmap2; 6535 mpiaij->Aperm2 = Aperm2; 6536 6537 mpiaij->Bimap2 = Bimap2; 6538 mpiaij->Bjmap2 = Bjmap2; 6539 mpiaij->Bperm2 = Bperm2; 6540 6541 mpiaij->Cperm1 = Cperm1; 6542 6543 /* Allocate in preallocation. 
If not used, it has zero cost on host */ 6544 PetscCall(PetscMalloc2(mpiaij->sendlen,&mpiaij->sendbuf,mpiaij->recvlen,&mpiaij->recvbuf)); 6545 PetscFunctionReturn(0); 6546 } 6547 6548 static PetscErrorCode MatSetValuesCOO_MPIAIJ(Mat mat,const PetscScalar v[],InsertMode imode) 6549 { 6550 Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ*)mat->data; 6551 Mat A = mpiaij->A,B = mpiaij->B; 6552 PetscCount Annz = mpiaij->Annz,Annz2 = mpiaij->Annz2,Bnnz = mpiaij->Bnnz,Bnnz2 = mpiaij->Bnnz2; 6553 PetscScalar *Aa,*Ba; 6554 PetscScalar *sendbuf = mpiaij->sendbuf; 6555 PetscScalar *recvbuf = mpiaij->recvbuf; 6556 const PetscCount *Ajmap1 = mpiaij->Ajmap1,*Ajmap2 = mpiaij->Ajmap2,*Aimap2 = mpiaij->Aimap2; 6557 const PetscCount *Bjmap1 = mpiaij->Bjmap1,*Bjmap2 = mpiaij->Bjmap2,*Bimap2 = mpiaij->Bimap2; 6558 const PetscCount *Aperm1 = mpiaij->Aperm1,*Aperm2 = mpiaij->Aperm2,*Bperm1 = mpiaij->Bperm1,*Bperm2 = mpiaij->Bperm2; 6559 const PetscCount *Cperm1 = mpiaij->Cperm1; 6560 6561 PetscFunctionBegin; 6562 PetscCall(MatSeqAIJGetArray(A,&Aa)); /* Might read and write matrix values */ 6563 PetscCall(MatSeqAIJGetArray(B,&Ba)); 6564 6565 /* Pack entries to be sent to remote */ 6566 for (PetscCount i=0; i<mpiaij->sendlen; i++) sendbuf[i] = v[Cperm1[i]]; 6567 6568 /* Send remote entries to their owner and overlap the communication with local computation */ 6569 PetscCall(PetscSFReduceWithMemTypeBegin(mpiaij->coo_sf,MPIU_SCALAR,PETSC_MEMTYPE_HOST,sendbuf,PETSC_MEMTYPE_HOST,recvbuf,MPI_REPLACE)); 6570 /* Add local entries to A and B */ 6571 for (PetscCount i=0; i<Annz; i++) { /* All nonzeros in A are either zero'ed or added with a value (i.e., initialized) */ 6572 PetscScalar sum = 0.0; /* Do partial summation first to improve numerical stablility */ 6573 for (PetscCount k=Ajmap1[i]; k<Ajmap1[i+1]; k++) sum += v[Aperm1[k]]; 6574 Aa[i] = (imode == INSERT_VALUES? 
0.0 : Aa[i]) + sum; 6575 } 6576 for (PetscCount i=0; i<Bnnz; i++) { 6577 PetscScalar sum = 0.0; 6578 for (PetscCount k=Bjmap1[i]; k<Bjmap1[i+1]; k++) sum += v[Bperm1[k]]; 6579 Ba[i] = (imode == INSERT_VALUES? 0.0 : Ba[i]) + sum; 6580 } 6581 PetscCall(PetscSFReduceEnd(mpiaij->coo_sf,MPIU_SCALAR,sendbuf,recvbuf,MPI_REPLACE)); 6582 6583 /* Add received remote entries to A and B */ 6584 for (PetscCount i=0; i<Annz2; i++) { 6585 for (PetscCount k=Ajmap2[i]; k<Ajmap2[i+1]; k++) Aa[Aimap2[i]] += recvbuf[Aperm2[k]]; 6586 } 6587 for (PetscCount i=0; i<Bnnz2; i++) { 6588 for (PetscCount k=Bjmap2[i]; k<Bjmap2[i+1]; k++) Ba[Bimap2[i]] += recvbuf[Bperm2[k]]; 6589 } 6590 PetscCall(MatSeqAIJRestoreArray(A,&Aa)); 6591 PetscCall(MatSeqAIJRestoreArray(B,&Ba)); 6592 PetscFunctionReturn(0); 6593 } 6594 6595 /* ----------------------------------------------------------------*/ 6596 6597 /*MC 6598 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 6599 6600 Options Database Keys: 6601 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 6602 6603 Level: beginner 6604 6605 Notes: 6606 MatSetValues() may be called for this matrix type with a NULL argument for the numerical values, 6607 in this case the values associated with the rows and columns one passes in are set to zero 6608 in the matrix 6609 6610 MatSetOptions(,MAT_STRUCTURE_ONLY,PETSC_TRUE) may be called for this matrix type. 
In this no 6611 space is allocated for the nonzero entries and any entries passed with MatSetValues() are ignored 6612 6613 .seealso: `MatCreateAIJ()` 6614 M*/ 6615 6616 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B) 6617 { 6618 Mat_MPIAIJ *b; 6619 PetscMPIInt size; 6620 6621 PetscFunctionBegin; 6622 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B),&size)); 6623 6624 PetscCall(PetscNewLog(B,&b)); 6625 B->data = (void*)b; 6626 PetscCall(PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps))); 6627 B->assembled = PETSC_FALSE; 6628 B->insertmode = NOT_SET_VALUES; 6629 b->size = size; 6630 6631 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank)); 6632 6633 /* build cache for off array entries formed */ 6634 PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash)); 6635 6636 b->donotstash = PETSC_FALSE; 6637 b->colmap = NULL; 6638 b->garray = NULL; 6639 b->roworiented = PETSC_TRUE; 6640 6641 /* stuff used for matrix vector multiply */ 6642 b->lvec = NULL; 6643 b->Mvctx = NULL; 6644 6645 /* stuff for MatGetRow() */ 6646 b->rowindices = NULL; 6647 b->rowvalues = NULL; 6648 b->getrowactive = PETSC_FALSE; 6649 6650 /* flexible pointer used in CUSPARSE classes */ 6651 b->spptr = NULL; 6652 6653 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetUseScalableIncreaseOverlap_C",MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ)); 6654 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ)); 6655 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ)); 6656 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ)); 6657 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ)); 6658 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatResetPreallocation_C",MatResetPreallocation_MPIAIJ)); 6659 
PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ)); 6660 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ)); 6661 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM)); 6662 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijsell_C",MatConvert_MPIAIJ_MPIAIJSELL)); 6663 #if defined(PETSC_HAVE_CUDA) 6664 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcusparse_C",MatConvert_MPIAIJ_MPIAIJCUSPARSE)); 6665 #endif 6666 #if defined(PETSC_HAVE_KOKKOS_KERNELS) 6667 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijkokkos_C",MatConvert_MPIAIJ_MPIAIJKokkos)); 6668 #endif 6669 #if defined(PETSC_HAVE_MKL_SPARSE) 6670 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijmkl_C",MatConvert_MPIAIJ_MPIAIJMKL)); 6671 #endif 6672 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL)); 6673 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpibaij_C",MatConvert_MPIAIJ_MPIBAIJ)); 6674 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ)); 6675 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpidense_C",MatConvert_MPIAIJ_MPIDense)); 6676 #if defined(PETSC_HAVE_ELEMENTAL) 6677 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_elemental_C",MatConvert_MPIAIJ_Elemental)); 6678 #endif 6679 #if defined(PETSC_HAVE_SCALAPACK) 6680 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_scalapack_C",MatConvert_AIJ_ScaLAPACK)); 6681 #endif 6682 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_is_C",MatConvert_XAIJ_IS)); 6683 
PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisell_C",MatConvert_MPIAIJ_MPISELL)); 6684 #if defined(PETSC_HAVE_HYPRE) 6685 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_hypre_C",MatConvert_AIJ_HYPRE)); 6686 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_transpose_mpiaij_mpiaij_C",MatProductSetFromOptions_Transpose_AIJ_AIJ)); 6687 #endif 6688 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_is_mpiaij_C",MatProductSetFromOptions_IS_XAIJ)); 6689 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaij_mpiaij_C",MatProductSetFromOptions_MPIAIJ)); 6690 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatSetPreallocationCOO_C",MatSetPreallocationCOO_MPIAIJ)); 6691 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatSetValuesCOO_C",MatSetValuesCOO_MPIAIJ)); 6692 PetscCall(PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ)); 6693 PetscFunctionReturn(0); 6694 } 6695 6696 /*@C 6697 MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal" 6698 and "off-diagonal" part of the matrix in CSR format. 6699 6700 Collective 6701 6702 Input Parameters: 6703 + comm - MPI communicator 6704 . m - number of local rows (Cannot be PETSC_DECIDE) 6705 . n - This value should be the same as the local size used in creating the 6706 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 6707 calculated if N is given) For square matrices n is almost always m. 6708 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 6709 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 6710 . i - row indices for "diagonal" portion of matrix; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix 6711 . j - column indices, which must be local, i.e., based off the start column of the diagonal portion 6712 . 
a - matrix values 6713 . oi - row indices for "off-diagonal" portion of matrix; that is oi[0] = 0, oi[row] = oi[row-1] + number of elements in that row of the matrix 6714 . oj - column indices, which must be global, representing global columns in the MPIAIJ matrix 6715 - oa - matrix values 6716 6717 Output Parameter: 6718 . mat - the matrix 6719 6720 Level: advanced 6721 6722 Notes: 6723 The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user 6724 must free the arrays once the matrix has been destroyed and not before. 6725 6726 The i and j indices are 0 based 6727 6728 See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix 6729 6730 This sets local rows and cannot be used to set off-processor values. 6731 6732 Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a 6733 legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does 6734 not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because 6735 the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to 6736 keep track of the underlying array. Use MatSetOption(A,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all 6737 communication if it is known that only local entries will be set. 
6738 6739 .seealso: `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 6740 `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithArrays()` 6741 @*/ 6742 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat) 6743 { 6744 Mat_MPIAIJ *maij; 6745 6746 PetscFunctionBegin; 6747 PetscCheck(m >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 6748 PetscCheck(i[0] == 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 6749 PetscCheck(oi[0] == 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0"); 6750 PetscCall(MatCreate(comm,mat)); 6751 PetscCall(MatSetSizes(*mat,m,n,M,N)); 6752 PetscCall(MatSetType(*mat,MATMPIAIJ)); 6753 maij = (Mat_MPIAIJ*) (*mat)->data; 6754 6755 (*mat)->preallocated = PETSC_TRUE; 6756 6757 PetscCall(PetscLayoutSetUp((*mat)->rmap)); 6758 PetscCall(PetscLayoutSetUp((*mat)->cmap)); 6759 6760 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A)); 6761 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B)); 6762 6763 PetscCall(MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE)); 6764 PetscCall(MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY)); 6765 PetscCall(MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY)); 6766 PetscCall(MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE)); 6767 PetscCall(MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE)); 6768 PetscFunctionReturn(0); 6769 } 6770 6771 typedef struct { 6772 Mat *mp; /* intermediate products */ 6773 PetscBool *mptmp; /* is the intermediate product temporary ? 
*/ 6774 PetscInt cp; /* number of intermediate products */ 6775 6776 /* support for MatGetBrowsOfAoCols_MPIAIJ for P_oth */ 6777 PetscInt *startsj_s,*startsj_r; 6778 PetscScalar *bufa; 6779 Mat P_oth; 6780 6781 /* may take advantage of merging product->B */ 6782 Mat Bloc; /* B-local by merging diag and off-diag */ 6783 6784 /* cusparse does not have support to split between symbolic and numeric phases. 6785 When api_user is true, we don't need to update the numerical values 6786 of the temporary storage */ 6787 PetscBool reusesym; 6788 6789 /* support for COO values insertion */ 6790 PetscScalar *coo_v,*coo_w; /* store on-process and off-process COO scalars, and used as MPI recv/send buffers respectively */ 6791 PetscInt **own; /* own[i] points to address of on-process COO indices for Mat mp[i] */ 6792 PetscInt **off; /* off[i] points to address of off-process COO indices for Mat mp[i] */ 6793 PetscBool hasoffproc; /* if true, have off-process values insertion (i.e. AtB or PtAP) */ 6794 PetscSF sf; /* used for non-local values insertion and memory malloc */ 6795 PetscMemType mtype; 6796 6797 /* customization */ 6798 PetscBool abmerge; 6799 PetscBool P_oth_bind; 6800 } MatMatMPIAIJBACKEND; 6801 6802 PetscErrorCode MatDestroy_MatMatMPIAIJBACKEND(void *data) 6803 { 6804 MatMatMPIAIJBACKEND *mmdata = (MatMatMPIAIJBACKEND*)data; 6805 PetscInt i; 6806 6807 PetscFunctionBegin; 6808 PetscCall(PetscFree2(mmdata->startsj_s,mmdata->startsj_r)); 6809 PetscCall(PetscFree(mmdata->bufa)); 6810 PetscCall(PetscSFFree(mmdata->sf,mmdata->mtype,mmdata->coo_v)); 6811 PetscCall(PetscSFFree(mmdata->sf,mmdata->mtype,mmdata->coo_w)); 6812 PetscCall(MatDestroy(&mmdata->P_oth)); 6813 PetscCall(MatDestroy(&mmdata->Bloc)); 6814 PetscCall(PetscSFDestroy(&mmdata->sf)); 6815 for (i = 0; i < mmdata->cp; i++) { 6816 PetscCall(MatDestroy(&mmdata->mp[i])); 6817 } 6818 PetscCall(PetscFree2(mmdata->mp,mmdata->mptmp)); 6819 PetscCall(PetscFree(mmdata->own[0])); 6820 PetscCall(PetscFree(mmdata->own)); 
6821 PetscCall(PetscFree(mmdata->off[0])); 6822 PetscCall(PetscFree(mmdata->off)); 6823 PetscCall(PetscFree(mmdata)); 6824 PetscFunctionReturn(0); 6825 } 6826 6827 /* Copy selected n entries with indices in idx[] of A to v[]. 6828 If idx is NULL, copy the whole data array of A to v[] 6829 */ 6830 static PetscErrorCode MatSeqAIJCopySubArray(Mat A, PetscInt n, const PetscInt idx[], PetscScalar v[]) 6831 { 6832 PetscErrorCode (*f)(Mat,PetscInt,const PetscInt[],PetscScalar[]); 6833 6834 PetscFunctionBegin; 6835 PetscCall(PetscObjectQueryFunction((PetscObject)A,"MatSeqAIJCopySubArray_C",&f)); 6836 if (f) { 6837 PetscCall((*f)(A,n,idx,v)); 6838 } else { 6839 const PetscScalar *vv; 6840 6841 PetscCall(MatSeqAIJGetArrayRead(A,&vv)); 6842 if (n && idx) { 6843 PetscScalar *w = v; 6844 const PetscInt *oi = idx; 6845 PetscInt j; 6846 6847 for (j = 0; j < n; j++) *w++ = vv[*oi++]; 6848 } else { 6849 PetscCall(PetscArraycpy(v,vv,n)); 6850 } 6851 PetscCall(MatSeqAIJRestoreArrayRead(A,&vv)); 6852 } 6853 PetscFunctionReturn(0); 6854 } 6855 6856 static PetscErrorCode MatProductNumeric_MPIAIJBACKEND(Mat C) 6857 { 6858 MatMatMPIAIJBACKEND *mmdata; 6859 PetscInt i,n_d,n_o; 6860 6861 PetscFunctionBegin; 6862 MatCheckProduct(C,1); 6863 PetscCheck(C->product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty"); 6864 mmdata = (MatMatMPIAIJBACKEND*)C->product->data; 6865 if (!mmdata->reusesym) { /* update temporary matrices */ 6866 if (mmdata->P_oth) { 6867 PetscCall(MatGetBrowsOfAoCols_MPIAIJ(C->product->A,C->product->B,MAT_REUSE_MATRIX,&mmdata->startsj_s,&mmdata->startsj_r,&mmdata->bufa,&mmdata->P_oth)); 6868 } 6869 if (mmdata->Bloc) { 6870 PetscCall(MatMPIAIJGetLocalMatMerge(C->product->B,MAT_REUSE_MATRIX,NULL,&mmdata->Bloc)); 6871 } 6872 } 6873 mmdata->reusesym = PETSC_FALSE; 6874 6875 for (i = 0; i < mmdata->cp; i++) { 6876 PetscCheck(mmdata->mp[i]->ops->productnumeric,PetscObjectComm((PetscObject)mmdata->mp[i]),PETSC_ERR_PLIB,"Missing numeric op for 
%s",MatProductTypes[mmdata->mp[i]->product->type]); 6877 PetscCall((*mmdata->mp[i]->ops->productnumeric)(mmdata->mp[i])); 6878 } 6879 for (i = 0, n_d = 0, n_o = 0; i < mmdata->cp; i++) { 6880 PetscInt noff = mmdata->off[i+1] - mmdata->off[i]; 6881 6882 if (mmdata->mptmp[i]) continue; 6883 if (noff) { 6884 PetscInt nown = mmdata->own[i+1] - mmdata->own[i]; 6885 6886 PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i],noff,mmdata->off[i],mmdata->coo_w + n_o)); 6887 PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i],nown,mmdata->own[i],mmdata->coo_v + n_d)); 6888 n_o += noff; 6889 n_d += nown; 6890 } else { 6891 Mat_SeqAIJ *mm = (Mat_SeqAIJ*)mmdata->mp[i]->data; 6892 6893 PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i],mm->nz,NULL,mmdata->coo_v + n_d)); 6894 n_d += mm->nz; 6895 } 6896 } 6897 if (mmdata->hasoffproc) { /* offprocess insertion */ 6898 PetscCall(PetscSFGatherBegin(mmdata->sf,MPIU_SCALAR,mmdata->coo_w,mmdata->coo_v+n_d)); 6899 PetscCall(PetscSFGatherEnd(mmdata->sf,MPIU_SCALAR,mmdata->coo_w,mmdata->coo_v+n_d)); 6900 } 6901 PetscCall(MatSetValuesCOO(C,mmdata->coo_v,INSERT_VALUES)); 6902 PetscFunctionReturn(0); 6903 } 6904 6905 /* Support for Pt * A, A * P, or Pt * A * P */ 6906 #define MAX_NUMBER_INTERMEDIATE 4 6907 PetscErrorCode MatProductSymbolic_MPIAIJBACKEND(Mat C) 6908 { 6909 Mat_Product *product = C->product; 6910 Mat A,P,mp[MAX_NUMBER_INTERMEDIATE]; /* A, P and a series of intermediate matrices */ 6911 Mat_MPIAIJ *a,*p; 6912 MatMatMPIAIJBACKEND *mmdata; 6913 ISLocalToGlobalMapping P_oth_l2g = NULL; 6914 IS glob = NULL; 6915 const char *prefix; 6916 char pprefix[256]; 6917 const PetscInt *globidx,*P_oth_idx; 6918 PetscInt i,j,cp,m,n,M,N,*coo_i,*coo_j; 6919 PetscCount ncoo,ncoo_d,ncoo_o,ncoo_oown; 6920 PetscInt cmapt[MAX_NUMBER_INTERMEDIATE],rmapt[MAX_NUMBER_INTERMEDIATE]; /* col/row map type for each Mat in mp[]. 
*/ 6921 /* type-0: consecutive, start from 0; type-1: consecutive with */ 6922 /* a base offset; type-2: sparse with a local to global map table */ 6923 const PetscInt *cmapa[MAX_NUMBER_INTERMEDIATE],*rmapa[MAX_NUMBER_INTERMEDIATE]; /* col/row local to global map array (table) for type-2 map type */ 6924 6925 MatProductType ptype; 6926 PetscBool mptmp[MAX_NUMBER_INTERMEDIATE],hasoffproc = PETSC_FALSE,iscuda,iskokk; 6927 PetscMPIInt size; 6928 6929 PetscFunctionBegin; 6930 MatCheckProduct(C,1); 6931 PetscCheck(!product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data not empty"); 6932 ptype = product->type; 6933 if (product->A->symmetric && ptype == MATPRODUCT_AtB) { 6934 ptype = MATPRODUCT_AB; 6935 product->symbolic_used_the_fact_A_is_symmetric = PETSC_TRUE; 6936 } 6937 switch (ptype) { 6938 case MATPRODUCT_AB: 6939 A = product->A; 6940 P = product->B; 6941 m = A->rmap->n; 6942 n = P->cmap->n; 6943 M = A->rmap->N; 6944 N = P->cmap->N; 6945 hasoffproc = PETSC_FALSE; /* will not scatter mat product values to other processes */ 6946 break; 6947 case MATPRODUCT_AtB: 6948 P = product->A; 6949 A = product->B; 6950 m = P->cmap->n; 6951 n = A->cmap->n; 6952 M = P->cmap->N; 6953 N = A->cmap->N; 6954 hasoffproc = PETSC_TRUE; 6955 break; 6956 case MATPRODUCT_PtAP: 6957 A = product->A; 6958 P = product->B; 6959 m = P->cmap->n; 6960 n = P->cmap->n; 6961 M = P->cmap->N; 6962 N = P->cmap->N; 6963 hasoffproc = PETSC_TRUE; 6964 break; 6965 default: 6966 SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Not for product type %s",MatProductTypes[ptype]); 6967 } 6968 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)C),&size)); 6969 if (size == 1) hasoffproc = PETSC_FALSE; 6970 6971 /* defaults */ 6972 for (i=0;i<MAX_NUMBER_INTERMEDIATE;i++) { 6973 mp[i] = NULL; 6974 mptmp[i] = PETSC_FALSE; 6975 rmapt[i] = -1; 6976 cmapt[i] = -1; 6977 rmapa[i] = NULL; 6978 cmapa[i] = NULL; 6979 } 6980 6981 /* customization */ 6982 PetscCall(PetscNew(&mmdata)); 6983 
mmdata->reusesym = product->api_user; 6984 if (ptype == MATPRODUCT_AB) { 6985 if (product->api_user) { 6986 PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatMatMult","Mat"); 6987 PetscCall(PetscOptionsBool("-matmatmult_backend_mergeB","Merge product->B local matrices","MatMatMult",mmdata->abmerge,&mmdata->abmerge,NULL)); 6988 PetscCall(PetscOptionsBool("-matmatmult_backend_pothbind","Bind P_oth to CPU","MatBindToCPU",mmdata->P_oth_bind,&mmdata->P_oth_bind,NULL)); 6989 PetscOptionsEnd(); 6990 } else { 6991 PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_AB","Mat"); 6992 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_mergeB","Merge product->B local matrices","MatMatMult",mmdata->abmerge,&mmdata->abmerge,NULL)); 6993 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_pothbind","Bind P_oth to CPU","MatBindToCPU",mmdata->P_oth_bind,&mmdata->P_oth_bind,NULL)); 6994 PetscOptionsEnd(); 6995 } 6996 } else if (ptype == MATPRODUCT_PtAP) { 6997 if (product->api_user) { 6998 PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatPtAP","Mat"); 6999 PetscCall(PetscOptionsBool("-matptap_backend_pothbind","Bind P_oth to CPU","MatBindToCPU",mmdata->P_oth_bind,&mmdata->P_oth_bind,NULL)); 7000 PetscOptionsEnd(); 7001 } else { 7002 PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_PtAP","Mat"); 7003 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_pothbind","Bind P_oth to CPU","MatBindToCPU",mmdata->P_oth_bind,&mmdata->P_oth_bind,NULL)); 7004 PetscOptionsEnd(); 7005 } 7006 } 7007 a = (Mat_MPIAIJ*)A->data; 7008 p = (Mat_MPIAIJ*)P->data; 7009 PetscCall(MatSetSizes(C,m,n,M,N)); 7010 PetscCall(PetscLayoutSetUp(C->rmap)); 7011 PetscCall(PetscLayoutSetUp(C->cmap)); 7012 PetscCall(MatSetType(C,((PetscObject)A)->type_name)); 7013 PetscCall(MatGetOptionsPrefix(C,&prefix)); 7014 7015 cp = 0; 7016 switch (ptype) { 7017 case MATPRODUCT_AB: 
/* A * P */ 7018 PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A,P,MAT_INITIAL_MATRIX,&mmdata->startsj_s,&mmdata->startsj_r,&mmdata->bufa,&mmdata->P_oth)); 7019 7020 /* A_diag * P_local (merged or not) */ 7021 if (mmdata->abmerge) { /* P's diagonal and off-diag blocks are merged to one matrix, then multiplied by A_diag */ 7022 /* P is product->B */ 7023 PetscCall(MatMPIAIJGetLocalMatMerge(P,MAT_INITIAL_MATRIX,&glob,&mmdata->Bloc)); 7024 PetscCall(MatProductCreate(a->A,mmdata->Bloc,NULL,&mp[cp])); 7025 PetscCall(MatProductSetType(mp[cp],MATPRODUCT_AB)); 7026 PetscCall(MatProductSetFill(mp[cp],product->fill)); 7027 PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp)); 7028 PetscCall(MatSetOptionsPrefix(mp[cp],prefix)); 7029 PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix)); 7030 mp[cp]->product->api_user = product->api_user; 7031 PetscCall(MatProductSetFromOptions(mp[cp])); 7032 PetscCheck(mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]); 7033 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7034 PetscCall(ISGetIndices(glob,&globidx)); 7035 rmapt[cp] = 1; 7036 cmapt[cp] = 2; 7037 cmapa[cp] = globidx; 7038 mptmp[cp] = PETSC_FALSE; 7039 cp++; 7040 } else { /* A_diag * P_diag and A_diag * P_off */ 7041 PetscCall(MatProductCreate(a->A,p->A,NULL,&mp[cp])); 7042 PetscCall(MatProductSetType(mp[cp],MATPRODUCT_AB)); 7043 PetscCall(MatProductSetFill(mp[cp],product->fill)); 7044 PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp)); 7045 PetscCall(MatSetOptionsPrefix(mp[cp],prefix)); 7046 PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix)); 7047 mp[cp]->product->api_user = product->api_user; 7048 PetscCall(MatProductSetFromOptions(mp[cp])); 7049 PetscCheck(mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]); 7050 
PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7051 rmapt[cp] = 1; 7052 cmapt[cp] = 1; 7053 mptmp[cp] = PETSC_FALSE; 7054 cp++; 7055 PetscCall(MatProductCreate(a->A,p->B,NULL,&mp[cp])); 7056 PetscCall(MatProductSetType(mp[cp],MATPRODUCT_AB)); 7057 PetscCall(MatProductSetFill(mp[cp],product->fill)); 7058 PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp)); 7059 PetscCall(MatSetOptionsPrefix(mp[cp],prefix)); 7060 PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix)); 7061 mp[cp]->product->api_user = product->api_user; 7062 PetscCall(MatProductSetFromOptions(mp[cp])); 7063 PetscCheck(mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]); 7064 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7065 rmapt[cp] = 1; 7066 cmapt[cp] = 2; 7067 cmapa[cp] = p->garray; 7068 mptmp[cp] = PETSC_FALSE; 7069 cp++; 7070 } 7071 7072 /* A_off * P_other */ 7073 if (mmdata->P_oth) { 7074 PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth,&P_oth_l2g)); /* make P_oth use local col ids */ 7075 PetscCall(ISLocalToGlobalMappingGetIndices(P_oth_l2g,&P_oth_idx)); 7076 PetscCall(MatSetType(mmdata->P_oth,((PetscObject)(a->B))->type_name)); 7077 PetscCall(MatBindToCPU(mmdata->P_oth,mmdata->P_oth_bind)); 7078 PetscCall(MatProductCreate(a->B,mmdata->P_oth,NULL,&mp[cp])); 7079 PetscCall(MatProductSetType(mp[cp],MATPRODUCT_AB)); 7080 PetscCall(MatProductSetFill(mp[cp],product->fill)); 7081 PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp)); 7082 PetscCall(MatSetOptionsPrefix(mp[cp],prefix)); 7083 PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix)); 7084 mp[cp]->product->api_user = product->api_user; 7085 PetscCall(MatProductSetFromOptions(mp[cp])); 7086 PetscCheck(mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]); 7087 
PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7088 rmapt[cp] = 1; 7089 cmapt[cp] = 2; 7090 cmapa[cp] = P_oth_idx; 7091 mptmp[cp] = PETSC_FALSE; 7092 cp++; 7093 } 7094 break; 7095 7096 case MATPRODUCT_AtB: /* (P^t * A): P_diag * A_loc + P_off * A_loc */ 7097 /* A is product->B */ 7098 PetscCall(MatMPIAIJGetLocalMatMerge(A,MAT_INITIAL_MATRIX,&glob,&mmdata->Bloc)); 7099 if (A == P) { /* when A==P, we can take advantage of the already merged mmdata->Bloc */ 7100 PetscCall(MatProductCreate(mmdata->Bloc,mmdata->Bloc,NULL,&mp[cp])); 7101 PetscCall(MatProductSetType(mp[cp],MATPRODUCT_AtB)); 7102 PetscCall(MatProductSetFill(mp[cp],product->fill)); 7103 PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp)); 7104 PetscCall(MatSetOptionsPrefix(mp[cp],prefix)); 7105 PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix)); 7106 mp[cp]->product->api_user = product->api_user; 7107 PetscCall(MatProductSetFromOptions(mp[cp])); 7108 PetscCheck(mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]); 7109 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7110 PetscCall(ISGetIndices(glob,&globidx)); 7111 rmapt[cp] = 2; 7112 rmapa[cp] = globidx; 7113 cmapt[cp] = 2; 7114 cmapa[cp] = globidx; 7115 mptmp[cp] = PETSC_FALSE; 7116 cp++; 7117 } else { 7118 PetscCall(MatProductCreate(p->A,mmdata->Bloc,NULL,&mp[cp])); 7119 PetscCall(MatProductSetType(mp[cp],MATPRODUCT_AtB)); 7120 PetscCall(MatProductSetFill(mp[cp],product->fill)); 7121 PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp)); 7122 PetscCall(MatSetOptionsPrefix(mp[cp],prefix)); 7123 PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix)); 7124 mp[cp]->product->api_user = product->api_user; 7125 PetscCall(MatProductSetFromOptions(mp[cp])); 7126 PetscCheck(mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for 
%s",MatProductTypes[mp[cp]->product->type]); 7127 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7128 PetscCall(ISGetIndices(glob,&globidx)); 7129 rmapt[cp] = 1; 7130 cmapt[cp] = 2; 7131 cmapa[cp] = globidx; 7132 mptmp[cp] = PETSC_FALSE; 7133 cp++; 7134 PetscCall(MatProductCreate(p->B,mmdata->Bloc,NULL,&mp[cp])); 7135 PetscCall(MatProductSetType(mp[cp],MATPRODUCT_AtB)); 7136 PetscCall(MatProductSetFill(mp[cp],product->fill)); 7137 PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp)); 7138 PetscCall(MatSetOptionsPrefix(mp[cp],prefix)); 7139 PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix)); 7140 mp[cp]->product->api_user = product->api_user; 7141 PetscCall(MatProductSetFromOptions(mp[cp])); 7142 PetscCheck(mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]); 7143 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7144 rmapt[cp] = 2; 7145 rmapa[cp] = p->garray; 7146 cmapt[cp] = 2; 7147 cmapa[cp] = globidx; 7148 mptmp[cp] = PETSC_FALSE; 7149 cp++; 7150 } 7151 break; 7152 case MATPRODUCT_PtAP: 7153 PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A,P,MAT_INITIAL_MATRIX,&mmdata->startsj_s,&mmdata->startsj_r,&mmdata->bufa,&mmdata->P_oth)); 7154 /* P is product->B */ 7155 PetscCall(MatMPIAIJGetLocalMatMerge(P,MAT_INITIAL_MATRIX,&glob,&mmdata->Bloc)); 7156 PetscCall(MatProductCreate(a->A,mmdata->Bloc,NULL,&mp[cp])); 7157 PetscCall(MatProductSetType(mp[cp],MATPRODUCT_PtAP)); 7158 PetscCall(MatProductSetFill(mp[cp],product->fill)); 7159 PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp)); 7160 PetscCall(MatSetOptionsPrefix(mp[cp],prefix)); 7161 PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix)); 7162 mp[cp]->product->api_user = product->api_user; 7163 PetscCall(MatProductSetFromOptions(mp[cp])); 7164 PetscCheck(mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for 
%s",MatProductTypes[mp[cp]->product->type]); 7165 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7166 PetscCall(ISGetIndices(glob,&globidx)); 7167 rmapt[cp] = 2; 7168 rmapa[cp] = globidx; 7169 cmapt[cp] = 2; 7170 cmapa[cp] = globidx; 7171 mptmp[cp] = PETSC_FALSE; 7172 cp++; 7173 if (mmdata->P_oth) { 7174 PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth,&P_oth_l2g)); 7175 PetscCall(ISLocalToGlobalMappingGetIndices(P_oth_l2g,&P_oth_idx)); 7176 PetscCall(MatSetType(mmdata->P_oth,((PetscObject)(a->B))->type_name)); 7177 PetscCall(MatBindToCPU(mmdata->P_oth,mmdata->P_oth_bind)); 7178 PetscCall(MatProductCreate(a->B,mmdata->P_oth,NULL,&mp[cp])); 7179 PetscCall(MatProductSetType(mp[cp],MATPRODUCT_AB)); 7180 PetscCall(MatProductSetFill(mp[cp],product->fill)); 7181 PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp)); 7182 PetscCall(MatSetOptionsPrefix(mp[cp],prefix)); 7183 PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix)); 7184 mp[cp]->product->api_user = product->api_user; 7185 PetscCall(MatProductSetFromOptions(mp[cp])); 7186 PetscCheck(mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]); 7187 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7188 mptmp[cp] = PETSC_TRUE; 7189 cp++; 7190 PetscCall(MatProductCreate(mmdata->Bloc,mp[1],NULL,&mp[cp])); 7191 PetscCall(MatProductSetType(mp[cp],MATPRODUCT_AtB)); 7192 PetscCall(MatProductSetFill(mp[cp],product->fill)); 7193 PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp)); 7194 PetscCall(MatSetOptionsPrefix(mp[cp],prefix)); 7195 PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix)); 7196 mp[cp]->product->api_user = product->api_user; 7197 PetscCall(MatProductSetFromOptions(mp[cp])); 7198 PetscCheck(mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]); 7199 
PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7200 rmapt[cp] = 2; 7201 rmapa[cp] = globidx; 7202 cmapt[cp] = 2; 7203 cmapa[cp] = P_oth_idx; 7204 mptmp[cp] = PETSC_FALSE; 7205 cp++; 7206 } 7207 break; 7208 default: 7209 SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Not for product type %s",MatProductTypes[ptype]); 7210 } 7211 /* sanity check */ 7212 if (size > 1) for (i = 0; i < cp; i++) PetscCheck(rmapt[i] != 2 || hasoffproc,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected offproc map type for product %" PetscInt_FMT,i); 7213 7214 PetscCall(PetscMalloc2(cp,&mmdata->mp,cp,&mmdata->mptmp)); 7215 for (i = 0; i < cp; i++) { 7216 mmdata->mp[i] = mp[i]; 7217 mmdata->mptmp[i] = mptmp[i]; 7218 } 7219 mmdata->cp = cp; 7220 C->product->data = mmdata; 7221 C->product->destroy = MatDestroy_MatMatMPIAIJBACKEND; 7222 C->ops->productnumeric = MatProductNumeric_MPIAIJBACKEND; 7223 7224 /* memory type */ 7225 mmdata->mtype = PETSC_MEMTYPE_HOST; 7226 PetscCall(PetscObjectTypeCompareAny((PetscObject)C,&iscuda,MATSEQAIJCUSPARSE,MATMPIAIJCUSPARSE,"")); 7227 PetscCall(PetscObjectTypeCompareAny((PetscObject)C,&iskokk,MATSEQAIJKOKKOS,MATMPIAIJKOKKOS,"")); 7228 if (iscuda) mmdata->mtype = PETSC_MEMTYPE_CUDA; 7229 else if (iskokk) mmdata->mtype = PETSC_MEMTYPE_KOKKOS; 7230 7231 /* prepare coo coordinates for values insertion */ 7232 7233 /* count total nonzeros of those intermediate seqaij Mats 7234 ncoo_d: # of nonzeros of matrices that do not have offproc entries 7235 ncoo_o: # of nonzeros (of matrices that might have offproc entries) that will be inserted to remote procs 7236 ncoo_oown: # of nonzeros (of matrices that might have offproc entries) that will be inserted locally 7237 */ 7238 for (cp = 0, ncoo_d = 0, ncoo_o = 0, ncoo_oown = 0; cp < mmdata->cp; cp++) { 7239 Mat_SeqAIJ *mm = (Mat_SeqAIJ*)mp[cp]->data; 7240 if (mptmp[cp]) continue; 7241 if (rmapt[cp] == 2 && hasoffproc) { /* the rows need to be scatter to all processes (might include self) */ 7242 const PetscInt *rmap 
= rmapa[cp]; 7243 const PetscInt mr = mp[cp]->rmap->n; 7244 const PetscInt rs = C->rmap->rstart; 7245 const PetscInt re = C->rmap->rend; 7246 const PetscInt *ii = mm->i; 7247 for (i = 0; i < mr; i++) { 7248 const PetscInt gr = rmap[i]; 7249 const PetscInt nz = ii[i+1] - ii[i]; 7250 if (gr < rs || gr >= re) ncoo_o += nz; /* this row is offproc */ 7251 else ncoo_oown += nz; /* this row is local */ 7252 } 7253 } else ncoo_d += mm->nz; 7254 } 7255 7256 /* 7257 ncoo: total number of nonzeros (including those inserted by remote procs) belonging to this proc 7258 7259 ncoo = ncoo_d + ncoo_oown + ncoo2, which ncoo2 is number of nonzeros inserted to me by other procs. 7260 7261 off[0] points to a big index array, which is shared by off[1,2,...]. Similarily, for own[0]. 7262 7263 off[p]: points to the segment for matrix mp[p], storing location of nonzeros that mp[p] will insert to others 7264 own[p]: points to the segment for matrix mp[p], storing location of nonzeros that mp[p] will insert locally 7265 so, off[p+1]-off[p] is the number of nonzeros that mp[p] will send to others. 7266 7267 coo_i/j/v[]: [ncoo] row/col/val of nonzeros belonging to this proc. 7268 Ex. coo_i[]: the beginning part (of size ncoo_d + ncoo_oown) stores i of local nonzeros, and the remaing part stores i of nonzeros I will receive. 
7269 */ 7270 PetscCall(PetscCalloc1(mmdata->cp+1,&mmdata->off)); /* +1 to make a csr-like data structure */ 7271 PetscCall(PetscCalloc1(mmdata->cp+1,&mmdata->own)); 7272 7273 /* gather (i,j) of nonzeros inserted by remote procs */ 7274 if (hasoffproc) { 7275 PetscSF msf; 7276 PetscInt ncoo2,*coo_i2,*coo_j2; 7277 7278 PetscCall(PetscMalloc1(ncoo_o,&mmdata->off[0])); 7279 PetscCall(PetscMalloc1(ncoo_oown,&mmdata->own[0])); 7280 PetscCall(PetscMalloc2(ncoo_o,&coo_i,ncoo_o,&coo_j)); /* to collect (i,j) of entries to be sent to others */ 7281 7282 for (cp = 0, ncoo_o = 0; cp < mmdata->cp; cp++) { 7283 Mat_SeqAIJ *mm = (Mat_SeqAIJ*)mp[cp]->data; 7284 PetscInt *idxoff = mmdata->off[cp]; 7285 PetscInt *idxown = mmdata->own[cp]; 7286 if (!mptmp[cp] && rmapt[cp] == 2) { /* row map is sparse */ 7287 const PetscInt *rmap = rmapa[cp]; 7288 const PetscInt *cmap = cmapa[cp]; 7289 const PetscInt *ii = mm->i; 7290 PetscInt *coi = coo_i + ncoo_o; 7291 PetscInt *coj = coo_j + ncoo_o; 7292 const PetscInt mr = mp[cp]->rmap->n; 7293 const PetscInt rs = C->rmap->rstart; 7294 const PetscInt re = C->rmap->rend; 7295 const PetscInt cs = C->cmap->rstart; 7296 for (i = 0; i < mr; i++) { 7297 const PetscInt *jj = mm->j + ii[i]; 7298 const PetscInt gr = rmap[i]; 7299 const PetscInt nz = ii[i+1] - ii[i]; 7300 if (gr < rs || gr >= re) { /* this is an offproc row */ 7301 for (j = ii[i]; j < ii[i+1]; j++) { 7302 *coi++ = gr; 7303 *idxoff++ = j; 7304 } 7305 if (!cmapt[cp]) { /* already global */ 7306 for (j = 0; j < nz; j++) *coj++ = jj[j]; 7307 } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */ 7308 for (j = 0; j < nz; j++) *coj++ = jj[j] + cs; 7309 } else { /* offdiag */ 7310 for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]]; 7311 } 7312 ncoo_o += nz; 7313 } else { /* this is a local row */ 7314 for (j = ii[i]; j < ii[i+1]; j++) *idxown++ = j; 7315 } 7316 } 7317 } 7318 mmdata->off[cp + 1] = idxoff; 7319 mmdata->own[cp + 1] = idxown; 7320 } 7321 7322 
PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)C),&mmdata->sf)); 7323 PetscCall(PetscSFSetGraphLayout(mmdata->sf,C->rmap,ncoo_o/*nleaves*/,NULL/*ilocal*/,PETSC_OWN_POINTER,coo_i)); 7324 PetscCall(PetscSFGetMultiSF(mmdata->sf,&msf)); 7325 PetscCall(PetscSFGetGraph(msf,&ncoo2/*nroots*/,NULL,NULL,NULL)); 7326 ncoo = ncoo_d + ncoo_oown + ncoo2; 7327 PetscCall(PetscMalloc2(ncoo,&coo_i2,ncoo,&coo_j2)); 7328 PetscCall(PetscSFGatherBegin(mmdata->sf,MPIU_INT,coo_i,coo_i2 + ncoo_d + ncoo_oown)); /* put (i,j) of remote nonzeros at back */ 7329 PetscCall(PetscSFGatherEnd(mmdata->sf,MPIU_INT,coo_i,coo_i2 + ncoo_d + ncoo_oown)); 7330 PetscCall(PetscSFGatherBegin(mmdata->sf,MPIU_INT,coo_j,coo_j2 + ncoo_d + ncoo_oown)); 7331 PetscCall(PetscSFGatherEnd(mmdata->sf,MPIU_INT,coo_j,coo_j2 + ncoo_d + ncoo_oown)); 7332 PetscCall(PetscFree2(coo_i,coo_j)); 7333 /* allocate MPI send buffer to collect nonzero values to be sent to remote procs */ 7334 PetscCall(PetscSFMalloc(mmdata->sf,mmdata->mtype,ncoo_o*sizeof(PetscScalar),(void**)&mmdata->coo_w)); 7335 coo_i = coo_i2; 7336 coo_j = coo_j2; 7337 } else { /* no offproc values insertion */ 7338 ncoo = ncoo_d; 7339 PetscCall(PetscMalloc2(ncoo,&coo_i,ncoo,&coo_j)); 7340 7341 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)C),&mmdata->sf)); 7342 PetscCall(PetscSFSetGraph(mmdata->sf,0,0,NULL,PETSC_OWN_POINTER,NULL,PETSC_OWN_POINTER)); 7343 PetscCall(PetscSFSetUp(mmdata->sf)); 7344 } 7345 mmdata->hasoffproc = hasoffproc; 7346 7347 /* gather (i,j) of nonzeros inserted locally */ 7348 for (cp = 0, ncoo_d = 0; cp < mmdata->cp; cp++) { 7349 Mat_SeqAIJ *mm = (Mat_SeqAIJ*)mp[cp]->data; 7350 PetscInt *coi = coo_i + ncoo_d; 7351 PetscInt *coj = coo_j + ncoo_d; 7352 const PetscInt *jj = mm->j; 7353 const PetscInt *ii = mm->i; 7354 const PetscInt *cmap = cmapa[cp]; 7355 const PetscInt *rmap = rmapa[cp]; 7356 const PetscInt mr = mp[cp]->rmap->n; 7357 const PetscInt rs = C->rmap->rstart; 7358 const PetscInt re = C->rmap->rend; 7359 const PetscInt 
cs = C->cmap->rstart; 7360 7361 if (mptmp[cp]) continue; 7362 if (rmapt[cp] == 1) { /* consecutive rows */ 7363 /* fill coo_i */ 7364 for (i = 0; i < mr; i++) { 7365 const PetscInt gr = i + rs; 7366 for (j = ii[i]; j < ii[i+1]; j++) coi[j] = gr; 7367 } 7368 /* fill coo_j */ 7369 if (!cmapt[cp]) { /* type-0, already global */ 7370 PetscCall(PetscArraycpy(coj,jj,mm->nz)); 7371 } else if (cmapt[cp] == 1) { /* type-1, local to global for consecutive columns of C */ 7372 for (j = 0; j < mm->nz; j++) coj[j] = jj[j] + cs; /* lid + col start */ 7373 } else { /* type-2, local to global for sparse columns */ 7374 for (j = 0; j < mm->nz; j++) coj[j] = cmap[jj[j]]; 7375 } 7376 ncoo_d += mm->nz; 7377 } else if (rmapt[cp] == 2) { /* sparse rows */ 7378 for (i = 0; i < mr; i++) { 7379 const PetscInt *jj = mm->j + ii[i]; 7380 const PetscInt gr = rmap[i]; 7381 const PetscInt nz = ii[i+1] - ii[i]; 7382 if (gr >= rs && gr < re) { /* local rows */ 7383 for (j = ii[i]; j < ii[i+1]; j++) *coi++ = gr; 7384 if (!cmapt[cp]) { /* type-0, already global */ 7385 for (j = 0; j < nz; j++) *coj++ = jj[j]; 7386 } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */ 7387 for (j = 0; j < nz; j++) *coj++ = jj[j] + cs; 7388 } else { /* type-2, local to global for sparse columns */ 7389 for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]]; 7390 } 7391 ncoo_d += nz; 7392 } 7393 } 7394 } 7395 } 7396 if (glob) { 7397 PetscCall(ISRestoreIndices(glob,&globidx)); 7398 } 7399 PetscCall(ISDestroy(&glob)); 7400 if (P_oth_l2g) { 7401 PetscCall(ISLocalToGlobalMappingRestoreIndices(P_oth_l2g,&P_oth_idx)); 7402 } 7403 PetscCall(ISLocalToGlobalMappingDestroy(&P_oth_l2g)); 7404 /* allocate an array to store all nonzeros (inserted locally or remotely) belonging to this proc */ 7405 PetscCall(PetscSFMalloc(mmdata->sf,mmdata->mtype,ncoo*sizeof(PetscScalar),(void**)&mmdata->coo_v)); 7406 7407 /* preallocate with COO data */ 7408 PetscCall(MatSetPreallocationCOO(C,ncoo,coo_i,coo_j)); 7409 
PetscCall(PetscFree2(coo_i,coo_j)); 7410 PetscFunctionReturn(0); 7411 } 7412 7413 PetscErrorCode MatProductSetFromOptions_MPIAIJBACKEND(Mat mat) 7414 { 7415 Mat_Product *product = mat->product; 7416 #if defined(PETSC_HAVE_DEVICE) 7417 PetscBool match = PETSC_FALSE; 7418 PetscBool usecpu = PETSC_FALSE; 7419 #else 7420 PetscBool match = PETSC_TRUE; 7421 #endif 7422 7423 PetscFunctionBegin; 7424 MatCheckProduct(mat,1); 7425 #if defined(PETSC_HAVE_DEVICE) 7426 if (!product->A->boundtocpu && !product->B->boundtocpu) { 7427 PetscCall(PetscObjectTypeCompare((PetscObject)product->B,((PetscObject)product->A)->type_name,&match)); 7428 } 7429 if (match) { /* we can always fallback to the CPU if requested */ 7430 switch (product->type) { 7431 case MATPRODUCT_AB: 7432 if (product->api_user) { 7433 PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatMatMult","Mat"); 7434 PetscCall(PetscOptionsBool("-matmatmult_backend_cpu","Use CPU code","MatMatMult",usecpu,&usecpu,NULL)); 7435 PetscOptionsEnd(); 7436 } else { 7437 PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatProduct_AB","Mat"); 7438 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu","Use CPU code","MatMatMult",usecpu,&usecpu,NULL)); 7439 PetscOptionsEnd(); 7440 } 7441 break; 7442 case MATPRODUCT_AtB: 7443 if (product->api_user) { 7444 PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatTransposeMatMult","Mat"); 7445 PetscCall(PetscOptionsBool("-mattransposematmult_backend_cpu","Use CPU code","MatTransposeMatMult",usecpu,&usecpu,NULL)); 7446 PetscOptionsEnd(); 7447 } else { 7448 PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatProduct_AtB","Mat"); 7449 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu","Use CPU code","MatTransposeMatMult",usecpu,&usecpu,NULL)); 7450 PetscOptionsEnd(); 7451 } 7452 break; 7453 case MATPRODUCT_PtAP: 7454 if (product->api_user) { 7455 
PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatPtAP","Mat"); 7456 PetscCall(PetscOptionsBool("-matptap_backend_cpu","Use CPU code","MatPtAP",usecpu,&usecpu,NULL)); 7457 PetscOptionsEnd(); 7458 } else { 7459 PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatProduct_PtAP","Mat"); 7460 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu","Use CPU code","MatPtAP",usecpu,&usecpu,NULL)); 7461 PetscOptionsEnd(); 7462 } 7463 break; 7464 default: 7465 break; 7466 } 7467 match = (PetscBool)!usecpu; 7468 } 7469 #endif 7470 if (match) { 7471 switch (product->type) { 7472 case MATPRODUCT_AB: 7473 case MATPRODUCT_AtB: 7474 case MATPRODUCT_PtAP: 7475 mat->ops->productsymbolic = MatProductSymbolic_MPIAIJBACKEND; 7476 break; 7477 default: 7478 break; 7479 } 7480 } 7481 /* fallback to MPIAIJ ops */ 7482 if (!mat->ops->productsymbolic) PetscCall(MatProductSetFromOptions_MPIAIJ(mat)); 7483 PetscFunctionReturn(0); 7484 } 7485 7486 /* 7487 Produces a set of block column indices of the matrix row, one for each block represented in the original row 7488 7489 n - the number of block indices in cc[] 7490 cc - the block indices (must be large enough to contain the indices) 7491 */ 7492 static inline PetscErrorCode MatCollapseRow(Mat Amat,PetscInt row,PetscInt bs,PetscInt *n,PetscInt *cc) 7493 { 7494 PetscInt cnt = -1,nidx,j; 7495 const PetscInt *idx; 7496 7497 PetscFunctionBegin; 7498 PetscCall(MatGetRow(Amat,row,&nidx,&idx,NULL)); 7499 if (nidx) { 7500 cnt = 0; 7501 cc[cnt] = idx[0]/bs; 7502 for (j=1; j<nidx; j++) { 7503 if (cc[cnt] < idx[j]/bs) cc[++cnt] = idx[j]/bs; 7504 } 7505 } 7506 PetscCall(MatRestoreRow(Amat,row,&nidx,&idx,NULL)); 7507 *n = cnt+1; 7508 PetscFunctionReturn(0); 7509 } 7510 7511 /* 7512 Produces a set of block column indices of the matrix block row, one for each block represented in the original set of rows 7513 7514 ncollapsed - the number of block indices 7515 collapsed - the block indices 
(must be large enough to contain the indices)
*/
static inline PetscErrorCode MatCollapseRows(Mat Amat,PetscInt start,PetscInt bs,PetscInt *w0,PetscInt *w1,PetscInt *w2,PetscInt *ncollapsed,PetscInt **collapsed)
{
  PetscInt *acc     = w0;  /* block indices accumulated over the rows handled so far */
  PetscInt *rowblk  = w1;  /* block indices of the row currently being folded in */
  PetscInt *scratch = w2;  /* destination of the next merge; trades places with acc */
  PetscInt nacc,nrow = 0,r = start + 1;
  const PetscInt rend = start + bs;

  PetscFunctionBegin;
  PetscCall(MatCollapseRow(Amat,start,bs,&nacc,acc));
  while (r < rend) {
    PetscInt *swap;

    PetscCall(MatCollapseRow(Amat,r,bs,&nrow,rowblk));
    PetscCall(PetscMergeIntArray(nacc,acc,nrow,rowblk,&nacc,&scratch));
    /* the merged result becomes the accumulator; the old accumulator is reused as scratch */
    swap    = acc;
    acc     = scratch;
    scratch = swap;
    r++;
  }
  *ncollapsed = nacc;
  if (collapsed) *collapsed = acc; /* points into w0 or w2, whichever holds the final merge */
  PetscFunctionReturn(0);
}

/* -------------------------------------------------------------------------- */
/*
 MatCreateGraph_Simple_AIJ - create simple scalar matrix (graph) from potentially blocked matrix

 Input Parameter:
 . Amat - matrix
 - symmetrize - make the result symmetric
 + scale - scale with diagonal

 Output Parameter:
 .
a_Gmat - output scalar graph >= 0

*/
/*
  Implementation outline:
    bs > 1,  fast path: taken for SeqAIJ, or MPIAIJ with a garray. Every block row is first
             checked to consist of dense bs x bs blocks (same row length and same first column
             in each of the bs rows, length divisible by bs). If any check fails the code jumps
             to the slow path (label old_bs). Otherwise one value per block, the sum of
             |Re(a_ij)| over the block, is inserted with INSERT_VALUES.
    bs > 1,  slow path: preallocates from collapsed rows and adds |Re(a_ij)| entry by entry
             with ADD_VALUES into row Ii/bs, column idx/bs.
    bs == 1: duplicates Amat and replaces every stored value by its absolute value.
  Afterwards the graph is optionally symmetrized (G += G^T) and symmetrically scaled by
  the diagonal.
*/
PETSC_INTERN PetscErrorCode MatCreateGraph_Simple_AIJ(Mat Amat, PetscBool symmetrize, PetscBool scale, Mat *a_Gmat)
{
  PetscInt  Istart,Iend,Ii,jj,kk,ncols,nloc,NN,MM,bs;
  MPI_Comm  comm;
  Mat       Gmat;
  PetscBool ismpiaij,isseqaij;
  Mat       a, b, c;
  MatType   jtype;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)Amat,&comm));
  PetscCall(MatGetOwnershipRange(Amat, &Istart, &Iend));
  PetscCall(MatGetSize(Amat, &MM, &NN));
  PetscCall(MatGetBlockSize(Amat, &bs));
  nloc = (Iend-Istart)/bs; /* number of local block rows == local rows of the graph */

  PetscCall(PetscObjectBaseTypeCompare((PetscObject)Amat,MATSEQAIJ,&isseqaij));
  PetscCall(PetscObjectBaseTypeCompare((PetscObject)Amat,MATMPIAIJ,&ismpiaij));
  PetscCheck(isseqaij || ismpiaij,comm,PETSC_ERR_USER,"Require (MPI)AIJ matrix type");

  /* TODO GPU: these calls are potentially expensive if matrices are large and we want to use the GPU */
  /* A solution consists in providing a new API, MatAIJGetCollapsedAIJ, and each class can provide a fast
     implementation */
  if (bs > 1) {
    PetscCall(MatGetType(Amat,&jtype));
    PetscCall(MatCreate(comm, &Gmat));
    PetscCall(MatSetType(Gmat, jtype));
    PetscCall(MatSetSizes(Gmat,nloc,nloc,PETSC_DETERMINE,PETSC_DETERMINE));
    PetscCall(MatSetBlockSizes(Gmat, 1, 1));
    if (isseqaij || ((Mat_MPIAIJ*)Amat->data)->garray) {
      PetscInt  *d_nnz, *o_nnz;
      MatScalar *aa,val,AA[4096]; /* AA/AJ: one graph row; the 4096 bound is enforced by the PetscCheck on nmax below */
      PetscInt  *aj,*ai,AJ[4096],nc;

      if (isseqaij) { a = Amat; b = NULL; }
      else {
        Mat_MPIAIJ *d = (Mat_MPIAIJ*)Amat->data;
        a = d->A; b = d->B;
      }
      PetscCall(PetscInfo(Amat,"New bs>1 Graph. nloc=%" PetscInt_FMT "\n",nloc));
      PetscCall(PetscMalloc2(nloc, &d_nnz,isseqaij ? 0 : nloc, &o_nnz));
      /* pass 0 handles the diagonal part a, pass 1 the off-diagonal part b (skipped when b is NULL) */
      for (c=a, kk=0 ; c && kk<2 ; c=b, kk++){
        PetscInt       *nnz = (c==a) ? d_nnz : o_nnz, nmax=0;
        const PetscInt *cols;

        for (PetscInt brow=0,jj,ok=1,j0; brow < nloc*bs; brow += bs) { // block rows
          PetscCall(MatGetRow(c,brow,&jj,&cols,NULL));
          nnz[brow/bs] = jj/bs;
          if (jj%bs) ok = 0; /* row length is not a whole number of blocks */
          if (cols) j0 = cols[0];
          else j0 = -1;
          PetscCall(MatRestoreRow(c,brow,&jj,&cols,NULL));
          if (nnz[brow/bs]>nmax) nmax = nnz[brow/bs];
          for (PetscInt ii=1; ii < bs && nnz[brow/bs] ; ii++) { // check for non-dense blocks
            PetscCall(MatGetRow(c,brow+ii,&jj,&cols,NULL));
            if (jj%bs) ok = 0;
            if ((cols && j0 != cols[0]) || (!cols && j0 != -1)) ok = 0; /* rows of the block must start at the same column */
            if (nnz[brow/bs] != jj/bs) ok = 0;                        /* ... and have the same length */
            PetscCall(MatRestoreRow(c,brow+ii,&jj,&cols,NULL));
          }
          if (!ok) {
            /* blocks are not dense: release the counts and redo everything with the slow path */
            PetscCall(PetscFree2(d_nnz,o_nnz));
            goto old_bs;
          }
        }
        PetscCheck(nmax<4096,PETSC_COMM_SELF,PETSC_ERR_USER,"Buffer %" PetscInt_FMT " too small 4096.",nmax);
      }
      PetscCall(MatSeqAIJSetPreallocation(Gmat,0,d_nnz));
      PetscCall(MatMPIAIJSetPreallocation(Gmat,0,d_nnz,0,o_nnz));
      PetscCall(PetscFree2(d_nnz,o_nnz));
      // diag
      for (PetscInt brow=0,n,grow; brow < nloc*bs; brow += bs) { // block rows
        Mat_SeqAIJ *aseq = (Mat_SeqAIJ*)a->data;

        ai = aseq->i;
        n  = ai[brow+1] - ai[brow];
        aj = aseq->j + ai[brow];
        for (PetscInt k=0; k<n; k += bs) { // block columns
          AJ[k/bs] = aj[k]/bs + Istart/bs; // diag starts at (Istart,Istart)
          val      = 0;
          for (PetscInt ii=0; ii<bs; ii++) { // rows in block
            aa = aseq->a + ai[brow+ii] + k;
            for (PetscInt jj=0; jj<bs; jj++) { // columns in block
              val += PetscAbs(PetscRealPart(aa[jj])); // a sort of norm
            }
          }
          AA[k/bs] = val;
        }
        grow = Istart/bs + brow/bs;
        PetscCall(MatSetValues(Gmat,1,&grow,n/bs,AJ,AA,INSERT_VALUES));
      }
      // off-diag
      if (ismpiaij) {
        Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)Amat->data;
        const PetscScalar *vals;
        const PetscInt    *cols, *garray = aij->garray;

        PetscCheck(garray,PETSC_COMM_SELF,PETSC_ERR_USER,"No garray ?");
        for (PetscInt brow=0,grow; brow < nloc*bs; brow += bs) { // block rows
          /* first row of the block gives the block column indices (mapped to global through garray) */
          PetscCall(MatGetRow(b,brow,&ncols,&cols,NULL));
          for (PetscInt k=0,cidx=0 ; k < ncols ; k += bs, cidx++) {
            AA[k/bs] = 0;
            AJ[cidx] = garray[cols[k]]/bs;
          }
          nc = ncols/bs;
          PetscCall(MatRestoreRow(b,brow,&ncols,&cols,NULL));
          /* accumulate |Re(a_ij)| of all bs rows into the per-block sums */
          for (PetscInt ii=0; ii<bs; ii++) { // rows in block
            PetscCall(MatGetRow(b,brow+ii,&ncols,&cols,&vals));
            for (PetscInt k=0; k<ncols; k += bs) {
              for (PetscInt jj=0; jj<bs; jj++) { // cols in block
                AA[k/bs] += PetscAbs(PetscRealPart(vals[k+jj]));
              }
            }
            PetscCall(MatRestoreRow(b,brow+ii,&ncols,&cols,&vals));
          }
          grow = Istart/bs + brow/bs;
          PetscCall(MatSetValues(Gmat,1,&grow,nc,AJ,AA,INSERT_VALUES));
        }
      }
      PetscCall(MatAssemblyBegin(Gmat,MAT_FINAL_ASSEMBLY));
      PetscCall(MatAssemblyEnd(Gmat,MAT_FINAL_ASSEMBLY));
    } else {
      const PetscScalar *vals;
      const PetscInt    *idx;
      PetscInt          *d_nnz, *o_nnz,*w0,*w1,*w2;
old_bs:
      /*
         Determine the preallocation needed for the scalar matrix derived from the vector matrix.
      */
      PetscCall(PetscInfo(Amat,"OLD bs>1 CreateGraph\n"));
      PetscCall(PetscMalloc2(nloc, &d_nnz,isseqaij ? 0 : nloc, &o_nnz));
      if (isseqaij) {
        PetscInt max_d_nnz;
        /*
           Determine exact preallocation count for (sequential) scalar matrix
        */
        PetscCall(MatSeqAIJGetMaxRowNonzeros(Amat,&max_d_nnz));
        max_d_nnz = PetscMin(nloc,bs*max_d_nnz);
        PetscCall(PetscMalloc3(max_d_nnz, &w0,max_d_nnz, &w1,max_d_nnz, &w2));
        for (Ii = 0, jj = 0; Ii < Iend; Ii += bs, jj++) {
          PetscCall(MatCollapseRows(Amat,Ii,bs,w0,w1,w2,&d_nnz[jj],NULL));
        }
        PetscCall(PetscFree3(w0,w1,w2));
      } else if (ismpiaij) {
        Mat            Daij,Oaij;
        const PetscInt *garray;
        PetscInt       max_d_nnz;

        PetscCall(MatMPIAIJGetSeqAIJ(Amat,&Daij,&Oaij,&garray));
        /*
           Determine exact preallocation count for diagonal block portion of scalar matrix
        */
        PetscCall(MatSeqAIJGetMaxRowNonzeros(Daij,&max_d_nnz));
        max_d_nnz = PetscMin(nloc,bs*max_d_nnz);
        PetscCall(PetscMalloc3(max_d_nnz, &w0,max_d_nnz, &w1,max_d_nnz, &w2));
        for (Ii = 0, jj = 0; Ii < Iend - Istart; Ii += bs, jj++) {
          PetscCall(MatCollapseRows(Daij,Ii,bs,w0,w1,w2,&d_nnz[jj],NULL));
        }
        PetscCall(PetscFree3(w0,w1,w2));
        /*
           Over estimate (usually grossly over), preallocation count for off-diagonal portion of scalar matrix
        */
        for (Ii = 0, jj = 0; Ii < Iend - Istart; Ii += bs, jj++) {
          o_nnz[jj] = 0;
          for (kk=0; kk<bs; kk++) { /* rows that get collapsed to a single row */
            PetscCall(MatGetRow(Oaij,Ii+kk,&ncols,NULL,NULL));
            o_nnz[jj] += ncols;
            PetscCall(MatRestoreRow(Oaij,Ii+kk,&ncols,NULL,NULL));
          }
          if (o_nnz[jj] > (NN/bs-nloc)) o_nnz[jj] = NN/bs-nloc; /* cap at the number of off-process block columns */
        }
      } else SETERRQ(comm,PETSC_ERR_USER,"Require AIJ matrix type");
      /* get scalar copy (norms) of matrix */
      PetscCall(MatSeqAIJSetPreallocation(Gmat,0,d_nnz));
      PetscCall(MatMPIAIJSetPreallocation(Gmat,0,d_nnz,0,o_nnz));
      PetscCall(PetscFree2(d_nnz,o_nnz));
      for (Ii = Istart; Ii < Iend; Ii++) {
        PetscInt dest_row = Ii/bs;

        PetscCall(MatGetRow(Amat,Ii,&ncols,&idx,&vals));
        for (jj=0; jj<ncols; jj++) {
          PetscInt    dest_col = idx[jj]/bs;
          PetscScalar sv       = PetscAbs(PetscRealPart(vals[jj]));

          PetscCall(MatSetValues(Gmat,1,&dest_row,1,&dest_col,&sv,ADD_VALUES));
        }
        PetscCall(MatRestoreRow(Amat,Ii,&ncols,&idx,&vals));
      }
      PetscCall(MatAssemblyBegin(Gmat,MAT_FINAL_ASSEMBLY));
      PetscCall(MatAssemblyEnd(Gmat,MAT_FINAL_ASSEMBLY));
    }
  } else {
    /* TODO GPU: optimization proposal, each class provides fast implementation of this
                 procedure via MatAbs API */
    /* just copy scalar matrix & abs() */
    PetscCall(MatDuplicate(Amat, MAT_COPY_VALUES, &Gmat));
    if (isseqaij) { a = Gmat; b = NULL; }
    else {
      Mat_MPIAIJ *d = (Mat_MPIAIJ*)Gmat->data;
      a = d->A; b = d->B;
    }
    /* abs */
    for (c=a, kk=0 ; c && kk<2 ; c=b, kk++){
      MatInfo     info;
      PetscScalar *avals;
      PetscInt    nzused;

      PetscCall(MatGetInfo(c,MAT_LOCAL,&info));
      PetscCall(MatSeqAIJGetArray(c,&avals));
      /* nz_used is reported as a floating-point count; convert once and index with PetscInt
         so the counter cannot overflow an int when PetscInt is 64 bit */
      nzused = (PetscInt)info.nz_used;
      for (PetscInt k = 0; k < nzused; k++) avals[k] = PetscAbsScalar(avals[k]);
      PetscCall(MatSeqAIJRestoreArray(c,&avals));
    }
  }
  if (symmetrize) {
    PetscBool issym;

    PetscCall(MatGetOption(Amat,MAT_SYMMETRIC,&issym));
    if (!issym) {
      Mat matTrans;

      /* G <- G + G^T */
      PetscCall(MatTranspose(Gmat, MAT_INITIAL_MATRIX, &matTrans));
      PetscCall(MatAXPY(Gmat, 1.0, matTrans, Gmat->structurally_symmetric ? SAME_NONZERO_PATTERN : DIFFERENT_NONZERO_PATTERN));
      PetscCall(MatDestroy(&matTrans));
    }
    PetscCall(MatSetOption(Gmat,MAT_SYMMETRIC,PETSC_TRUE));
  } else {
    PetscCall(MatPropagateSymmetryOptions(Amat, Gmat));
  }
  if (scale) {
    /* scale c for all diagonal values = 1 or -1 */
    Vec diag;

    /* diag <- sqrt(|1/diag(G)|), then G <- diag * G * diag */
    PetscCall(MatCreateVecs(Gmat, &diag, NULL));
    PetscCall(MatGetDiagonal(Gmat, diag));
    PetscCall(VecReciprocal(diag));
    PetscCall(VecSqrtAbs(diag));
    PetscCall(MatDiagonalScale(Gmat, diag, diag));
    PetscCall(VecDestroy(&diag));
  }
  PetscCall(MatViewFromOptions(Gmat, NULL, "-mat_graph_view"));
  *a_Gmat = Gmat;
  PetscFunctionReturn(0);
}

/* -------------------------------------------------------------------------- */
/*@C
   MatFilter_AIJ - filter values with small absolute values
     With vfilter < 0 does nothing so should not be called.

   Collective on Mat

   Input Parameters:
+   Gmat - the graph
.   vfilter - threshold parameter [0,1)

   Output Parameter:
.  filteredG - output filtered scalar graph

   Level: developer

   Notes:
    This is called before graph coarsers are called.
7796 This could go into Mat, move 'symm' to GAMG 7797 7798 .seealso: `PCGAMGSetThreshold()` 7799 @*/ 7800 PETSC_INTERN PetscErrorCode MatFilter_AIJ(Mat Gmat,PetscReal vfilter, Mat *filteredG) 7801 { 7802 PetscInt Istart,Iend,ncols,nnz0,nnz1, NN, MM, nloc; 7803 Mat tGmat; 7804 MPI_Comm comm; 7805 const PetscScalar *vals; 7806 const PetscInt *idx; 7807 PetscInt *d_nnz, *o_nnz, kk, *garray = NULL, *AJ, maxcols=0; 7808 MatScalar *AA; // this is checked in graph 7809 PetscBool isseqaij; 7810 Mat a, b, c; 7811 MatType jtype; 7812 7813 PetscFunctionBegin; 7814 PetscCall(PetscObjectGetComm((PetscObject)Gmat,&comm)); 7815 PetscCall(PetscObjectBaseTypeCompare((PetscObject)Gmat,MATSEQAIJ,&isseqaij)); 7816 PetscCall(MatGetType(Gmat,&jtype)); 7817 PetscCall(MatCreate(comm, &tGmat)); 7818 PetscCall(MatSetType(tGmat, jtype)); 7819 7820 /* TODO GPU: this can be called when filter = 0 -> Probably provide MatAIJThresholdCompress that compresses the entries below a threshold? 7821 Also, if the matrix is symmetric, can we skip this 7822 operation? It can be very expensive on large matrices. 
*/ 7823 7824 // global sizes 7825 PetscCall(MatGetSize(Gmat, &MM, &NN)); 7826 PetscCall(MatGetOwnershipRange(Gmat, &Istart, &Iend)); 7827 nloc = Iend - Istart; 7828 PetscCall(PetscMalloc2(nloc, &d_nnz,nloc, &o_nnz)); 7829 if (isseqaij) { a = Gmat; b = NULL; } 7830 else { 7831 Mat_MPIAIJ *d = (Mat_MPIAIJ*)Gmat->data; 7832 a = d->A; b = d->B; 7833 garray = d->garray; 7834 } 7835 /* Determine upper bound on non-zeros needed in new filtered matrix */ 7836 for (PetscInt row=0; row < nloc; row++) { 7837 PetscCall(MatGetRow(a,row,&ncols,NULL,NULL)); 7838 d_nnz[row] = ncols; 7839 if (ncols>maxcols) maxcols=ncols; 7840 PetscCall(MatRestoreRow(a,row,&ncols,NULL,NULL)); 7841 } 7842 if (b) { 7843 for (PetscInt row=0; row < nloc; row++) { 7844 PetscCall(MatGetRow(b,row,&ncols,NULL,NULL)); 7845 o_nnz[row] = ncols; 7846 if (ncols>maxcols) maxcols=ncols; 7847 PetscCall(MatRestoreRow(b,row,&ncols,NULL,NULL)); 7848 } 7849 } 7850 PetscCall(MatSetSizes(tGmat,nloc,nloc,MM,MM)); 7851 PetscCall(MatSetBlockSizes(tGmat, 1, 1)); 7852 PetscCall(MatSeqAIJSetPreallocation(tGmat,0,d_nnz)); 7853 PetscCall(MatMPIAIJSetPreallocation(tGmat,0,d_nnz,0,o_nnz)); 7854 PetscCall(MatSetOption(tGmat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE)); 7855 PetscCall(PetscFree2(d_nnz,o_nnz)); 7856 // 7857 PetscCall(PetscMalloc2(maxcols, &AA,maxcols, &AJ)); 7858 nnz0 = nnz1 = 0; 7859 for (c=a, kk=0 ; c && kk<2 ; c=b, kk++){ 7860 for (PetscInt row=0, grow=Istart, ncol_row, jj ; row < nloc; row++,grow++) { 7861 PetscCall(MatGetRow(c,row,&ncols,&idx,&vals)); 7862 for (ncol_row=jj=0; jj<ncols; jj++,nnz0++) { 7863 PetscScalar sv = PetscAbs(PetscRealPart(vals[jj])); 7864 if (PetscRealPart(sv) > vfilter) { 7865 nnz1++; 7866 PetscInt cid = idx[jj] + Istart; //diag 7867 if (c!=a) cid = garray[idx[jj]]; 7868 AA[ncol_row] = vals[jj]; 7869 AJ[ncol_row] = cid; 7870 ncol_row++; 7871 } 7872 } 7873 PetscCall(MatRestoreRow(c,row,&ncols,&idx,&vals)); 7874 PetscCall(MatSetValues(tGmat,1,&grow,ncol_row,AJ,AA,INSERT_VALUES)); 7875 } 7876 } 
7877 PetscCall(PetscFree2(AA,AJ)); 7878 PetscCall(MatAssemblyBegin(tGmat,MAT_FINAL_ASSEMBLY)); 7879 PetscCall(MatAssemblyEnd(tGmat,MAT_FINAL_ASSEMBLY)); 7880 PetscCall(MatPropagateSymmetryOptions(Gmat,tGmat)); /* Normal Mat options are not relevant ? */ 7881 7882 PetscCall(PetscInfo(tGmat,"\t %g%% nnz after filtering, with threshold %g, %g nnz ave. (N=%" PetscInt_FMT ", max row size %d)\n", 7883 (!nnz0) ? 1. : 100.*(double)nnz1/(double)nnz0, (double)vfilter, 7884 (!nloc) ? 1. : (double)nnz0/(double)nloc,MM,(int)maxcols)); 7885 7886 *filteredG = tGmat; 7887 PetscCall(MatViewFromOptions(tGmat, NULL, "-mat_filter_graph_view")); 7888 PetscFunctionReturn(0); 7889 } 7890 7891 /* 7892 Special version for direct calls from Fortran 7893 */ 7894 #include <petsc/private/fortranimpl.h> 7895 7896 /* Change these macros so can be used in void function */ 7897 /* Identical to PetscCallVoid, except it assigns to *_ierr */ 7898 #undef PetscCall 7899 #define PetscCall(...) do { \ 7900 PetscErrorCode ierr_msv_mpiaij = __VA_ARGS__; \ 7901 if (PetscUnlikely(ierr_msv_mpiaij)) { \ 7902 *_ierr = PetscError(PETSC_COMM_SELF,__LINE__,PETSC_FUNCTION_NAME,__FILE__,ierr_msv_mpiaij,PETSC_ERROR_REPEAT," "); \ 7903 return; \ 7904 } \ 7905 } while (0) 7906 7907 #undef SETERRQ 7908 #define SETERRQ(comm,ierr,...) 
do { \ 7909 *_ierr = PetscError(comm,__LINE__,PETSC_FUNCTION_NAME,__FILE__,ierr,PETSC_ERROR_INITIAL,__VA_ARGS__); \ 7910 return; \ 7911 } while (0) 7912 7913 #if defined(PETSC_HAVE_FORTRAN_CAPS) 7914 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ 7915 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 7916 #define matsetvaluesmpiaij_ matsetvaluesmpiaij 7917 #else 7918 #endif 7919 PETSC_EXTERN void matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr) 7920 { 7921 Mat mat = *mmat; 7922 PetscInt m = *mm, n = *mn; 7923 InsertMode addv = *maddv; 7924 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 7925 PetscScalar value; 7926 7927 MatCheckPreallocated(mat,1); 7928 if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv; 7929 else PetscCheck(mat->insertmode == addv,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 7930 { 7931 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 7932 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 7933 PetscBool roworiented = aij->roworiented; 7934 7935 /* Some Variables required in the macro */ 7936 Mat A = aij->A; 7937 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 7938 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 7939 MatScalar *aa; 7940 PetscBool ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE); 7941 Mat B = aij->B; 7942 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 7943 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 7944 MatScalar *ba; 7945 /* This variable below is only for the PETSC_HAVE_VIENNACL or PETSC_HAVE_CUDA cases, but we define it in all cases because we 7946 * cannot use "#if defined" inside a macro. 
*/ 7947 PETSC_UNUSED PetscBool inserted = PETSC_FALSE; 7948 7949 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 7950 PetscInt nonew = a->nonew; 7951 MatScalar *ap1,*ap2; 7952 7953 PetscFunctionBegin; 7954 PetscCall(MatSeqAIJGetArray(A,&aa)); 7955 PetscCall(MatSeqAIJGetArray(B,&ba)); 7956 for (i=0; i<m; i++) { 7957 if (im[i] < 0) continue; 7958 PetscCheck(im[i] < mat->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT,im[i],mat->rmap->N-1); 7959 if (im[i] >= rstart && im[i] < rend) { 7960 row = im[i] - rstart; 7961 lastcol1 = -1; 7962 rp1 = aj + ai[row]; 7963 ap1 = aa + ai[row]; 7964 rmax1 = aimax[row]; 7965 nrow1 = ailen[row]; 7966 low1 = 0; 7967 high1 = nrow1; 7968 lastcol2 = -1; 7969 rp2 = bj + bi[row]; 7970 ap2 = ba + bi[row]; 7971 rmax2 = bimax[row]; 7972 nrow2 = bilen[row]; 7973 low2 = 0; 7974 high2 = nrow2; 7975 7976 for (j=0; j<n; j++) { 7977 if (roworiented) value = v[i*n+j]; 7978 else value = v[i+j*m]; 7979 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue; 7980 if (in[j] >= cstart && in[j] < cend) { 7981 col = in[j] - cstart; 7982 MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]); 7983 } else if (in[j] < 0) continue; 7984 else if (PetscUnlikelyDebug(in[j] >= mat->cmap->N)) { 7985 /* extra brace on SETERRQ() is required for --with-errorchecking=0 - due to the next 'else' clause */ 7986 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT,in[j],mat->cmap->N-1); 7987 } else { 7988 if (mat->was_assembled) { 7989 if (!aij->colmap) { 7990 PetscCall(MatCreateColmap_MPIAIJ_Private(mat)); 7991 } 7992 #if defined(PETSC_USE_CTABLE) 7993 PetscCall(PetscTableFind(aij->colmap,in[j]+1,&col)); 7994 col--; 7995 #else 7996 col = aij->colmap[in[j]] - 1; 7997 #endif 7998 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 7999 
PetscCall(MatDisAssemble_MPIAIJ(mat)); 8000 col = in[j]; 8001 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 8002 B = aij->B; 8003 b = (Mat_SeqAIJ*)B->data; 8004 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 8005 rp2 = bj + bi[row]; 8006 ap2 = ba + bi[row]; 8007 rmax2 = bimax[row]; 8008 nrow2 = bilen[row]; 8009 low2 = 0; 8010 high2 = nrow2; 8011 bm = aij->B->rmap->n; 8012 ba = b->a; 8013 inserted = PETSC_FALSE; 8014 } 8015 } else col = in[j]; 8016 MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]); 8017 } 8018 } 8019 } else if (!aij->donotstash) { 8020 if (roworiented) { 8021 PetscCall(MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)))); 8022 } else { 8023 PetscCall(MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)))); 8024 } 8025 } 8026 } 8027 PetscCall(MatSeqAIJRestoreArray(A,&aa)); 8028 PetscCall(MatSeqAIJRestoreArray(B,&ba)); 8029 } 8030 PetscFunctionReturnVoid(); 8031 } 8032 8033 /* Undefining these here since they were redefined from their original definition above! No 8034 * other PETSc functions should be defined past this point, as it is impossible to recover the 8035 * original definitions */ 8036 #undef PetscCall 8037 #undef SETERRQ 8038