#include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/
#include <petsc/private/vecimpl.h>
#include <petsc/private/sfimpl.h>
#include <petsc/private/isimpl.h>
#include <petscblaslapack.h>
#include <petscsf.h>
#include <petsc/private/hashmapi.h>

/* Merges the diagonal and off-diagonal blocks into one sequential matrix and returns that
   matrix's row IJ structure; the merged matrix is composed on A so the matching
   MatRestoreRowIJ_MPIAIJ() call can retrieve it. */
PetscErrorCode MatGetRowIJ_MPIAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *m,const PetscInt *ia[],const PetscInt *ja[],PetscBool *done)
{
  Mat B;

  PetscFunctionBegin;
  PetscCall(MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&B));
  /* stash B on A so the restore call below can find the same merged matrix */
  PetscCall(PetscObjectCompose((PetscObject)A,"MatGetRowIJ_MPIAIJ",(PetscObject)B));
  PetscCall(MatGetRowIJ(B,oshift,symmetric,inodecompressed,m,ia,ja,done));
  PetscFunctionReturn(0);
}

/* Restores the row IJ structure obtained with MatGetRowIJ_MPIAIJ() and drops this code's
   reference to the merged local matrix created there. */
PetscErrorCode MatRestoreRowIJ_MPIAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *m,const PetscInt *ia[],const PetscInt *ja[],PetscBool *done)
{
  Mat B;

  PetscFunctionBegin;
  PetscCall(PetscObjectQuery((PetscObject)A,"MatGetRowIJ_MPIAIJ",(PetscObject*)&B));
  PetscCall(MatRestoreRowIJ(B,oshift,symmetric,inodecompressed,m,ia,ja,done));
  PetscCall(MatDestroy(&B));
  PetscFunctionReturn(0);
}

/*MC
   MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
   and MATMPIAIJ otherwise.  As a result, for single process communicators,
   MatSeqAIJSetPreallocation is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes.  It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()

  Developer Notes:
    Subclasses include MATAIJCUSPARSE, MATAIJPERM, MATAIJSELL, MATAIJMKL, MATAIJCRL, and also automatically switches over to use inodes when
   enough exist.
  Level: beginner

.seealso: `MatCreateAIJ()`, `MatCreateSeqAIJ()`, `MATSEQAIJ`, `MATMPIAIJ`
M*/

/*MC
   MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
   and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes.  It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()

  Level: beginner

.seealso: `MatCreateMPIAIJCRL`, `MATSEQAIJCRL`, `MATMPIAIJCRL`, `MATSEQAIJCRL`, `MATMPIAIJCRL`
M*/

/* Binds (flg = PETSC_TRUE) or unbinds the matrix to the CPU, forwarding the request to the
   diagonal (a->A) and off-diagonal (a->B) blocks. */
static PetscErrorCode MatBindToCPU_MPIAIJ(Mat A,PetscBool flg)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
#if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_VIENNACL)
  A->boundtocpu = flg;
#endif
  if (a->A) {
    PetscCall(MatBindToCPU(a->A,flg));
  }
  if (a->B) {
    PetscCall(MatBindToCPU(a->B,flg));
  }

  /* In addition to binding the diagonal and off-diagonal matrices, bind the local vectors used for matrix-vector products.
   * This maybe seems a little odd for a MatBindToCPU() call to do, but it makes no sense for the binding of these vectors
   * to differ from the parent matrix.
*/
  if (a->lvec) {
    PetscCall(VecBindToCPU(a->lvec,flg));
  }
  if (a->diag) {
    PetscCall(VecBindToCPU(a->diag,flg));
  }

  PetscFunctionReturn(0);
}

/* Sets the row/column block sizes on the diagonal block; the off-diagonal block gets
   column block size 1 (its columns have no block structure — presumably because they are
   ghost columns; confirm against MatSetUpMultiply_MPIAIJ). */
PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ*)M->data;

  PetscFunctionBegin;
  if (mat->A) {
    PetscCall(MatSetBlockSizes(mat->A,rbs,cbs));
    PetscCall(MatSetBlockSizes(mat->B,rbs,1));
  }
  PetscFunctionReturn(0);
}

/* Returns in *keptrows an IS (global numbering) of the locally owned rows containing at
   least one stored nonzero value; *keptrows stays NULL when no rank has a zero row. */
PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
{
  Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
  Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
  Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
  const PetscInt  *ia,*ib;
  const MatScalar *aa,*bb,*aav,*bav;
  PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
  PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;

  PetscFunctionBegin;
  *keptrows = NULL;

  ia = a->i;
  ib = b->i;
  PetscCall(MatSeqAIJGetArrayRead(mat->A,&aav));
  PetscCall(MatSeqAIJGetArrayRead(mat->B,&bav));
  /* first pass: count the rows with no entries or with all stored values equal to zero */
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) {
      cnt++;
      goto ok1;
    }
    aa = aav + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) goto ok1;
    }
    bb = bav + ib[i];
    for (j=0; j <nb; j++) {
      if (bb[j] != 0.0) goto ok1;
    }
    cnt++;
    ok1:;
  }
  /* if no rank found an all-zero row there is nothing to report */
  PetscCall(MPIU_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M)));
  if (!n0rows) {
    PetscCall(MatSeqAIJRestoreArrayRead(mat->A,&aav));
    PetscCall(MatSeqAIJRestoreArrayRead(mat->B,&bav));
    PetscFunctionReturn(0);
  }
  PetscCall(PetscMalloc1(M->rmap->n-cnt,&rows));
  cnt = 0;
  /* second pass: record the global indices of the rows that are kept */
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) continue;
    aa = aav + ia[i];
    for (j=0; j<na;j++) {
      if (aa[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
    bb = bav + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j] !=
0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
    ok2:;
  }
  PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows));
  PetscCall(MatSeqAIJRestoreArrayRead(mat->A,&aav));
  PetscCall(MatSeqAIJRestoreArrayRead(mat->B,&bav));
  PetscFunctionReturn(0);
}

/* Inserts/adds the entries of D onto the diagonal of Y; uses the fast path on the diagonal
   block when Y is assembled and has matching row/column layouts. */
PetscErrorCode MatDiagonalSet_MPIAIJ(Mat Y,Vec D,InsertMode is)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*) Y->data;
  PetscBool  cong;

  PetscFunctionBegin;
  PetscCall(MatHasCongruentLayouts(Y,&cong));
  if (Y->assembled && cong) {
    PetscCall(MatDiagonalSet(aij->A,D,is));
  } else {
    PetscCall(MatDiagonalSet_Default(Y,D,is));
  }
  PetscFunctionReturn(0);
}

/* Returns in *zrows an IS (global numbering) of the locally owned rows whose diagonal entry is zero. */
PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)M->data;
  PetscInt   i,rstart,nrows,*rows;

  PetscFunctionBegin;
  *zrows = NULL;
  PetscCall(MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows));
  PetscCall(MatGetOwnershipRange(M,&rstart,NULL));
  /* shift the local row indices returned by the Seq routine to global numbering */
  for (i=0; i<nrows; i++) rows[i] += rstart;
  PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows));
  PetscFunctionReturn(0);
}

/* Computes a per-column reduction (1/2/infinity norm, or sum/mean of the real or imaginary
   parts) over the whole matrix: each rank accumulates its contributions into a length-n work
   array indexed by global column, then the ranks combine the arrays with an Allreduce. */
PetscErrorCode MatGetColumnReductions_MPIAIJ(Mat A,PetscInt type,PetscReal *reductions)
{
  Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)A->data;
  PetscInt          i,m,n,*garray = aij->garray;
  Mat_SeqAIJ        *a_aij = (Mat_SeqAIJ*) aij->A->data;
  Mat_SeqAIJ        *b_aij = (Mat_SeqAIJ*) aij->B->data;
  PetscReal         *work;
  const PetscScalar *dummy;

  PetscFunctionBegin;
  PetscCall(MatGetSize(A,&m,&n));
  PetscCall(PetscCalloc1(n,&work));
  /* get/restore pairs appear to sync values to the host before a->a/b->a are read directly below
     — NOTE(review): confirm this is the intended device-synchronization idiom */
  PetscCall(MatSeqAIJGetArrayRead(aij->A,&dummy));
  PetscCall(MatSeqAIJRestoreArrayRead(aij->A,&dummy));
  PetscCall(MatSeqAIJGetArrayRead(aij->B,&dummy));
  PetscCall(MatSeqAIJRestoreArrayRead(aij->B,&dummy));
  if (type == NORM_2) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart +
a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      /* garray maps the off-diagonal block's local column indices to global columns */
      work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
    }
  } else if (type == NORM_1) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
    }
  } else if (type == NORM_INFINITY) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
    }
  } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] += PetscRealPart(a_aij->a[i]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] += PetscRealPart(b_aij->a[i]);
    }
  } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] += PetscImaginaryPart(a_aij->a[i]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] += PetscImaginaryPart(b_aij->a[i]);
    }
  } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown reduction type");
  /* the infinity norm combines across ranks with max, every other reduction with sum */
  if (type == NORM_INFINITY) {
    PetscCall(MPIU_Allreduce(work,reductions,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A)));
  } else {
    PetscCall(MPIU_Allreduce(work,reductions,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A)));
  }
  PetscCall(PetscFree(work));
  if (type == NORM_2) {
    /* the loop above accumulated squares; take the square root now */
    for (i=0; i<n; i++) reductions[i] = PetscSqrtReal(reductions[i]);
} else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) {
    /* divide by the global number of rows to turn sums into means */
    for (i=0; i<n; i++) reductions[i] /= m;
  }
  PetscFunctionReturn(0);
}

/* Returns in *is an IS (global numbering) of the locally owned rows that have an entry
   outside the (block-)diagonal: off-block-diagonal entries within the diagonal block,
   plus any row with a nonzero in the off-diagonal block. */
PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A,IS *is)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  IS             sis,gis;
  const PetscInt *isis,*igis;
  PetscInt       n,*iis,nsis,ngis,rstart,i;

  PetscFunctionBegin;
  PetscCall(MatFindOffBlockDiagonalEntries(a->A,&sis));
  PetscCall(MatFindNonzeroRows(a->B,&gis));
  PetscCall(ISGetSize(gis,&ngis));
  PetscCall(ISGetSize(sis,&nsis));
  PetscCall(ISGetIndices(sis,&isis));
  PetscCall(ISGetIndices(gis,&igis));

  /* merge the two local-numbering index sets, then sort and remove duplicates */
  PetscCall(PetscMalloc1(ngis+nsis,&iis));
  PetscCall(PetscArraycpy(iis,igis,ngis));
  PetscCall(PetscArraycpy(iis+ngis,isis,nsis));
  n = ngis + nsis;
  PetscCall(PetscSortRemoveDupsInt(&n,iis));
  PetscCall(MatGetOwnershipRange(A,&rstart,NULL));
  /* shift to global numbering */
  for (i=0; i<n; i++) iis[i] += rstart;
  PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)A),n,iis,PETSC_OWN_POINTER,is));

  PetscCall(ISRestoreIndices(sis,&isis));
  PetscCall(ISRestoreIndices(gis,&igis));
  PetscCall(ISDestroy(&sis));
  PetscCall(ISDestroy(&gis));
  PetscFunctionReturn(0);
}

/*
    Local utility routine that creates a mapping from the global column
  number to the local number in the off-diagonal part of the local
  storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
  a slightly higher hash table cost; without it it is not scalable (each processor
  has an order N integer array, but is fast to access.)
*/
PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
  PetscInt   n = aij->B->cmap->n,i;

  PetscFunctionBegin;
  PetscCheck(!n || aij->garray,PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
#if defined(PETSC_USE_CTABLE)
  /* the table stores (global column + 1) -> (local column + 1); the +1 shift lets 0 mean "absent" */
  PetscCall(PetscTableCreate(n,mat->cmap->N+1,&aij->colmap));
  for (i=0; i<n; i++) {
    PetscCall(PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES));
  }
#else
  /* dense variant: one entry per global column; colmap[g] == 0 means column g is absent */
  PetscCall(PetscCalloc1(mat->cmap->N+1,&aij->colmap));
  PetscCall(PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt)));
  for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
#endif
  PetscFunctionReturn(0);
}

/* Inserts or adds "value" at (row,col) of the diagonal block A: binary-search the sorted row,
   update in place when the entry exists, otherwise reallocate (unless nonew forbids it) and
   shift later entries up to make room. */
#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv,orow,ocol) \
{ \
    if (col <= lastcol1)  low1 = 0;     \
    else                 high1 = nrow1; \
    lastcol1 = col;\
    while (high1-low1 > 5) { \
      t = (low1+high1)/2; \
      if (rp1[t] > col) high1 = t; \
      else              low1  = t; \
    } \
      for (_i=low1; _i<high1; _i++) { \
        if (rp1[_i] > col) break; \
        if (rp1[_i] == col) { \
          if (addv == ADD_VALUES) { \
            ap1[_i] += value; \
            /* Not sure LogFlops will slow down the code or not */ \
            (void)PetscLogFlops(1.0); \
          } \
          else ap1[_i] = value; \
          goto a_noinsert; \
        } \
      }  \
      if (value == 0.0 && ignorezeroentries && row != col) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
      if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \
      PetscCheck(nonew != -1,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
      MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
      N = nrow1++ - 1; a->nz++; high1++; \
      /* shift up all the later entries in this row */ \
      PetscCall(PetscArraymove(rp1+_i+1,rp1+_i,N-_i+1));\
PetscCall(PetscArraymove(ap1+_i+1,ap1+_i,N-_i+1));\
      rp1[_i] = col; \
      ap1[_i] = value; \
      A->nonzerostate++;\
      a_noinsert: ; \
      ailen[row] = nrow1; \
}

/* Same insertion logic as MatSetValues_SeqAIJ_A_Private(), but for the off-diagonal block B. */
#define MatSetValues_SeqAIJ_B_Private(row,col,value,addv,orow,ocol) \
  { \
    if (col <= lastcol2) low2 = 0;                        \
    else high2 = nrow2;                                   \
    lastcol2 = col; \
    while (high2-low2 > 5) { \
      t = (low2+high2)/2; \
      if (rp2[t] > col) high2 = t; \
      else             low2  = t; \
    } \
    for (_i=low2; _i<high2; _i++) { \
      if (rp2[_i] > col) break; \
      if (rp2[_i] == col) { \
        if (addv == ADD_VALUES) { \
          ap2[_i] += value; \
          (void)PetscLogFlops(1.0); \
        } \
        else ap2[_i] = value; \
        goto b_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
    if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
    PetscCheck(nonew != -1,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
    N = nrow2++ - 1; b->nz++; high2++; \
    /* shift up all the later entries in this row */ \
    PetscCall(PetscArraymove(rp2+_i+1,rp2+_i,N-_i+1));\
    PetscCall(PetscArraymove(ap2+_i+1,ap2+_i,N-_i+1));\
    rp2[_i] = col; \
    ap2[_i] = value; \
    B->nonzerostate++; \
    b_noinsert: ; \
    bilen[row] = nrow2; \
  }

/* Sets an entire locally owned row at once; v holds the row's values in global column order
   (the part of B left of the diagonal block, then A, then the part of B right of it). */
PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
{
  Mat_MPIAIJ  *mat = (Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ  *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
  PetscInt    l,*garray = mat->garray,diag;
  PetscScalar *aa,*ba;

  PetscFunctionBegin;
  /* code only works for square matrices A */

  /* find size of row to the left of the diagonal part */
  PetscCall(MatGetOwnershipRange(A,&diag,NULL));
  row = row - diag;
  for (l=0;
l<b->i[row+1]-b->i[row]; l++) {
    if (garray[b->j[b->i[row]+l]] > diag) break;
  }
  /* left of diagonal part: first l entries of the off-diagonal row */
  if (l) {
    PetscCall(MatSeqAIJGetArray(mat->B,&ba));
    PetscCall(PetscArraycpy(ba+b->i[row],v,l));
    PetscCall(MatSeqAIJRestoreArray(mat->B,&ba));
  }

  /* diagonal part */
  if (a->i[row+1]-a->i[row]) {
    PetscCall(MatSeqAIJGetArray(mat->A,&aa));
    PetscCall(PetscArraycpy(aa+a->i[row],v+l,(a->i[row+1]-a->i[row])));
    PetscCall(MatSeqAIJRestoreArray(mat->A,&aa));
  }

  /* right of diagonal part */
  if (b->i[row+1]-b->i[row]-l) {
    PetscCall(MatSeqAIJGetArray(mat->B,&ba));
    PetscCall(PetscArraycpy(ba+b->i[row]+l,v+l+a->i[row+1]-a->i[row],b->i[row+1]-b->i[row]-l));
    PetscCall(MatSeqAIJRestoreArray(mat->B,&ba));
  }
  PetscFunctionReturn(0);
}

/* MatSetValues for MPIAIJ: entries in locally owned rows go directly into the diagonal (A)
   or off-diagonal (B) sequential block; entries for off-process rows are stashed and
   communicated during assembly. */
PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIAIJ  *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar value = 0.0;
  PetscInt    i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt    cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
  PetscBool   roworiented = aij->roworiented;

  /* Some Variables required in the macro */
  Mat         A = aij->A;
  Mat_SeqAIJ  *a = (Mat_SeqAIJ*)A->data;
  PetscInt    *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
  PetscBool   ignorezeroentries = a->ignorezeroentries;
  Mat         B = aij->B;
  Mat_SeqAIJ  *b = (Mat_SeqAIJ*)B->data;
  PetscInt    *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
  MatScalar   *aa,*ba;
  PetscInt    *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
  PetscInt    nonew;
  MatScalar   *ap1,*ap2;

  PetscFunctionBegin;
  PetscCall(MatSeqAIJGetArray(A,&aa));
  PetscCall(MatSeqAIJGetArray(B,&ba));
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
    PetscCheck(im[i] <
mat->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT,im[i],mat->rmap->N-1); 469 if (im[i] >= rstart && im[i] < rend) { 470 row = im[i] - rstart; 471 lastcol1 = -1; 472 rp1 = aj + ai[row]; 473 ap1 = aa + ai[row]; 474 rmax1 = aimax[row]; 475 nrow1 = ailen[row]; 476 low1 = 0; 477 high1 = nrow1; 478 lastcol2 = -1; 479 rp2 = bj + bi[row]; 480 ap2 = ba + bi[row]; 481 rmax2 = bimax[row]; 482 nrow2 = bilen[row]; 483 low2 = 0; 484 high2 = nrow2; 485 486 for (j=0; j<n; j++) { 487 if (v) value = roworiented ? v[i*n+j] : v[i+j*m]; 488 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue; 489 if (in[j] >= cstart && in[j] < cend) { 490 col = in[j] - cstart; 491 nonew = a->nonew; 492 MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]); 493 } else if (in[j] < 0) continue; 494 else PetscCheck(in[j] < mat->cmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT,in[j],mat->cmap->N-1); 495 else { 496 if (mat->was_assembled) { 497 if (!aij->colmap) { 498 PetscCall(MatCreateColmap_MPIAIJ_Private(mat)); 499 } 500 #if defined(PETSC_USE_CTABLE) 501 PetscCall(PetscTableFind(aij->colmap,in[j]+1,&col)); /* map global col ids to local ones */ 502 col--; 503 #else 504 col = aij->colmap[in[j]] - 1; 505 #endif 506 if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) { /* col < 0 means in[j] is a new col for B */ 507 PetscCall(MatDisAssemble_MPIAIJ(mat)); /* Change aij->B from reduced/local format to expanded/global format */ 508 col = in[j]; 509 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 510 B = aij->B; 511 b = (Mat_SeqAIJ*)B->data; 512 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a; 513 rp2 = bj + bi[row]; 514 ap2 = ba + bi[row]; 515 rmax2 = bimax[row]; 516 nrow2 = bilen[row]; 517 low2 = 0; 518 high2 = nrow2; 519 bm = aij->B->rmap->n; 520 ba = b->a; 521 } else if (col < 0 && 
!(ignorezeroentries && value == 0.0)) {
              /* column not present in the reduced B and no new nonzeros allowed */
              if (1 == ((Mat_SeqAIJ*)(aij->B->data))->nonew) {
                PetscCall(PetscInfo(mat,"Skipping of insertion of new nonzero location in off-diagonal portion of matrix %g(%" PetscInt_FMT ",%" PetscInt_FMT ")\n",(double)PetscRealPart(value),im[i],in[j]));
              } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", im[i], in[j]);
            }
          } else col = in[j];
          nonew = b->nonew;
          MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
        }
      }
    } else {
      /* off-process row: stash the values for communication during assembly */
      PetscCheck(!mat->nooffprocentries,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!aij->donotstash) {
        mat->assembled = PETSC_FALSE;
        if (roworiented) {
          PetscCall(MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
        } else {
          PetscCall(MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
        }
      }
    }
  }
  PetscCall(MatSeqAIJRestoreArray(A,&aa)); /* aa, bb might have been free'd due to reallocation above. But we don't access them here */
  PetscCall(MatSeqAIJRestoreArray(B,&ba));
  PetscFunctionReturn(0);
}

/*
  This function sets the j and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
  The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
  No off-processor parts of the matrix are allowed here and mat->was_assembled has to be PETSC_FALSE.
*/
PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat_Symbolic(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[])
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
  Mat        A    = aij->A; /* diagonal part of the matrix */
  Mat        B    = aij->B; /* offdiagonal part of the matrix */
  Mat_SeqAIJ *a   = (Mat_SeqAIJ*)A->data;
  Mat_SeqAIJ *b   = (Mat_SeqAIJ*)B->data;
  PetscInt   cstart = mat->cmap->rstart,cend = mat->cmap->rend,col;
  PetscInt   *ailen = a->ilen,*aj = a->j;
  PetscInt   *bilen = b->ilen,*bj = b->j;
  PetscInt   am     = aij->A->rmap->n,j;
  PetscInt   diag_so_far = 0,dnz;  /* running insert positions into aj / bj */
  PetscInt   offd_so_far = 0,onz;

  PetscFunctionBegin;
  /* Iterate over all rows of the matrix */
  for (j=0; j<am; j++) {
    dnz = onz = 0;
    /* Iterate over all non-zero columns of the current row */
    for (col=mat_i[j]; col<mat_i[j+1]; col++) {
      /* If column is in the diagonal */
      if (mat_j[col] >= cstart && mat_j[col] < cend) {
        aj[diag_so_far++] = mat_j[col] - cstart; /* local column numbering for the diagonal block */
        dnz++;
      } else { /* off-diagonal entries kept in global numbering here */
        bj[offd_so_far++] = mat_j[col];
        onz++;
      }
    }
    ailen[j] = dnz;
    bilen[j] = onz;
  }
  PetscFunctionReturn(0);
}

/*
  This function sets the local j, a and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
  The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
  No off-processor parts of the matrix are allowed here, they are set at a later point by MatSetValues_MPIAIJ.
  Also, mat->was_assembled has to be false, otherwise the statement aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
  would not be true and the more complex MatSetValues_MPIAIJ has to be used.
*/
PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[],const PetscScalar mat_a[])
{
  Mat_MPIAIJ  *aij  = (Mat_MPIAIJ*)mat->data;
  Mat         A     = aij->A; /* diagonal part of the matrix */
  Mat         B     = aij->B; /* offdiagonal part of the matrix */
  Mat_SeqAIJ  *aijd =(Mat_SeqAIJ*)(aij->A)->data,*aijo=(Mat_SeqAIJ*)(aij->B)->data;
  Mat_SeqAIJ  *a    = (Mat_SeqAIJ*)A->data;
  Mat_SeqAIJ  *b    = (Mat_SeqAIJ*)B->data;
  PetscInt    cstart = mat->cmap->rstart,cend = mat->cmap->rend;
  PetscInt    *ailen = a->ilen,*aj = a->j;
  PetscInt    *bilen = b->ilen,*bj = b->j;
  PetscInt    am     = aij->A->rmap->n,j;
  PetscInt    *full_diag_i=aijd->i,*full_offd_i=aijo->i; /* These variables can also include non-local elements, which are set at a later point. */
  PetscInt    col,dnz_row,onz_row,rowstart_diag,rowstart_offd;
  PetscScalar *aa = a->a,*ba = b->a;

  PetscFunctionBegin;
  /* Iterate over all rows of the matrix */
  for (j=0; j<am; j++) {
    dnz_row = onz_row = 0;
    rowstart_offd = full_offd_i[j];
    rowstart_diag = full_diag_i[j];
    /* Iterate over all non-zero columns of the current row */
    for (col=mat_i[j]; col<mat_i[j+1]; col++) {
      /* If column is in the diagonal */
      if (mat_j[col] >= cstart && mat_j[col] < cend) {
        aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
        aa[rowstart_diag+dnz_row] = mat_a[col];
        dnz_row++;
      } else { /* off-diagonal entries */
        bj[rowstart_offd+onz_row] = mat_j[col];
        ba[rowstart_offd+onz_row] = mat_a[col];
        onz_row++;
      }
    }
    ailen[j] = dnz_row;
    bilen[j] = onz_row;
  }
  PetscFunctionReturn(0);
}

/* Gets values from locally owned rows only (off-process rows raise an error): diagonal-block
   columns are read directly from A; other columns are looked up via the colmap and read from B,
   returning 0.0 when the column is not stored. */
PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
  PetscInt   i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt   cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* negative row */
    PetscCheck(idxm[i] < mat->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT,idxm[i],mat->rmap->N-1);
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* negative column */
        PetscCheck(idxn[j] < mat->cmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT,idxn[j],mat->cmap->N-1);
        if (idxn[j] >= cstart && idxn[j] < cend) {
          /* column in the diagonal block */
          col = idxn[j] - cstart;
          PetscCall(MatGetValues(aij->A,1,&row,1,&col,v+i*n+j));
        } else {
          if (!aij->colmap) {
            PetscCall(MatCreateColmap_MPIAIJ_Private(mat));
          }
#if defined(PETSC_USE_CTABLE)
          PetscCall(PetscTableFind(aij->colmap,idxn[j]+1,&col));
          col--;
#else
          col = aij->colmap[idxn[j]] - 1;
#endif
          /* column not stored in B -> the entry is an (unstored) zero */
          if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
          else {
            PetscCall(MatGetValues(aij->B,1,&row,1,&col,v+i*n+j));
          }
        }
      }
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
  }
  PetscFunctionReturn(0);
}

/* Begins assembly: starts the communication of entries that were stashed for other ranks. */
PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
  PetscInt   nstash,reallocs;

  PetscFunctionBegin;
  if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);

  PetscCall(MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range));
  PetscCall(MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs));
  PetscCall(PetscInfo(aij->A,"Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n",nstash,reallocs));
  PetscFunctionReturn(0);
}

/* Ends assembly: receives and inserts the stashed off-process entries, assembles the diagonal
   and off-diagonal blocks, handles disassembly agreement across ranks, and updates the
   collective nonzero state. */
PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ  *aij = (Mat_MPIAIJ*)mat->data;
  PetscMPIInt n;
  PetscInt
i,j,rstart,ncols,flg;
  PetscInt    *row,*col;
  PetscBool   other_disassembled;
  PetscScalar *val;

  /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */

  PetscFunctionBegin;
  if (!aij->donotstash && !mat->nooffprocentries) {
    /* drain the stash: each message holds (row, col, val) triples sorted by row */
    while (1) {
      PetscCall(MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg));
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        PetscCall(MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode));
        i = j;
      }
    }
    PetscCall(MatStashScatterEnd_Private(&mat->stash));
  }
#if defined(PETSC_HAVE_DEVICE)
  if (mat->offloadmask == PETSC_OFFLOAD_CPU) aij->A->offloadmask = PETSC_OFFLOAD_CPU;
  /* We call MatBindToCPU() on aij->A and aij->B here, because if MatBindToCPU_MPIAIJ() is called before assembly, it cannot bind these. */
  if (mat->boundtocpu) {
    PetscCall(MatBindToCPU(aij->A,PETSC_TRUE));
    PetscCall(MatBindToCPU(aij->B,PETSC_TRUE));
  }
#endif
  PetscCall(MatAssemblyBegin(aij->A,mode));
  PetscCall(MatAssemblyEnd(aij->A,mode));

  /* determine if any processor has disassembled, if so we must
     also disassemble ourself, in order that we may reassemble.
*/
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
    PetscCall(MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat)));
    if (mat->was_assembled && !other_disassembled) { /* mat on this rank has reduced off-diag B with local col ids, but globaly it does not */
      PetscCall(MatDisAssemble_MPIAIJ(mat));
    }
  }
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    PetscCall(MatSetUpMultiply_MPIAIJ(mat));
  }
  PetscCall(MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE));
#if defined(PETSC_HAVE_DEVICE)
  if (mat->offloadmask == PETSC_OFFLOAD_CPU && aij->B->offloadmask != PETSC_OFFLOAD_UNALLOCATED) aij->B->offloadmask = PETSC_OFFLOAD_CPU;
#endif
  PetscCall(MatAssemblyBegin(aij->B,mode));
  PetscCall(MatAssemblyEnd(aij->B,mode));

  /* row-access scratch space is invalidated by assembly */
  PetscCall(PetscFree2(aij->rowvalues,aij->rowindices));

  aij->rowvalues = NULL;

  PetscCall(VecDestroy(&aij->diag));

  /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
  if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
    PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
    PetscCall(MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat)));
  }
#if defined(PETSC_HAVE_DEVICE)
  mat->offloadmask = PETSC_OFFLOAD_BOTH;
#endif
  PetscFunctionReturn(0);
}

/* Zeros all stored entries of both blocks while keeping the nonzero structure. */
PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
{
  Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  PetscCall(MatZeroEntries(l->A));
  PetscCall(MatZeroEntries(l->B));
  PetscFunctionReturn(0);
}

/* Zeros the given global rows, optionally placing diag on the diagonal, and (when x and b are
   given) fixes the right-hand side so the zeroed equations remain consistent with x. */
PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
Mat_MPIAIJ       *mat = (Mat_MPIAIJ *) A->data;
  PetscObjectState sA, sB;
  PetscInt         *lrows;
  PetscInt         r, len;
  PetscBool        cong, lch, gch;

  PetscFunctionBegin;
  /* get locally owned rows */
  PetscCall(MatZeroRowsMapLocal_Private(A,N,rows,&len,&lrows));
  PetscCall(MatHasCongruentLayouts(A,&cong));
  /* fix right hand side if needed */
  if (x && b) {
    const PetscScalar *xx;
    PetscScalar       *bb;

    PetscCheck(cong,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Need matching row/col layout");
    PetscCall(VecGetArrayRead(x, &xx));
    PetscCall(VecGetArray(b, &bb));
    for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
    PetscCall(VecRestoreArrayRead(x, &xx));
    PetscCall(VecRestoreArray(b, &bb));
  }

  /* remember the nonzero states so a structural change can be detected afterwards */
  sA = mat->A->nonzerostate;
  sB = mat->B->nonzerostate;

  if (diag != 0.0 && cong) {
    PetscCall(MatZeroRows(mat->A, len, lrows, diag, NULL, NULL));
    PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL));
  } else if (diag != 0.0) { /* non-square or non congruent layouts -> if keepnonzeropattern is false, we allow for new insertion */
    Mat_SeqAIJ *aijA = (Mat_SeqAIJ*)mat->A->data;
    Mat_SeqAIJ *aijB = (Mat_SeqAIJ*)mat->B->data;
    PetscInt   nnwA, nnwB;
    PetscBool  nnzA, nnzB;

    nnwA = aijA->nonew;
    nnwB = aijB->nonew;
    nnzA = aijA->keepnonzeropattern;
    nnzB = aijB->keepnonzeropattern;
    if (!nnzA) {
      PetscCall(PetscInfo(mat->A,"Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on diagonal block.\n"));
      aijA->nonew = 0;
    }
    if (!nnzB) {
      PetscCall(PetscInfo(mat->B,"Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on off-diagonal block.\n"));
      aijB->nonew = 0;
    }
    /* Must zero here before the next loop */
    PetscCall(MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL));
    PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL));
    for (r = 0; r < len; ++r) {
833 const PetscInt row = lrows[r] + A->rmap->rstart; 834 if (row >= A->cmap->N) continue; 835 PetscCall(MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES)); 836 } 837 aijA->nonew = nnwA; 838 aijB->nonew = nnwB; 839 } else { 840 PetscCall(MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL)); 841 PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL)); 842 } 843 PetscCall(PetscFree(lrows)); 844 PetscCall(MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY)); 845 PetscCall(MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY)); 846 847 /* reduce nonzerostate */ 848 lch = (PetscBool)(sA != mat->A->nonzerostate || sB != mat->B->nonzerostate); 849 PetscCall(MPIU_Allreduce(&lch,&gch,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)A))); 850 if (gch) A->nonzerostate++; 851 PetscFunctionReturn(0); 852 } 853 854 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 855 { 856 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 857 PetscMPIInt n = A->rmap->n; 858 PetscInt i,j,r,m,len = 0; 859 PetscInt *lrows,*owners = A->rmap->range; 860 PetscMPIInt p = 0; 861 PetscSFNode *rrows; 862 PetscSF sf; 863 const PetscScalar *xx; 864 PetscScalar *bb,*mask,*aij_a; 865 Vec xmask,lmask; 866 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)l->B->data; 867 const PetscInt *aj, *ii,*ridx; 868 PetscScalar *aa; 869 870 PetscFunctionBegin; 871 /* Create SF where leaves are input rows and roots are owned rows */ 872 PetscCall(PetscMalloc1(n, &lrows)); 873 for (r = 0; r < n; ++r) lrows[r] = -1; 874 PetscCall(PetscMalloc1(N, &rrows)); 875 for (r = 0; r < N; ++r) { 876 const PetscInt idx = rows[r]; 877 PetscCheck(idx >= 0 && A->rmap->N > idx,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %" PetscInt_FMT " out of range [0,%" PetscInt_FMT ")",idx,A->rmap->N); 878 if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */ 879 PetscCall(PetscLayoutFindOwner(A->rmap,idx,&p)); 880 } 881 rrows[r].rank = p; 882 rrows[r].index = rows[r] - owners[p]; 883 } 884 
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject) A), &sf));
  PetscCall(PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER));
  /* Collect flags for rows to be zeroed */
  PetscCall(PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR));
  PetscCall(PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR));
  PetscCall(PetscSFDestroy(&sf));
  /* Compress and put in row numbers */
  for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
  /* zero diagonal part of matrix */
  PetscCall(MatZeroRowsColumns(l->A,len,lrows,diag,x,b));
  /* handle off diagonal part of matrix */
  PetscCall(MatCreateVecs(A,&xmask,NULL));
  PetscCall(VecDuplicate(l->lvec,&lmask));
  PetscCall(VecGetArray(xmask,&bb));
  for (i=0; i<len; i++) bb[lrows[i]] = 1; /* mark my zeroed rows, then scatter the marks to ghost space */
  PetscCall(VecRestoreArray(xmask,&bb));
  PetscCall(VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD));
  PetscCall(VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD));
  PetscCall(VecDestroy(&xmask));
  if (x && b) { /* this code is buggy when the row and column layout don't match */
    PetscBool cong;

    PetscCall(MatHasCongruentLayouts(A,&cong));
    PetscCheck(cong,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Need matching row/col layout");
    /* bring ghost values of x into lvec so off-diagonal contributions can be moved to b */
    PetscCall(VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD));
    PetscCall(VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD));
    PetscCall(VecGetArrayRead(l->lvec,&xx));
    PetscCall(VecGetArray(b,&bb));
  }
  PetscCall(VecGetArray(lmask,&mask));
  /* remove zeroed rows of off diagonal matrix */
  PetscCall(MatSeqAIJGetArray(l->B,&aij_a));
  ii = aij->i;
  for (i=0; i<len; i++) {
    PetscCall(PetscArrayzero(aij_a + ii[lrows[i]],ii[lrows[i]+1] - ii[lrows[i]]));
  }
  /* loop over all elements of off process part of matrix zeroing removed columns*/
  if (aij->compressedrow.use) {
    m    = aij->compressedrow.nrows;
    ii   = aij->compressedrow.i;
    ridx = aij->compressedrow.rindex;   /* maps compressed row -> actual local row */
    for (i=0; i<m; i++) {
      n  = ii[i+1] - ii[i];             /* NOTE: reuses PetscMPIInt n as a row length here */
      aj = aij->j + ii[i];
      aa = aij_a + ii[i];

      for (j=0; j<n; j++) {
        if (PetscAbsScalar(mask[*aj])) {    /* this column corresponds to a zeroed row somewhere */
          if (b) bb[*ridx] -= *aa*xx[*aj];  /* move the known contribution to the rhs */
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
      ridx++;
    }
  } else { /* do not use compressed row format */
    m = l->B->rmap->n;
    for (i=0; i<m; i++) {
      n  = ii[i+1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij_a + ii[i];
      for (j=0; j<n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          if (b) bb[i] -= *aa*xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
    }
  }
  if (x && b) {
    PetscCall(VecRestoreArray(b,&bb));
    PetscCall(VecRestoreArrayRead(l->lvec,&xx));
  }
  PetscCall(MatSeqAIJRestoreArray(l->B,&aij_a));
  PetscCall(VecRestoreArray(lmask,&mask));
  PetscCall(VecDestroy(&lmask));
  PetscCall(PetscFree(lrows));

  /* only change matrix nonzero state if pattern was allowed to be changed */
  if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) {
    PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
    PetscCall(MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A)));
  }
  PetscFunctionReturn(0);
}

/*
   yy = A*xx. The forward scatter of ghost values is overlapped with the local (diagonal block)
   multiply; the off-diagonal contribution is added once the scatter completes.
*/
PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
  PetscInt   nt;
  VecScatter Mvctx = a->Mvctx;

  PetscFunctionBegin;
  PetscCall(VecGetLocalSize(xx,&nt));
  PetscCheck(nt == A->cmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%" PetscInt_FMT ") and xx (%" PetscInt_FMT ")",A->cmap->n,nt);
  PetscCall(VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD));
  PetscCall((*a->A->ops->mult)(a->A,xx,yy));          /* local work while communication is in flight */
  PetscCall(VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD));
  PetscCall((*a->B->ops->multadd)(a->B,a->lvec,yy,yy));
  PetscFunctionReturn(0);
}

/* xx = (diagonal block of A) applied to bb; delegates to the sequential diagonal block */
PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  PetscCall(MatMultDiagonalBlock(a->A,bb,xx));
  PetscFunctionReturn(0);
}

/* zz = yy + A*xx, with the same scatter/compute overlap as MatMult_MPIAIJ */
PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
  VecScatter Mvctx = a->Mvctx;

  PetscFunctionBegin;
  PetscCall(VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD));
  PetscCall((*a->A->ops->multadd)(a->A,xx,yy,zz));
  PetscCall(VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD));
  PetscCall((*a->B->ops->multadd)(a->B,a->lvec,zz,zz));
  PetscFunctionReturn(0);
}

/* yy = A^T*xx: local transposes first, then a reverse scatter ADDs the off-process partial sums */
PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  /* do nondiagonal part */
  PetscCall((*a->B->ops->multtranspose)(a->B,xx,a->lvec));
  /* do local part */
  PetscCall((*a->A->ops->multtranspose)(a->A,xx,yy));
  /* add partial results together */
  PetscCall(VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE));
  PetscCall(VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE));
  PetscFunctionReturn(0);
}

/*
   Test whether Bmat equals Amat^T (within tol). Cheap local test first (diagonal blocks
   must be mutual transposes on every rank); the expensive off-diagonal comparison below
   is only reached when the cheap test passes on all ranks.
*/
PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool *f)
{
  MPI_Comm    comm;
  Mat_MPIAIJ  *Aij = (Mat_MPIAIJ*) Amat->data, *Bij;
  Mat         Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
  IS          Me,Notme;
  PetscInt    M,N,first,last,*notme,i;
  PetscBool   lf;
  PetscMPIInt size;

  PetscFunctionBegin;
  /* Easy test: symmetric diagonal block */
  Bij = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A;
  PetscCall(MatIsTranspose(Adia,Bdia,tol,&lf));
  PetscCall(MPIU_Allreduce(&lf,f,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)Amat)));
  if (!*f) PetscFunctionReturn(0);
  PetscCall(PetscObjectGetComm((PetscObject)Amat,&comm));
  PetscCallMPI(MPI_Comm_size(comm,&size));
  if (size == 1) PetscFunctionReturn(0); /* uniprocessor: the diagonal block IS the matrix */

  /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. */
  PetscCall(MatGetSize(Amat,&M,&N));
  PetscCall(MatGetOwnershipRange(Amat,&first,&last));
  /* notme[] = all global columns NOT owned by this rank (before `first` and from `last` on) */
  PetscCall(PetscMalloc1(N-last+first,&notme));
  for (i=0; i<first; i++) notme[i] = i;
  for (i=last; i<M; i++) notme[i-last+first] = i;
  PetscCall(ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme));
  PetscCall(ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me));
  /* extract A(Me,Notme) and B(Notme,Me) and compare them as transposes */
  PetscCall(MatCreateSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs));
  Aoff = Aoffs[0];
  PetscCall(MatCreateSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs));
  Boff = Boffs[0];
  PetscCall(MatIsTranspose(Aoff,Boff,tol,f));
  PetscCall(MatDestroyMatrices(1,&Aoffs));
  PetscCall(MatDestroyMatrices(1,&Boffs));
  PetscCall(ISDestroy(&Me));
  PetscCall(ISDestroy(&Notme));
  PetscCall(PetscFree(notme));
  PetscFunctionReturn(0);
}

/* Symmetry test: a matrix is symmetric iff it is its own transpose (within tol) */
PetscErrorCode MatIsSymmetric_MPIAIJ(Mat A,PetscReal tol,PetscBool *f)
{
  PetscFunctionBegin;
  PetscCall(MatIsTranspose_MPIAIJ(A,A,tol,f));
  PetscFunctionReturn(0);
}

/* zz = yy + A^T*xx: local transposes first, then a reverse scatter ADDs off-process partial sums */
PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  /* do nondiagonal part */
  PetscCall((*a->B->ops->multtranspose)(a->B,xx,a->lvec));
  /* do local part */
  PetscCall((*a->A->ops->multtransposeadd)(a->A,xx,yy,zz));
  /* add partial results together */
  PetscCall(VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE));
  PetscCall(VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE));
  PetscFunctionReturn(0);
}

/*
   This only works correctly for square matrices where the subblock A->A is the
   diagonal block
*/
PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  PetscCheck(A->rmap->N == A->cmap->N,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
  PetscCheck(A->rmap->rstart == A->cmap->rstart && A->rmap->rend == A->cmap->rend,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
  PetscCall(MatGetDiagonal(a->A,v));
  PetscFunctionReturn(0);
}

/* Scale the whole matrix: A = aa*A, applied to both sequential blocks */
PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  PetscCall(MatScale(a->A,aa));
  PetscCall(MatScale(a->B,aa));
  PetscFunctionReturn(0);
}

/* Free COO stuff; must match allocation methods in MatSetPreallocationCOO_MPIAIJ() */
PETSC_INTERN PetscErrorCode MatResetPreallocationCOO_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;

  PetscFunctionBegin;
  PetscCall(PetscSFDestroy(&aij->coo_sf));
  /* pass-1 maps/permutations for local entries */
  PetscCall(PetscFree(aij->Aperm1));
  PetscCall(PetscFree(aij->Bperm1));
  PetscCall(PetscFree(aij->Ajmap1));
  PetscCall(PetscFree(aij->Bjmap1));

  /* pass-2 maps/permutations for received remote entries */
  PetscCall(PetscFree(aij->Aimap2));
  PetscCall(PetscFree(aij->Bimap2));
  PetscCall(PetscFree(aij->Aperm2));
  PetscCall(PetscFree(aij->Bperm2));
  PetscCall(PetscFree(aij->Ajmap2));
  PetscCall(PetscFree(aij->Bjmap2));

  PetscCall(PetscFree2(aij->sendbuf,aij->recvbuf));
  PetscCall(PetscFree(aij->Cperm1));
  PetscFunctionReturn(0);
}

/* Destroy the parallel matrix: free both blocks, the communication machinery, COO state,
   and unregister every composed method/conversion function. */
PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT,mat->rmap->N,mat->cmap->N);
#endif
  PetscCall(MatStashDestroy_Private(&mat->stash));
  PetscCall(VecDestroy(&aij->diag));
  PetscCall(MatDestroy(&aij->A));
  PetscCall(MatDestroy(&aij->B));
#if defined(PETSC_USE_CTABLE)
  PetscCall(PetscTableDestroy(&aij->colmap));
#else
  PetscCall(PetscFree(aij->colmap));
#endif
  PetscCall(PetscFree(aij->garray));
  PetscCall(VecDestroy(&aij->lvec));
  PetscCall(VecScatterDestroy(&aij->Mvctx));
  PetscCall(PetscFree2(aij->rowvalues,aij->rowindices));
  PetscCall(PetscFree(aij->ld));

  /* Free COO */
  PetscCall(MatResetPreallocationCOO_MPIAIJ(mat));

  PetscCall(PetscFree(mat->data));

  /* may be created by MatCreateMPIAIJSumSeqAIJSymbolic */
  PetscCall(PetscObjectCompose((PetscObject)mat,"MatMergeSeqsToMPI",NULL));

  PetscCall(PetscObjectChangeTypeName((PetscObject)mat,NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatResetPreallocation_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpibaij_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL));
#if defined(PETSC_HAVE_CUDA)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijcusparse_C",NULL));
#endif
#if defined(PETSC_HAVE_KOKKOS_KERNELS)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijkokkos_C",NULL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpidense_C",NULL));
#if defined(PETSC_HAVE_ELEMENTAL)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_elemental_C",NULL));
#endif
#if defined(PETSC_HAVE_SCALAPACK)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_scalapack_C",NULL));
#endif
#if defined(PETSC_HAVE_HYPRE)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_hypre_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_transpose_mpiaij_mpiaij_C",NULL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_is_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_is_mpiaij_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaij_mpiaij_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetUseScalableIncreaseOverlap_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijperm_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijsell_C",NULL));
#if defined(PETSC_HAVE_MKL_SPARSE)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijmkl_C",NULL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijcrl_C",NULL));
  /* NOTE(review): "MatConvert_mpiaij_is_C" is cleared a second time here (also cleared above); harmless but redundant */
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_is_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisell_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatSetPreallocationCOO_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatSetValuesCOO_C",NULL));
  PetscFunctionReturn(0);
}

/*
   Write the parallel matrix to a binary viewer in PETSc's standard format:
   header (classid, M, N, global nz), per-row lengths, column indices, then values.
   Each row is emitted in ascending global column order by interleaving the
   off-diagonal block B (columns below the diagonal-block range), the diagonal
   block A, and the remaining columns of B.
*/
PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ        *A   = (Mat_SeqAIJ*)aij->A->data;
  Mat_SeqAIJ        *B   = (Mat_SeqAIJ*)aij->B->data;
  const PetscInt    *garray = aij->garray;   /* local B column -> global column */
  const PetscScalar *aa,*ba;
  PetscInt          header[4],M,N,m,rs,cs,nz,cnt,i,ja,jb;
  PetscInt          *rowlens;
  PetscInt          *colidxs;
  PetscScalar       *matvals;

  PetscFunctionBegin;
  PetscCall(PetscViewerSetUp(viewer));

  M  = mat->rmap->N;
  N  = mat->cmap->N;
  m  = mat->rmap->n;
  rs = mat->rmap->rstart;
  cs = mat->cmap->rstart;
  nz = A->nz + B->nz;  /* local nonzero count; summed to global below */

  /* write matrix header */
  header[0] = MAT_FILE_CLASSID;
  header[1] = M; header[2] = N; header[3] = nz;
  PetscCallMPI(MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat)));
  PetscCall(PetscViewerBinaryWrite(viewer,header,4,PETSC_INT));

  /* fill in and store row lengths */
  PetscCall(PetscMalloc1(m,&rowlens));
  for (i=0; i<m; i++) rowlens[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
  PetscCall(PetscViewerBinaryWriteAll(viewer,rowlens,m,rs,M,PETSC_INT));
  PetscCall(PetscFree(rowlens));

  /* fill in and store column indices */
  PetscCall(PetscMalloc1(nz,&colidxs));
  for (cnt=0, i=0; i<m; i++) {
    /* B entries whose global column precedes the diagonal-block columns */
    for (jb=B->i[i]; jb<B->i[i+1]; jb++) {
      if (garray[B->j[jb]] > cs) break;
      colidxs[cnt++] = garray[B->j[jb]];
    }
    /* diagonal-block entries, shifted to global numbering */
    for (ja=A->i[i]; ja<A->i[i+1]; ja++)
      colidxs[cnt++] = A->j[ja] + cs;
    /* remaining B entries (global columns after the diagonal block) */
    for (; jb<B->i[i+1]; jb++)
      colidxs[cnt++] = garray[B->j[jb]];
  }
  PetscCheck(cnt == nz,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt_FMT,cnt,nz);
  PetscCall(PetscViewerBinaryWriteAll(viewer,colidxs,nz,PETSC_DETERMINE,PETSC_DETERMINE,PETSC_INT));
  PetscCall(PetscFree(colidxs));

  /* fill in and store nonzero values, in the same interleaved order as the indices above */
  PetscCall(MatSeqAIJGetArrayRead(aij->A,&aa));
  PetscCall(MatSeqAIJGetArrayRead(aij->B,&ba));
  PetscCall(PetscMalloc1(nz,&matvals));
  for (cnt=0, i=0; i<m; i++) {
    for (jb=B->i[i]; jb<B->i[i+1]; jb++) {
      if (garray[B->j[jb]] > cs) break;
      matvals[cnt++] = ba[jb];
    }
    for (ja=A->i[i]; ja<A->i[i+1]; ja++)
      matvals[cnt++] = aa[ja];
    for (; jb<B->i[i+1]; jb++)
      matvals[cnt++] = ba[jb];
  }
  PetscCall(MatSeqAIJRestoreArrayRead(aij->A,&aa));
  PetscCall(MatSeqAIJRestoreArrayRead(aij->B,&ba));
  /* NOTE(review): uses PETSC_ERR_LIB while the identical check above uses PETSC_ERR_PLIB — likely should be PETSC_ERR_PLIB too */
  PetscCheck(cnt == nz,PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt_FMT,cnt,nz);
  PetscCall(PetscViewerBinaryWriteAll(viewer,matvals,nz,PETSC_DETERMINE,PETSC_DETERMINE,PETSC_SCALAR));
  PetscCall(PetscFree(matvals));

  /* write block size option to the viewer's .info file */
  PetscCall(MatView_Binary_BlockSizes(mat,viewer));
  PetscFunctionReturn(0);
}

#include <petscdraw.h>
/*
   View the parallel matrix on ASCII, draw, binary, or socket viewers. Special ASCII
   formats (load balance, info, info-detail) are handled up front; otherwise the whole
   matrix is gathered onto rank 0 and viewed there.
*/
PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
  PetscMPIInt       rank = aij->rank,size = aij->size;
  PetscBool         isdraw,iascii,isbinary;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary));
  if (iascii) {
    PetscCall(PetscViewerGetFormat(viewer,&format));
    if (format == PETSC_VIEWER_LOAD_BALANCE) {
      /* report min/avg/max local nonzero counts across ranks */
      PetscInt i,nmax = 0,nmin = PETSC_MAX_INT,navg = 0,*nz,nzlocal = ((Mat_SeqAIJ*) (aij->A->data))->nz + ((Mat_SeqAIJ*) (aij->B->data))->nz;
      PetscCall(PetscMalloc1(size,&nz));
      PetscCallMPI(MPI_Allgather(&nzlocal,1,MPIU_INT,nz,1,MPIU_INT,PetscObjectComm((PetscObject)mat)));
      for (i=0; i<(PetscInt)size; i++) {
        nmax = PetscMax(nmax,nz[i]);
        nmin =
PetscMin(nmin,nz[i]);
        navg += nz[i];
      }
      PetscCall(PetscFree(nz));
      navg = navg/size;
      PetscCall(PetscViewerASCIIPrintf(viewer,"Load Balance - Nonzeros: Min %" PetscInt_FMT " avg %" PetscInt_FMT " max %" PetscInt_FMT "\n",nmin,navg,nmax));
      PetscFunctionReturn(0);
    }
    PetscCall(PetscViewerGetFormat(viewer,&format));
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      /* per-rank statistics: local rows/nonzeros/memory and the on/off-diagonal split */
      MatInfo  info;
      PetscInt *inodes=NULL;

      PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank));
      PetscCall(MatGetInfo(mat,MAT_LOCAL,&info));
      PetscCall(MatInodeGetInodeSizes(aij->A,NULL,&inodes,NULL));
      PetscCall(PetscViewerASCIIPushSynchronized(viewer));
      if (!inodes) {
        PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %g, not using I-node routines\n",
                                                     rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory));
      } else {
        PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %g, using I-node routines\n",
                                                     rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory));
      }
      PetscCall(MatGetInfo(aij->A,MAT_LOCAL,&info));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %" PetscInt_FMT " \n",rank,(PetscInt)info.nz_used));
      PetscCall(MatGetInfo(aij->B,MAT_LOCAL,&info));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %" PetscInt_FMT " \n",rank,(PetscInt)info.nz_used));
      PetscCall(PetscViewerFlush(viewer));
      PetscCall(PetscViewerASCIIPopSynchronized(viewer));
      PetscCall(PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n"));
      PetscCall(VecScatterView(aij->Mvctx,viewer));
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscInt inodecount,inodelimit,*inodes;
      PetscCall(MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit));
      if (inodes) {
        PetscCall(PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %" PetscInt_FMT " nodes, limit used is %" PetscInt_FMT "\n",inodecount,inodelimit));
      } else {
        PetscCall(PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n"));
      }
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
  } else if (isbinary) {
    if (size == 1) {
      PetscCall(PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name));
      PetscCall(MatView(aij->A,viewer));
    } else {
      PetscCall(MatView_MPIAIJ_Binary(mat,viewer));
    }
    PetscFunctionReturn(0);
  } else if (iascii && size == 1) {
    PetscCall(PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name));
    PetscCall(MatView(aij->A,viewer));
    PetscFunctionReturn(0);
  } else if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;
    PetscCall(PetscViewerDrawGetDraw(viewer,0,&draw));
    PetscCall(PetscDrawIsNull(draw,&isnull));
    if (isnull) PetscFunctionReturn(0);
  }

  { /* assemble the entire matrix onto first processor */
    Mat A = NULL, Av;
    IS  isrow,iscol;

    /* rank 0 requests every row/column; all other ranks request none */
    PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->rmap->N : 0,0,1,&isrow));
    PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->cmap->N : 0,0,1,&iscol));
    PetscCall(MatCreateSubMatrix(mat,isrow,iscol,MAT_INITIAL_MATRIX,&A));
    PetscCall(MatMPIAIJGetSeqAIJ(A,&Av,NULL,NULL));
    /* The commented code uses MatCreateSubMatrices instead */
    /*
    Mat *AA, A = NULL, Av;
    IS  isrow,iscol;

    PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->rmap->N : 0,0,1,&isrow));
    PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->cmap->N : 0,0,1,&iscol));
    PetscCall(MatCreateSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&AA));
    if (rank == 0) {
       PetscCall(PetscObjectReference((PetscObject)AA[0]));
       A    = AA[0];
       Av   = AA[0];
    }
    PetscCall(MatDestroySubMatrices(1,&AA));
    */
    PetscCall(ISDestroy(&iscol));
    PetscCall(ISDestroy(&isrow));
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    PetscCall(PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer));
    if (rank == 0) {
      if (((PetscObject)mat)->name) {
        PetscCall(PetscObjectSetName((PetscObject)Av,((PetscObject)mat)->name));
      }
      PetscCall(MatView_SeqAIJ(Av,sviewer));
    }
    PetscCall(PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer));
    PetscCall(PetscViewerFlush(viewer));
    PetscCall(MatDestroy(&A));
  }
  PetscFunctionReturn(0);
}

/* Dispatch MatView for MPIAIJ: all supported viewer types share one implementation;
   unsupported viewer types are silently ignored. */
PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
{
  PetscBool iascii,isdraw,issocket,isbinary;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket));
  if (iascii || isdraw || isbinary || issocket) {
    PetscCall(MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer));
  }
  PetscFunctionReturn(0);
}

/*
   Parallel (block Jacobi style) SOR: each sweep scatters the current iterate's ghost
   values, folds the off-diagonal contribution into a modified right-hand side bb1,
   then runs the sequential SOR of the diagonal block. Only "local" sweep variants
   (and Eisenstat) are supported in parallel.
*/
PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data;
  Vec        bb1 = NULL;   /* modified rhs: bb - B*x(ghost) */
  PetscBool  hasop;
  PetscFunctionBegin;
  if (flag == SOR_APPLY_UPPER) {
    PetscCall((*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx));
    PetscFunctionReturn(0);
  }

  /* bb1 is needed whenever more than one outer iteration runs, the initial guess is
     nonzero (~flag & SOR_ZERO_INITIAL_GUESS), or Eisenstat is requested */
  if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) {
    PetscCall(VecDuplicate(bb,&bb1));
  }

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      /* first sweep needs no ghost values since x starts at zero */
      PetscCall((*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx));
      its--;
    }

    while (its--) {
      PetscCall(VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD));
      PetscCall(VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD));

      /* update rhs: bb1 = bb - B*x */
      PetscCall(VecScale(mat->lvec,-1.0));
      PetscCall((*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1));

      /* local sweep */
      PetscCall((*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx));
    }
  } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      PetscCall((*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx));
      its--;
    }
    while (its--) {
      PetscCall(VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD));
      PetscCall(VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD));

      /* update rhs: bb1 = bb - B*x */
      PetscCall(VecScale(mat->lvec,-1.0));
      PetscCall((*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1));

      /* local sweep */
      PetscCall((*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx));
    }
  } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      PetscCall((*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx));
      its--;
    }
    while (its--) {
      PetscCall(VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD));
      PetscCall(VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD));

      /* update rhs: bb1 = bb - B*x */
      PetscCall(VecScale(mat->lvec,-1.0));
      PetscCall((*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1));

      /* local sweep */
      PetscCall((*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx));
    }
  } else if (flag & SOR_EISENSTAT) {
    Vec xx1;

    PetscCall(VecDuplicate(bb,&xx1));
    PetscCall((*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx));

    PetscCall(VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD));
    PetscCall(VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD));
    if (!mat->diag) {
      /* lazily create and cache the diagonal for the Eisenstat trick */
      PetscCall(MatCreateVecs(matin,&mat->diag,NULL));
      PetscCall(MatGetDiagonal(matin,mat->diag));
    }
    PetscCall(MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop));
    if (hasop) {
      PetscCall(MatMultDiagonalBlock(matin,xx,bb1));
    } else {
      PetscCall(VecPointwiseMult(bb1,mat->diag,xx));
    }
    PetscCall(VecAYPX(bb1,(omega-2.0)/omega,bb));

    PetscCall(MatMultAdd(mat->B,mat->lvec,bb1,bb1));

    /* local sweep */
    PetscCall((*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1));
    PetscCall(VecAXPY(xx,1.0,xx1));
    PetscCall(VecDestroy(&xx1));
  } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported");

  PetscCall(VecDestroy(&bb1));

  matin->factorerrortype = mat->A->factorerrortype;
  PetscFunctionReturn(0);
}

/*
   B = P_r * A * P_c: permute rows by rowp and columns by colp. The inverse permutations
   are computed with star forests (where does each of my rows/columns GO), the new
   preallocation counts are gathered, and values are inserted into a freshly created
   parallel AIJ matrix.
*/
PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
{
  Mat            aA,aB,Aperm;
  const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj;
  PetscScalar    *aa,*ba;
  PetscInt       i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest;
  PetscSF        rowsf,sf;
  IS             parcolp = NULL;  /* NOTE(review): never set in this function; the conditional ISDestroy at the end looks vestigial */
  PetscBool      done;

  PetscFunctionBegin;
  PetscCall(MatGetLocalSize(A,&m,&n));
  PetscCall(ISGetIndices(rowp,&rwant));
  PetscCall(ISGetIndices(colp,&cwant));
  PetscCall(PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest));

  /* Invert row permutation to find out where my rows should go */
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf));
  PetscCall(PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant));
  PetscCall(PetscSFSetFromOptions(rowsf));
  for (i=0; i<m; i++) work[i] = A->rmap->rstart + i;
  PetscCall(PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPI_REPLACE));
  PetscCall(PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPI_REPLACE));

  /* Invert column permutation to find out where my columns should go */
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A),&sf));
  PetscCall(PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant));
  PetscCall(PetscSFSetFromOptions(sf));
  for (i=0; i<n; i++) work[i] = A->cmap->rstart + i;
  PetscCall(PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPI_REPLACE));
  PetscCall(PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPI_REPLACE));
  PetscCall(PetscSFDestroy(&sf));

  PetscCall(ISRestoreIndices(rowp,&rwant));
  PetscCall(ISRestoreIndices(colp,&cwant));
  PetscCall(MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols));

  /* Find out where my gcols should go */
  PetscCall(MatGetSize(aB,NULL,&ng));
  PetscCall(PetscMalloc1(ng,&gcdest));
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A),&sf));
  PetscCall(PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols));
  PetscCall(PetscSFSetFromOptions(sf));
  PetscCall(PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest,MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest,MPI_REPLACE));
  PetscCall(PetscSFDestroy(&sf));
  /* count diagonal/off-diagonal nonzeros of each permuted row (dnnz/onnz), then
     broadcast the counts to the ranks that will own those rows (tdnnz/tonnz) */
  PetscCall(PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz));
  PetscCall(MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done));
  PetscCall(MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done));
  for (i=0; i<m; i++) {
    PetscInt    row = rdest[i];
    PetscMPIInt rowner;
    PetscCall(PetscLayoutFindOwner(A->rmap,row,&rowner));
    for (j=ai[i]; j<ai[i+1]; j++) {
      PetscInt    col = cdest[aj[j]];
      PetscMPIInt cowner;
      PetscCall(PetscLayoutFindOwner(A->cmap,col,&cowner)); /* Could build an index for the columns to eliminate this search */
      if (rowner == cowner) dnnz[i]++;
      else onnz[i]++;
    }
    for (j=bi[i]; j<bi[i+1]; j++) {
      PetscInt    col = gcdest[bj[j]];
      PetscMPIInt cowner;
      PetscCall(PetscLayoutFindOwner(A->cmap,col,&cowner));
      if (rowner == cowner) dnnz[i]++;
      else onnz[i]++;
    }
  }
  PetscCall(PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz,MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz,MPI_REPLACE));
  PetscCall(PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz,MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz,MPI_REPLACE));
  PetscCall(PetscSFDestroy(&rowsf));

  PetscCall(MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm));
  PetscCall(MatSeqAIJGetArray(aA,&aa));
  PetscCall(MatSeqAIJGetArray(aB,&ba));
  for (i=0; i<m; i++) {
    PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */
    PetscInt j0,rowlen;
    rowlen = ai[i+1] - ai[i];
    for (j0=j=0; j<rowlen; j0=j) { /* rowlen could be larger than number of rows m, so sum in batches */
      for (; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]];
      PetscCall(MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES));
    }
    rowlen = bi[i+1] - bi[i];
    for (j0=j=0; j<rowlen; j0=j) {
      for (; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]];
      PetscCall(MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES));
    }
  }
  PetscCall(MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY));
  PetscCall(MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done));
  PetscCall(MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done));
  PetscCall(MatSeqAIJRestoreArray(aA,&aa));
  PetscCall(MatSeqAIJRestoreArray(aB,&ba));
  PetscCall(PetscFree4(dnnz,onnz,tdnnz,tonnz));
  PetscCall(PetscFree3(work,rdest,cdest));
  PetscCall(PetscFree(gcdest));
  if (parcolp) PetscCall(ISDestroy(&colp));
  *B = Aperm;
  PetscFunctionReturn(0);
}

/* Return the number of ghost columns and (optionally) their global indices (garray) */
PetscErrorCode MatGetGhosts_MPIAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[])
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;

  PetscFunctionBegin;
  PetscCall(MatGetSize(aij->B,NULL,nghosts));
  if (ghosts) *ghosts = aij->garray;
  PetscFunctionReturn(0);
}

/*
   Gather matrix statistics: the diagonal and off-diagonal block infos are summed
   locally; for MAT_GLOBAL_MAX / MAT_GLOBAL_SUM the five counters are reduced over
   the matrix communicator with MPI_MAX / MPI_SUM respectively.
*/
PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
  Mat            A = mat->A,B = mat->B;
  PetscLogDouble isend[5],irecv[5];  /* [nz_used, nz_allocated, nz_unneeded, memory, mallocs] */

  PetscFunctionBegin;
  info->block_size = 1.0;
  PetscCall(MatGetInfo(A,MAT_LOCAL,info));

  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;

  PetscCall(MatGetInfo(B,MAT_LOCAL,info));

  isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
  isend[3] += info->memory;  isend[4] += info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    PetscCall(MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_MAX,PetscObjectComm((PetscObject)matin)));

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    PetscCall(MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_SUM,PetscObjectComm((PetscObject)matin)));

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(0);
}

/* Set a matrix option: most options are forwarded to both sequential blocks;
   a few update local flags instead. (The switch continues past this chunk.) */
PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_UNUSED_NONZERO_LOCATION_ERR:
  case MAT_KEEP_NONZERO_PATTERN:
  case MAT_NEW_NONZERO_LOCATION_ERR:
  case MAT_USE_INODES:
  case MAT_IGNORE_ZERO_ENTRIES:
  case MAT_FORM_EXPLICIT_TRANSPOSE:
    MatCheckPreallocated(A,1);
    PetscCall(MatSetOption(a->A,op,flg));
    PetscCall(MatSetOption(a->B,op,flg));
    break;
  case MAT_ROW_ORIENTED:
    MatCheckPreallocated(A,1);
    a->roworiented = flg;

    PetscCall(MatSetOption(a->A,op,flg));
    PetscCall(MatSetOption(a->B,op,flg));
    break;
  case MAT_FORCE_DIAGONAL_ENTRIES:
  case MAT_SORTED_FULL:
    PetscCall(PetscInfo(A,"Option %s ignored\n",MatOptions[op]));
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = flg;
    break;
  /* Symmetry flags are handled directly by MatSetOption() and they don't affect preallocation */
  case MAT_SPD:
  case MAT_SYMMETRIC:
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
case MAT_SYMMETRY_ETERNAL: 1731 break; 1732 case MAT_SUBMAT_SINGLEIS: 1733 A->submat_singleis = flg; 1734 break; 1735 case MAT_STRUCTURE_ONLY: 1736 /* The option is handled directly by MatSetOption() */ 1737 break; 1738 default: 1739 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1740 } 1741 PetscFunctionReturn(0); 1742 } 1743 1744 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1745 { 1746 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1747 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1748 PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart; 1749 PetscInt nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend; 1750 PetscInt *cmap,*idx_p; 1751 1752 PetscFunctionBegin; 1753 PetscCheck(!mat->getrowactive,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1754 mat->getrowactive = PETSC_TRUE; 1755 1756 if (!mat->rowvalues && (idx || v)) { 1757 /* 1758 allocate enough space to hold information from the longest row. 
1759 */ 1760 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1761 PetscInt max = 1,tmp; 1762 for (i=0; i<matin->rmap->n; i++) { 1763 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1764 if (max < tmp) max = tmp; 1765 } 1766 PetscCall(PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices)); 1767 } 1768 1769 PetscCheck(row >= rstart && row < rend,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows"); 1770 lrow = row - rstart; 1771 1772 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1773 if (!v) {pvA = NULL; pvB = NULL;} 1774 if (!idx) {pcA = NULL; if (!v) pcB = NULL;} 1775 PetscCall((*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA)); 1776 PetscCall((*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB)); 1777 nztot = nzA + nzB; 1778 1779 cmap = mat->garray; 1780 if (v || idx) { 1781 if (nztot) { 1782 /* Sort by increasing column numbers, assuming A and B already sorted */ 1783 PetscInt imark = -1; 1784 if (v) { 1785 *v = v_p = mat->rowvalues; 1786 for (i=0; i<nzB; i++) { 1787 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1788 else break; 1789 } 1790 imark = i; 1791 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1792 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1793 } 1794 if (idx) { 1795 *idx = idx_p = mat->rowindices; 1796 if (imark > -1) { 1797 for (i=0; i<imark; i++) { 1798 idx_p[i] = cmap[cworkB[i]]; 1799 } 1800 } else { 1801 for (i=0; i<nzB; i++) { 1802 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1803 else break; 1804 } 1805 imark = i; 1806 } 1807 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1808 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1809 } 1810 } else { 1811 if (idx) *idx = NULL; 1812 if (v) *v = NULL; 1813 } 1814 } 1815 *nz = nztot; 1816 PetscCall((*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA)); 1817 PetscCall((*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB)); 1818 PetscFunctionReturn(0); 1819 } 1820 1821 PetscErrorCode MatRestoreRow_MPIAIJ(Mat 
mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1822 { 1823 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1824 1825 PetscFunctionBegin; 1826 PetscCheck(aij->getrowactive,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1827 aij->getrowactive = PETSC_FALSE; 1828 PetscFunctionReturn(0); 1829 } 1830 1831 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1832 { 1833 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1834 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1835 PetscInt i,j,cstart = mat->cmap->rstart; 1836 PetscReal sum = 0.0; 1837 const MatScalar *v,*amata,*bmata; 1838 1839 PetscFunctionBegin; 1840 if (aij->size == 1) { 1841 PetscCall(MatNorm(aij->A,type,norm)); 1842 } else { 1843 PetscCall(MatSeqAIJGetArrayRead(aij->A,&amata)); 1844 PetscCall(MatSeqAIJGetArrayRead(aij->B,&bmata)); 1845 if (type == NORM_FROBENIUS) { 1846 v = amata; 1847 for (i=0; i<amat->nz; i++) { 1848 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1849 } 1850 v = bmata; 1851 for (i=0; i<bmat->nz; i++) { 1852 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1853 } 1854 PetscCall(MPIU_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat))); 1855 *norm = PetscSqrtReal(*norm); 1856 PetscCall(PetscLogFlops(2.0*amat->nz+2.0*bmat->nz)); 1857 } else if (type == NORM_1) { /* max column norm */ 1858 PetscReal *tmp,*tmp2; 1859 PetscInt *jj,*garray = aij->garray; 1860 PetscCall(PetscCalloc1(mat->cmap->N+1,&tmp)); 1861 PetscCall(PetscMalloc1(mat->cmap->N+1,&tmp2)); 1862 *norm = 0.0; 1863 v = amata; jj = amat->j; 1864 for (j=0; j<amat->nz; j++) { 1865 tmp[cstart + *jj++] += PetscAbsScalar(*v); v++; 1866 } 1867 v = bmata; jj = bmat->j; 1868 for (j=0; j<bmat->nz; j++) { 1869 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 1870 } 1871 PetscCall(MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat))); 1872 for (j=0; j<mat->cmap->N; j++) { 1873 if (tmp2[j] > *norm) *norm = 
tmp2[j]; 1874 } 1875 PetscCall(PetscFree(tmp)); 1876 PetscCall(PetscFree(tmp2)); 1877 PetscCall(PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0))); 1878 } else if (type == NORM_INFINITY) { /* max row norm */ 1879 PetscReal ntemp = 0.0; 1880 for (j=0; j<aij->A->rmap->n; j++) { 1881 v = amata + amat->i[j]; 1882 sum = 0.0; 1883 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 1884 sum += PetscAbsScalar(*v); v++; 1885 } 1886 v = bmata + bmat->i[j]; 1887 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 1888 sum += PetscAbsScalar(*v); v++; 1889 } 1890 if (sum > ntemp) ntemp = sum; 1891 } 1892 PetscCall(MPIU_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat))); 1893 PetscCall(PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0))); 1894 } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm"); 1895 PetscCall(MatSeqAIJRestoreArrayRead(aij->A,&amata)); 1896 PetscCall(MatSeqAIJRestoreArrayRead(aij->B,&bmata)); 1897 } 1898 PetscFunctionReturn(0); 1899 } 1900 1901 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout) 1902 { 1903 Mat_MPIAIJ *a =(Mat_MPIAIJ*)A->data,*b; 1904 Mat_SeqAIJ *Aloc =(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data,*sub_B_diag; 1905 PetscInt M = A->rmap->N,N=A->cmap->N,ma,na,mb,nb,row,*cols,*cols_tmp,*B_diag_ilen,i,ncol,A_diag_ncol; 1906 const PetscInt *ai,*aj,*bi,*bj,*B_diag_i; 1907 Mat B,A_diag,*B_diag; 1908 const MatScalar *pbv,*bv; 1909 1910 PetscFunctionBegin; 1911 ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n; 1912 ai = Aloc->i; aj = Aloc->j; 1913 bi = Bloc->i; bj = Bloc->j; 1914 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 1915 PetscInt *d_nnz,*g_nnz,*o_nnz; 1916 PetscSFNode *oloc; 1917 PETSC_UNUSED PetscSF sf; 1918 1919 PetscCall(PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc)); 1920 /* compute d_nnz for preallocation */ 1921 PetscCall(PetscArrayzero(d_nnz,na)); 1922 for (i=0; i<ai[ma]; i++) d_nnz[aj[i]]++; 1923 /* compute local off-diagonal 
contributions */ 1924 PetscCall(PetscArrayzero(g_nnz,nb)); 1925 for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++; 1926 /* map those to global */ 1927 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A),&sf)); 1928 PetscCall(PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray)); 1929 PetscCall(PetscSFSetFromOptions(sf)); 1930 PetscCall(PetscArrayzero(o_nnz,na)); 1931 PetscCall(PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM)); 1932 PetscCall(PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM)); 1933 PetscCall(PetscSFDestroy(&sf)); 1934 1935 PetscCall(MatCreate(PetscObjectComm((PetscObject)A),&B)); 1936 PetscCall(MatSetSizes(B,A->cmap->n,A->rmap->n,N,M)); 1937 PetscCall(MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs))); 1938 PetscCall(MatSetType(B,((PetscObject)A)->type_name)); 1939 PetscCall(MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz)); 1940 PetscCall(PetscFree4(d_nnz,o_nnz,g_nnz,oloc)); 1941 } else { 1942 B = *matout; 1943 PetscCall(MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE)); 1944 } 1945 1946 b = (Mat_MPIAIJ*)B->data; 1947 A_diag = a->A; 1948 B_diag = &b->A; 1949 sub_B_diag = (Mat_SeqAIJ*)(*B_diag)->data; 1950 A_diag_ncol = A_diag->cmap->N; 1951 B_diag_ilen = sub_B_diag->ilen; 1952 B_diag_i = sub_B_diag->i; 1953 1954 /* Set ilen for diagonal of B */ 1955 for (i=0; i<A_diag_ncol; i++) { 1956 B_diag_ilen[i] = B_diag_i[i+1] - B_diag_i[i]; 1957 } 1958 1959 /* Transpose the diagonal part of the matrix. In contrast to the offdiagonal part, this can be done 1960 very quickly (=without using MatSetValues), because all writes are local. 
*/ 1961 PetscCall(MatTranspose(A_diag,MAT_REUSE_MATRIX,B_diag)); 1962 1963 /* copy over the B part */ 1964 PetscCall(PetscMalloc1(bi[mb],&cols)); 1965 PetscCall(MatSeqAIJGetArrayRead(a->B,&bv)); 1966 pbv = bv; 1967 row = A->rmap->rstart; 1968 for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]]; 1969 cols_tmp = cols; 1970 for (i=0; i<mb; i++) { 1971 ncol = bi[i+1]-bi[i]; 1972 PetscCall(MatSetValues(B,ncol,cols_tmp,1,&row,pbv,INSERT_VALUES)); 1973 row++; 1974 pbv += ncol; cols_tmp += ncol; 1975 } 1976 PetscCall(PetscFree(cols)); 1977 PetscCall(MatSeqAIJRestoreArrayRead(a->B,&bv)); 1978 1979 PetscCall(MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY)); 1980 PetscCall(MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY)); 1981 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) { 1982 *matout = B; 1983 } else { 1984 PetscCall(MatHeaderMerge(A,&B)); 1985 } 1986 PetscFunctionReturn(0); 1987 } 1988 1989 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 1990 { 1991 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1992 Mat a = aij->A,b = aij->B; 1993 PetscInt s1,s2,s3; 1994 1995 PetscFunctionBegin; 1996 PetscCall(MatGetLocalSize(mat,&s2,&s3)); 1997 if (rr) { 1998 PetscCall(VecGetLocalSize(rr,&s1)); 1999 PetscCheck(s1==s3,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 2000 /* Overlap communication with computation. 
*/ 2001 PetscCall(VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD)); 2002 } 2003 if (ll) { 2004 PetscCall(VecGetLocalSize(ll,&s1)); 2005 PetscCheck(s1==s2,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 2006 PetscCall((*b->ops->diagonalscale)(b,ll,NULL)); 2007 } 2008 /* scale the diagonal block */ 2009 PetscCall((*a->ops->diagonalscale)(a,ll,rr)); 2010 2011 if (rr) { 2012 /* Do a scatter end and then right scale the off-diagonal block */ 2013 PetscCall(VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD)); 2014 PetscCall((*b->ops->diagonalscale)(b,NULL,aij->lvec)); 2015 } 2016 PetscFunctionReturn(0); 2017 } 2018 2019 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 2020 { 2021 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2022 2023 PetscFunctionBegin; 2024 PetscCall(MatSetUnfactored(a->A)); 2025 PetscFunctionReturn(0); 2026 } 2027 2028 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool *flag) 2029 { 2030 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 2031 Mat a,b,c,d; 2032 PetscBool flg; 2033 2034 PetscFunctionBegin; 2035 a = matA->A; b = matA->B; 2036 c = matB->A; d = matB->B; 2037 2038 PetscCall(MatEqual(a,c,&flg)); 2039 if (flg) { 2040 PetscCall(MatEqual(b,d,&flg)); 2041 } 2042 PetscCall(MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A))); 2043 PetscFunctionReturn(0); 2044 } 2045 2046 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 2047 { 2048 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2049 Mat_MPIAIJ *b = (Mat_MPIAIJ*)B->data; 2050 2051 PetscFunctionBegin; 2052 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. 
*/ 2053 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 2054 /* because of the column compression in the off-processor part of the matrix a->B, 2055 the number of columns in a->B and b->B may be different, hence we cannot call 2056 the MatCopy() directly on the two parts. If need be, we can provide a more 2057 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 2058 then copying the submatrices */ 2059 PetscCall(MatCopy_Basic(A,B,str)); 2060 } else { 2061 PetscCall(MatCopy(a->A,b->A,str)); 2062 PetscCall(MatCopy(a->B,b->B,str)); 2063 } 2064 PetscCall(PetscObjectStateIncrease((PetscObject)B)); 2065 PetscFunctionReturn(0); 2066 } 2067 2068 PetscErrorCode MatSetUp_MPIAIJ(Mat A) 2069 { 2070 PetscFunctionBegin; 2071 PetscCall(MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,NULL,PETSC_DEFAULT,NULL)); 2072 PetscFunctionReturn(0); 2073 } 2074 2075 /* 2076 Computes the number of nonzeros per row needed for preallocation when X and Y 2077 have different nonzero structure. 
2078 */ 2079 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz) 2080 { 2081 PetscInt i,j,k,nzx,nzy; 2082 2083 PetscFunctionBegin; 2084 /* Set the number of nonzeros in the new matrix */ 2085 for (i=0; i<m; i++) { 2086 const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i]; 2087 nzx = xi[i+1] - xi[i]; 2088 nzy = yi[i+1] - yi[i]; 2089 nnz[i] = 0; 2090 for (j=0,k=0; j<nzx; j++) { /* Point in X */ 2091 for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */ 2092 if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++; /* Skip duplicate */ 2093 nnz[i]++; 2094 } 2095 for (; k<nzy; k++) nnz[i]++; 2096 } 2097 PetscFunctionReturn(0); 2098 } 2099 2100 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */ 2101 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz) 2102 { 2103 PetscInt m = Y->rmap->N; 2104 Mat_SeqAIJ *x = (Mat_SeqAIJ*)X->data; 2105 Mat_SeqAIJ *y = (Mat_SeqAIJ*)Y->data; 2106 2107 PetscFunctionBegin; 2108 PetscCall(MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz)); 2109 PetscFunctionReturn(0); 2110 } 2111 2112 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2113 { 2114 Mat_MPIAIJ *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data; 2115 2116 PetscFunctionBegin; 2117 if (str == SAME_NONZERO_PATTERN) { 2118 PetscCall(MatAXPY(yy->A,a,xx->A,str)); 2119 PetscCall(MatAXPY(yy->B,a,xx->B,str)); 2120 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 2121 PetscCall(MatAXPY_Basic(Y,a,X,str)); 2122 } else { 2123 Mat B; 2124 PetscInt *nnz_d,*nnz_o; 2125 2126 PetscCall(PetscMalloc1(yy->A->rmap->N,&nnz_d)); 2127 PetscCall(PetscMalloc1(yy->B->rmap->N,&nnz_o)); 2128 PetscCall(MatCreate(PetscObjectComm((PetscObject)Y),&B)); 
2129 PetscCall(PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name)); 2130 PetscCall(MatSetLayouts(B,Y->rmap,Y->cmap)); 2131 PetscCall(MatSetType(B,((PetscObject)Y)->type_name)); 2132 PetscCall(MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d)); 2133 PetscCall(MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o)); 2134 PetscCall(MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o)); 2135 PetscCall(MatAXPY_BasicWithPreallocation(B,Y,a,X,str)); 2136 PetscCall(MatHeaderMerge(Y,&B)); 2137 PetscCall(PetscFree(nnz_d)); 2138 PetscCall(PetscFree(nnz_o)); 2139 } 2140 PetscFunctionReturn(0); 2141 } 2142 2143 PETSC_INTERN PetscErrorCode MatConjugate_SeqAIJ(Mat); 2144 2145 PetscErrorCode MatConjugate_MPIAIJ(Mat mat) 2146 { 2147 PetscFunctionBegin; 2148 if (PetscDefined(USE_COMPLEX)) { 2149 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2150 2151 PetscCall(MatConjugate_SeqAIJ(aij->A)); 2152 PetscCall(MatConjugate_SeqAIJ(aij->B)); 2153 } 2154 PetscFunctionReturn(0); 2155 } 2156 2157 PetscErrorCode MatRealPart_MPIAIJ(Mat A) 2158 { 2159 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2160 2161 PetscFunctionBegin; 2162 PetscCall(MatRealPart(a->A)); 2163 PetscCall(MatRealPart(a->B)); 2164 PetscFunctionReturn(0); 2165 } 2166 2167 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 2168 { 2169 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2170 2171 PetscFunctionBegin; 2172 PetscCall(MatImaginaryPart(a->A)); 2173 PetscCall(MatImaginaryPart(a->B)); 2174 PetscFunctionReturn(0); 2175 } 2176 2177 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A,Vec v,PetscInt idx[]) 2178 { 2179 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2180 PetscInt i,*idxb = NULL,m = A->rmap->n; 2181 PetscScalar *va,*vv; 2182 Vec vB,vA; 2183 const PetscScalar *vb; 2184 2185 PetscFunctionBegin; 2186 PetscCall(VecCreateSeq(PETSC_COMM_SELF,m,&vA)); 2187 PetscCall(MatGetRowMaxAbs(a->A,vA,idx)); 2188 2189 PetscCall(VecGetArrayWrite(vA,&va)); 2190 if (idx) { 2191 for (i=0; i<m; i++) { 2192 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2193 } 
2194 } 2195 2196 PetscCall(VecCreateSeq(PETSC_COMM_SELF,m,&vB)); 2197 PetscCall(PetscMalloc1(m,&idxb)); 2198 PetscCall(MatGetRowMaxAbs(a->B,vB,idxb)); 2199 2200 PetscCall(VecGetArrayWrite(v,&vv)); 2201 PetscCall(VecGetArrayRead(vB,&vb)); 2202 for (i=0; i<m; i++) { 2203 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 2204 vv[i] = vb[i]; 2205 if (idx) idx[i] = a->garray[idxb[i]]; 2206 } else { 2207 vv[i] = va[i]; 2208 if (idx && PetscAbsScalar(va[i]) == PetscAbsScalar(vb[i]) && idxb[i] != -1 && idx[i] > a->garray[idxb[i]]) 2209 idx[i] = a->garray[idxb[i]]; 2210 } 2211 } 2212 PetscCall(VecRestoreArrayWrite(vA,&vv)); 2213 PetscCall(VecRestoreArrayWrite(vA,&va)); 2214 PetscCall(VecRestoreArrayRead(vB,&vb)); 2215 PetscCall(PetscFree(idxb)); 2216 PetscCall(VecDestroy(&vA)); 2217 PetscCall(VecDestroy(&vB)); 2218 PetscFunctionReturn(0); 2219 } 2220 2221 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2222 { 2223 Mat_MPIAIJ *mat = (Mat_MPIAIJ*) A->data; 2224 PetscInt m = A->rmap->n,n = A->cmap->n; 2225 PetscInt cstart = A->cmap->rstart,cend = A->cmap->rend; 2226 PetscInt *cmap = mat->garray; 2227 PetscInt *diagIdx, *offdiagIdx; 2228 Vec diagV, offdiagV; 2229 PetscScalar *a, *diagA, *offdiagA; 2230 const PetscScalar *ba,*bav; 2231 PetscInt r,j,col,ncols,*bi,*bj; 2232 Mat B = mat->B; 2233 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 2234 2235 PetscFunctionBegin; 2236 /* When a process holds entire A and other processes have no entry */ 2237 if (A->cmap->N == n) { 2238 PetscCall(VecGetArrayWrite(v,&diagA)); 2239 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF,1,m,diagA,&diagV)); 2240 PetscCall(MatGetRowMinAbs(mat->A,diagV,idx)); 2241 PetscCall(VecDestroy(&diagV)); 2242 PetscCall(VecRestoreArrayWrite(v,&diagA)); 2243 PetscFunctionReturn(0); 2244 } else if (n == 0) { 2245 if (m) { 2246 PetscCall(VecGetArrayWrite(v,&a)); 2247 for (r = 0; r < m; r++) {a[r] = 0.0; if (idx) idx[r] = -1;} 2248 PetscCall(VecRestoreArrayWrite(v,&a)); 2249 } 2250 PetscFunctionReturn(0); 
2251 } 2252 2253 PetscCall(PetscMalloc2(m,&diagIdx,m,&offdiagIdx)); 2254 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV)); 2255 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV)); 2256 PetscCall(MatGetRowMinAbs(mat->A, diagV, diagIdx)); 2257 2258 /* Get offdiagIdx[] for implicit 0.0 */ 2259 PetscCall(MatSeqAIJGetArrayRead(B,&bav)); 2260 ba = bav; 2261 bi = b->i; 2262 bj = b->j; 2263 PetscCall(VecGetArrayWrite(offdiagV, &offdiagA)); 2264 for (r = 0; r < m; r++) { 2265 ncols = bi[r+1] - bi[r]; 2266 if (ncols == A->cmap->N - n) { /* Brow is dense */ 2267 offdiagA[r] = *ba; offdiagIdx[r] = cmap[0]; 2268 } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */ 2269 offdiagA[r] = 0.0; 2270 2271 /* Find first hole in the cmap */ 2272 for (j=0; j<ncols; j++) { 2273 col = cmap[bj[j]]; /* global column number = cmap[B column number] */ 2274 if (col > j && j < cstart) { 2275 offdiagIdx[r] = j; /* global column number of first implicit 0.0 */ 2276 break; 2277 } else if (col > j + n && j >= cstart) { 2278 offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */ 2279 break; 2280 } 2281 } 2282 if (j == ncols && ncols < A->cmap->N - n) { 2283 /* a hole is outside compressed Bcols */ 2284 if (ncols == 0) { 2285 if (cstart) { 2286 offdiagIdx[r] = 0; 2287 } else offdiagIdx[r] = cend; 2288 } else { /* ncols > 0 */ 2289 offdiagIdx[r] = cmap[ncols-1] + 1; 2290 if (offdiagIdx[r] == cstart) offdiagIdx[r] += n; 2291 } 2292 } 2293 } 2294 2295 for (j=0; j<ncols; j++) { 2296 if (PetscAbsScalar(offdiagA[r]) > PetscAbsScalar(*ba)) {offdiagA[r] = *ba; offdiagIdx[r] = cmap[*bj];} 2297 ba++; bj++; 2298 } 2299 } 2300 2301 PetscCall(VecGetArrayWrite(v, &a)); 2302 PetscCall(VecGetArrayRead(diagV, (const PetscScalar**)&diagA)); 2303 for (r = 0; r < m; ++r) { 2304 if (PetscAbsScalar(diagA[r]) < PetscAbsScalar(offdiagA[r])) { 2305 a[r] = diagA[r]; 2306 if (idx) idx[r] = cstart + diagIdx[r]; 2307 } else if (PetscAbsScalar(diagA[r]) == PetscAbsScalar(offdiagA[r])) { 
2308 a[r] = diagA[r]; 2309 if (idx) { 2310 if (cstart + diagIdx[r] <= offdiagIdx[r]) { 2311 idx[r] = cstart + diagIdx[r]; 2312 } else idx[r] = offdiagIdx[r]; 2313 } 2314 } else { 2315 a[r] = offdiagA[r]; 2316 if (idx) idx[r] = offdiagIdx[r]; 2317 } 2318 } 2319 PetscCall(MatSeqAIJRestoreArrayRead(B,&bav)); 2320 PetscCall(VecRestoreArrayWrite(v, &a)); 2321 PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar**)&diagA)); 2322 PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA)); 2323 PetscCall(VecDestroy(&diagV)); 2324 PetscCall(VecDestroy(&offdiagV)); 2325 PetscCall(PetscFree2(diagIdx, offdiagIdx)); 2326 PetscFunctionReturn(0); 2327 } 2328 2329 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A,Vec v,PetscInt idx[]) 2330 { 2331 Mat_MPIAIJ *mat = (Mat_MPIAIJ*) A->data; 2332 PetscInt m = A->rmap->n,n = A->cmap->n; 2333 PetscInt cstart = A->cmap->rstart,cend = A->cmap->rend; 2334 PetscInt *cmap = mat->garray; 2335 PetscInt *diagIdx, *offdiagIdx; 2336 Vec diagV, offdiagV; 2337 PetscScalar *a, *diagA, *offdiagA; 2338 const PetscScalar *ba,*bav; 2339 PetscInt r,j,col,ncols,*bi,*bj; 2340 Mat B = mat->B; 2341 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 2342 2343 PetscFunctionBegin; 2344 /* When a process holds entire A and other processes have no entry */ 2345 if (A->cmap->N == n) { 2346 PetscCall(VecGetArrayWrite(v,&diagA)); 2347 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF,1,m,diagA,&diagV)); 2348 PetscCall(MatGetRowMin(mat->A,diagV,idx)); 2349 PetscCall(VecDestroy(&diagV)); 2350 PetscCall(VecRestoreArrayWrite(v,&diagA)); 2351 PetscFunctionReturn(0); 2352 } else if (n == 0) { 2353 if (m) { 2354 PetscCall(VecGetArrayWrite(v,&a)); 2355 for (r = 0; r < m; r++) {a[r] = PETSC_MAX_REAL; if (idx) idx[r] = -1;} 2356 PetscCall(VecRestoreArrayWrite(v,&a)); 2357 } 2358 PetscFunctionReturn(0); 2359 } 2360 2361 PetscCall(PetscCalloc2(m,&diagIdx,m,&offdiagIdx)); 2362 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV)); 2363 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV)); 2364 
PetscCall(MatGetRowMin(mat->A, diagV, diagIdx)); 2365 2366 /* Get offdiagIdx[] for implicit 0.0 */ 2367 PetscCall(MatSeqAIJGetArrayRead(B,&bav)); 2368 ba = bav; 2369 bi = b->i; 2370 bj = b->j; 2371 PetscCall(VecGetArrayWrite(offdiagV, &offdiagA)); 2372 for (r = 0; r < m; r++) { 2373 ncols = bi[r+1] - bi[r]; 2374 if (ncols == A->cmap->N - n) { /* Brow is dense */ 2375 offdiagA[r] = *ba; offdiagIdx[r] = cmap[0]; 2376 } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */ 2377 offdiagA[r] = 0.0; 2378 2379 /* Find first hole in the cmap */ 2380 for (j=0; j<ncols; j++) { 2381 col = cmap[bj[j]]; /* global column number = cmap[B column number] */ 2382 if (col > j && j < cstart) { 2383 offdiagIdx[r] = j; /* global column number of first implicit 0.0 */ 2384 break; 2385 } else if (col > j + n && j >= cstart) { 2386 offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */ 2387 break; 2388 } 2389 } 2390 if (j == ncols && ncols < A->cmap->N - n) { 2391 /* a hole is outside compressed Bcols */ 2392 if (ncols == 0) { 2393 if (cstart) { 2394 offdiagIdx[r] = 0; 2395 } else offdiagIdx[r] = cend; 2396 } else { /* ncols > 0 */ 2397 offdiagIdx[r] = cmap[ncols-1] + 1; 2398 if (offdiagIdx[r] == cstart) offdiagIdx[r] += n; 2399 } 2400 } 2401 } 2402 2403 for (j=0; j<ncols; j++) { 2404 if (PetscRealPart(offdiagA[r]) > PetscRealPart(*ba)) {offdiagA[r] = *ba; offdiagIdx[r] = cmap[*bj];} 2405 ba++; bj++; 2406 } 2407 } 2408 2409 PetscCall(VecGetArrayWrite(v, &a)); 2410 PetscCall(VecGetArrayRead(diagV, (const PetscScalar**)&diagA)); 2411 for (r = 0; r < m; ++r) { 2412 if (PetscRealPart(diagA[r]) < PetscRealPart(offdiagA[r])) { 2413 a[r] = diagA[r]; 2414 if (idx) idx[r] = cstart + diagIdx[r]; 2415 } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) { 2416 a[r] = diagA[r]; 2417 if (idx) { 2418 if (cstart + diagIdx[r] <= offdiagIdx[r]) { 2419 idx[r] = cstart + diagIdx[r]; 2420 } else idx[r] = offdiagIdx[r]; 2421 } 2422 } else { 2423 a[r] = offdiagA[r]; 
if (idx) idx[r] = offdiagIdx[r];
    }
  }
  PetscCall(MatSeqAIJRestoreArrayRead(B,&bav));
  PetscCall(VecRestoreArrayWrite(v, &a));
  PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar**)&diagA));
  PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA));
  PetscCall(VecDestroy(&diagV));
  PetscCall(VecDestroy(&offdiagV));
  PetscCall(PetscFree2(diagIdx, offdiagIdx));
  PetscFunctionReturn(0);
}

/*
   MatGetRowMax_MPIAIJ - For each locally owned row, computes the maximum entry
   (compared by real part) over the whole parallel row, and optionally its global
   column index.  The per-row maximum of the diagonal block (mat->A) is merged with
   the maximum over the off-diagonal block (mat->B); since B is stored compressed,
   columns absent from B contribute an implicit 0.0 that can itself be the maximum,
   which is why the "first hole" search below exists.  Ties between the diagonal and
   off-diagonal candidates are broken toward the smaller global column index.
*/
PetscErrorCode MatGetRowMax_MPIAIJ(Mat A,Vec v,PetscInt idx[])
{
  Mat_MPIAIJ        *mat = (Mat_MPIAIJ*)A->data;
  PetscInt          m = A->rmap->n,n = A->cmap->n;
  PetscInt          cstart = A->cmap->rstart,cend = A->cmap->rend;
  PetscInt          *cmap = mat->garray;        /* maps local B columns to global column numbers */
  PetscInt          *diagIdx, *offdiagIdx;
  Vec               diagV, offdiagV;
  PetscScalar       *a, *diagA, *offdiagA;
  const PetscScalar *ba,*bav;
  PetscInt          r,j,col,ncols,*bi,*bj;
  Mat               B = mat->B;
  Mat_SeqAIJ        *b = (Mat_SeqAIJ*)B->data;  /* reaches into SeqAIJ internals (i/j CSR arrays) */

  PetscFunctionBegin;
  /* When a process holds entire A and other processes have no entry */
  if (A->cmap->N == n) {
    /* Entire parallel row lives in the diagonal block: delegate directly,
       wrapping v's array so no copy is needed */
    PetscCall(VecGetArrayWrite(v,&diagA));
    PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF,1,m,diagA,&diagV));
    PetscCall(MatGetRowMax(mat->A,diagV,idx));
    PetscCall(VecDestroy(&diagV));
    PetscCall(VecRestoreArrayWrite(v,&diagA));
    PetscFunctionReturn(0);
  } else if (n == 0) {
    /* No locally owned columns at all: every row maximum is the identity for max */
    if (m) {
      PetscCall(VecGetArrayWrite(v,&a));
      for (r = 0; r < m; r++) {a[r] = PETSC_MIN_REAL; if (idx) idx[r] = -1;}
      PetscCall(VecRestoreArrayWrite(v,&a));
    }
    PetscFunctionReturn(0);
  }

  PetscCall(PetscMalloc2(m,&diagIdx,m,&offdiagIdx));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV));
  PetscCall(MatGetRowMax(mat->A, diagV, diagIdx));

  /* Get offdiagIdx[] for implicit 0.0 */
  PetscCall(MatSeqAIJGetArrayRead(B,&bav));
  ba = bav;
  bi = b->i;
  bj = b->j;
  PetscCall(VecGetArrayWrite(offdiagV, &offdiagA));
  for (r = 0; r < m; r++) {
    ncols = bi[r+1] - bi[r];
    if (ncols == A->cmap->N - n) { /* Brow is dense */
      offdiagA[r] = *ba; offdiagIdx[r] = cmap[0];
    } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */
      offdiagA[r] = 0.0;

      /* Find first hole in the cmap */
      for (j=0; j<ncols; j++) {
        col = cmap[bj[j]]; /* global column number = cmap[B column number] */
        if (col > j && j < cstart) {
          offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
          break;
        } else if (col > j + n && j >= cstart) {
          /* +n skips over the diagonal-block column range owned by this process */
          offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
          break;
        }
      }
      if (j == ncols && ncols < A->cmap->N - n) {
        /* a hole is outside compressed Bcols */
        if (ncols == 0) {
          if (cstart) {
            offdiagIdx[r] = 0;
          } else offdiagIdx[r] = cend;
        } else { /* ncols > 0 */
          offdiagIdx[r] = cmap[ncols-1] + 1;
          if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
        }
      }
    }

    /* Now scan the stored entries of this B row; ba/bj advance cumulatively across rows */
    for (j=0; j<ncols; j++) {
      if (PetscRealPart(offdiagA[r]) < PetscRealPart(*ba)) {offdiagA[r] = *ba; offdiagIdx[r] = cmap[*bj];}
      ba++; bj++;
    }
  }

  /* Merge diagonal-block and off-diagonal-block winners per row */
  PetscCall(VecGetArrayWrite(v, &a));
  PetscCall(VecGetArrayRead(diagV,(const PetscScalar**)&diagA));
  for (r = 0; r < m; ++r) {
    if (PetscRealPart(diagA[r]) > PetscRealPart(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) idx[r] = cstart + diagIdx[r];
    } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) {
        /* tie: report the smaller global column index */
        if (cstart + diagIdx[r] <= offdiagIdx[r]) {
          idx[r] = cstart + diagIdx[r];
        } else idx[r] = offdiagIdx[r];
      }
    } else {
      a[r] = offdiagA[r];
      if (idx) idx[r] = offdiagIdx[r];
    }
  }
  PetscCall(MatSeqAIJRestoreArrayRead(B,&bav));
  PetscCall(VecRestoreArrayWrite(v, &a));
  PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar**)&diagA));
  PetscCall(VecRestoreArrayWrite(offdiagV,&offdiagA));
  PetscCall(VecDestroy(&diagV));
  PetscCall(VecDestroy(&offdiagV));
  PetscCall(PetscFree2(diagIdx, offdiagIdx));
  PetscFunctionReturn(0);
}

/* Gather the nonzero structure of the entire parallel matrix onto each process
   as one sequential matrix (values are not copied). */
PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
{
  Mat *dummy;

  PetscFunctionBegin;
  PetscCall(MatCreateSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy));
  *newmat = *dummy;
  PetscCall(PetscFree(dummy));
  PetscFunctionReturn(0);
}

/* Invert the (point) block diagonal; purely local, so delegate to the diagonal
   block and propagate any factorization-error flag it reports. */
PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data;

  PetscFunctionBegin;
  PetscCall(MatInvertBlockDiagonal(a->A,values));
  A->factorerrortype = a->A->factorerrortype;
  PetscFunctionReturn(0);
}

/* Fill the matrix with random values.  On an unassembled (but preallocated) matrix
   the off-diagonal block must not receive entries in this process's own column
   range, hence the skip-column-range variant below. */
static PetscErrorCode MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)x->data;

  PetscFunctionBegin;
  PetscCheck(x->assembled || x->preallocated,PetscObjectComm((PetscObject)x), PETSC_ERR_ARG_WRONGSTATE, "MatSetRandom on an unassembled and unpreallocated MATMPIAIJ is not allowed");
  PetscCall(MatSetRandom(aij->A,rctx));
  if (x->assembled) {
    PetscCall(MatSetRandom(aij->B,rctx));
  } else {
    PetscCall(MatSetRandomSkipColumnRange_SeqAIJ_Private(aij->B,x->cmap->rstart,x->cmap->rend,rctx));
  }
  PetscCall(MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY));
  PetscFunctionReturn(0);
}

/* Type-specific implementation behind MatMPIAIJSetUseScalableIncreaseOverlap():
   swaps the increase-overlap function pointer. */
PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A,PetscBool sc)
{
  PetscFunctionBegin;
  if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable;
  else A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ;
  PetscFunctionReturn(0);
}

/*@
   MatMPIAIJSetUseScalableIncreaseOverlap - Determine if the matrix uses a scalable
   algorithm to compute the overlap

   Collective on Mat

   Input Parameters:
+  A - the matrix
-  sc - PETSC_TRUE indicates use the scalable algorithm (default is not to use the scalable algorithm)

   Level: advanced

@*/
PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A,PetscBool sc)
{
  PetscFunctionBegin;
  /* PetscTryMethod: silently does nothing for types that do not compose the method */
  PetscTryMethod(A,"MatMPIAIJSetUseScalableIncreaseOverlap_C",(Mat,PetscBool),(A,sc));
  PetscFunctionReturn(0);
}

/* Process the MPIAIJ-specific options database entries. */
PetscErrorCode MatSetFromOptions_MPIAIJ(PetscOptionItems *PetscOptionsObject,Mat A)
{
  PetscBool sc = PETSC_FALSE,flg;

  PetscFunctionBegin;
  PetscOptionsHeadBegin(PetscOptionsObject,"MPIAIJ options");
  /* default reflects the currently installed function pointer */
  if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE;
  PetscCall(PetscOptionsBool("-mat_increase_overlap_scalable","Use a scalable algorithm to compute the overlap","MatIncreaseOverlap",sc,&sc,&flg));
  if (flg) {
    PetscCall(MatMPIAIJSetUseScalableIncreaseOverlap(A,sc));
  }
  PetscOptionsHeadEnd();
  PetscFunctionReturn(0);
}

/* Y = Y + a*I.  Ensures some preallocation exists for the diagonal before
   delegating to the generic shift; the temporary reset of aij->nonew preserves
   the new-nonzero policy across the re-preallocation. */
PetscErrorCode MatShift_MPIAIJ(Mat Y,PetscScalar a)
{
  Mat_MPIAIJ *maij = (Mat_MPIAIJ*)Y->data;
  Mat_SeqAIJ *aij = (Mat_SeqAIJ*)maij->A->data;

  PetscFunctionBegin;
  if (!Y->preallocated) {
    PetscCall(MatMPIAIJSetPreallocation(Y,1,NULL,0,NULL));
  } else if (!aij->nz) { /* It does not matter if diagonals of Y only partially lie in maij->A. We just need an estimated preallocation. */
    PetscInt nonew = aij->nonew;
    PetscCall(MatSeqAIJSetPreallocation(maij->A,1,NULL));
    aij->nonew = nonew;
  }
  PetscCall(MatShift_Basic(Y,a));
  PetscFunctionReturn(0);
}

/* Report whether any diagonal entry is missing; d (if requested) is converted
   from the diagonal block's local row number to a global row number. */
PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A,PetscBool *missing,PetscInt *d)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  PetscCheck(A->rmap->n == A->cmap->n,PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices");
  PetscCall(MatMissingDiagonal(a->A,missing,d));
  if (d) {
    PetscInt rstart;
    PetscCall(MatGetOwnershipRange(A,&rstart,NULL));
    *d += rstart;
  }
  PetscFunctionReturn(0);
}

/* Variable-block-size analogue of MatInvertBlockDiagonal; blocks are assumed to
   lie entirely within the diagonal block, so this is purely local. */
PetscErrorCode MatInvertVariableBlockDiagonal_MPIAIJ(Mat A,PetscInt nblocks,const PetscInt *bsizes,PetscScalar *diag)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  PetscCall(MatInvertVariableBlockDiagonal(a->A,nblocks,bsizes,diag));
  PetscFunctionReturn(0);
}

/* -------------------------------------------------------------------*/
/* Function table for MATMPIAIJ; the numeric comments are the MatOps slot indices. */
static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
                                       MatGetRow_MPIAIJ,
                                       MatRestoreRow_MPIAIJ,
                                       MatMult_MPIAIJ,
                                /* 4*/ MatMultAdd_MPIAIJ,
                                       MatMultTranspose_MPIAIJ,
                                       MatMultTransposeAdd_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                /*10*/ NULL,
                                       NULL,
                                       NULL,
                                       MatSOR_MPIAIJ,
                                       MatTranspose_MPIAIJ,
                                /*15*/ MatGetInfo_MPIAIJ,
                                       MatEqual_MPIAIJ,
                                       MatGetDiagonal_MPIAIJ,
                                       MatDiagonalScale_MPIAIJ,
                                       MatNorm_MPIAIJ,
                                /*20*/ MatAssemblyBegin_MPIAIJ,
                                       MatAssemblyEnd_MPIAIJ,
                                       MatSetOption_MPIAIJ,
                                       MatZeroEntries_MPIAIJ,
                                /*24*/ MatZeroRows_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                /*29*/ MatSetUp_MPIAIJ,
                                       NULL,
                                       NULL,
                                       MatGetDiagonalBlock_MPIAIJ,
                                       NULL,
                                /*34*/ MatDuplicate_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                /*39*/ MatAXPY_MPIAIJ,
                                       MatCreateSubMatrices_MPIAIJ,
                                       MatIncreaseOverlap_MPIAIJ,
                                       MatGetValues_MPIAIJ,
                                       MatCopy_MPIAIJ,
                                /*44*/ MatGetRowMax_MPIAIJ,
                                       MatScale_MPIAIJ,
                                       MatShift_MPIAIJ,
                                       MatDiagonalSet_MPIAIJ,
                                       MatZeroRowsColumns_MPIAIJ,
                                /*49*/ MatSetRandom_MPIAIJ,
                                       MatGetRowIJ_MPIAIJ,
                                       MatRestoreRowIJ_MPIAIJ,
                                       NULL,
                                       NULL,
                                /*54*/ MatFDColoringCreate_MPIXAIJ,
                                       NULL,
                                       MatSetUnfactored_MPIAIJ,
                                       MatPermute_MPIAIJ,
                                       NULL,
                                /*59*/ MatCreateSubMatrix_MPIAIJ,
                                       MatDestroy_MPIAIJ,
                                       MatView_MPIAIJ,
                                       NULL,
                                       NULL,
                                /*64*/ NULL,
                                       MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                /*69*/ MatGetRowMaxAbs_MPIAIJ,
                                       MatGetRowMinAbs_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                /*75*/ MatFDColoringApply_AIJ,
                                       MatSetFromOptions_MPIAIJ,
                                       NULL,
                                       NULL,
                                       MatFindZeroDiagonals_MPIAIJ,
                                /*80*/ NULL,
                                       NULL,
                                       NULL,
                                /*83*/ MatLoad_MPIAIJ,
                                       MatIsSymmetric_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                /*89*/ NULL,
                                       NULL,
                                       MatMatMultNumeric_MPIAIJ_MPIAIJ,
                                       NULL,
                                       NULL,
                                /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatBindToCPU_MPIAIJ,
                                /*99*/ MatProductSetFromOptions_MPIAIJ,
                                       NULL,
                                       NULL,
                                       MatConjugate_MPIAIJ,
                                       NULL,
                                /*104*/MatSetValuesRow_MPIAIJ,
                                       MatRealPart_MPIAIJ,
                                       MatImaginaryPart_MPIAIJ,
                                       NULL,
                                       NULL,
                                /*109*/NULL,
                                       NULL,
                                       MatGetRowMin_MPIAIJ,
                                       NULL,
                                       MatMissingDiagonal_MPIAIJ,
                                /*114*/MatGetSeqNonzeroStructure_MPIAIJ,
                                       NULL,
                                       MatGetGhosts_MPIAIJ,
                                       NULL,
                                       NULL,
                                /*119*/MatMultDiagonalBlock_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatGetMultiProcBlock_MPIAIJ,
                                /*124*/MatFindNonzeroRows_MPIAIJ,
                                       MatGetColumnReductions_MPIAIJ,
                                       MatInvertBlockDiagonal_MPIAIJ,
                                       MatInvertVariableBlockDiagonal_MPIAIJ,
                                       MatCreateSubMatricesMPI_MPIAIJ,
                                /*129*/NULL,
                                       NULL,
                                       NULL,
                                       MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
                                       NULL,
                                /*134*/NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                /*139*/MatSetBlockSizes_MPIAIJ,
                                       NULL,
                                       NULL,
                                       MatFDColoringSetUp_MPIXAIJ,
                                       MatFindOffBlockDiagonalEntries_MPIAIJ,
                                       MatCreateMPIMatConcatenateSeqMat_MPIAIJ,
                                /*145*/NULL,
                                       NULL,
                                       NULL
};

/* ----------------------------------------------------------------------------------------*/

/* Save a copy of the current numerical values of both local blocks so they can be
   restored later with MatRetrieveValues(). */
PetscErrorCode MatStoreValues_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;

  PetscFunctionBegin;
  PetscCall(MatStoreValues(aij->A));
  PetscCall(MatStoreValues(aij->B));
  PetscFunctionReturn(0);
}

/* Restore the numerical values previously saved with MatStoreValues(). */
PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;

  PetscFunctionBegin;
  PetscCall(MatRetrieveValues(aij->A));
  PetscCall(MatRetrieveValues(aij->B));
  PetscFunctionReturn(0);
}

/* Type-specific preallocation: (re)creates the sequential diagonal (b->A) and
   off-diagonal (b->B) blocks and forwards the nz/nnz hints to them.  Any prior
   column map, ghost vector, and scatter are discarded because the sparsity
   pattern may change. */
PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
{
  Mat_MPIAIJ  *b;
  PetscMPIInt size;

  PetscFunctionBegin;
  PetscCall(PetscLayoutSetUp(B->rmap));
  PetscCall(PetscLayoutSetUp(B->cmap));
  b = (Mat_MPIAIJ*)B->data;

#if defined(PETSC_USE_CTABLE)
  PetscCall(PetscTableDestroy(&b->colmap));
#else
  PetscCall(PetscFree(b->colmap));
#endif
  PetscCall(PetscFree(b->garray));
  PetscCall(VecDestroy(&b->lvec));
  PetscCall(VecScatterDestroy(&b->Mvctx));

  /* Because the B will have been resized we simply destroy it and create a new one each time */
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B),&size));
  PetscCall(MatDestroy(&b->B));
  PetscCall(MatCreate(PETSC_COMM_SELF,&b->B));
  /* uniprocessor runs have no off-diagonal part, hence zero columns */
  PetscCall(MatSetSizes(b->B,B->rmap->n,size > 1 ? B->cmap->N : 0,B->rmap->n,size > 1 ?
B->cmap->N : 0));
  PetscCall(MatSetBlockSizesFromMats(b->B,B,B));
  PetscCall(MatSetType(b->B,MATSEQAIJ));
  PetscCall(PetscLogObjectParent((PetscObject)B,(PetscObject)b->B));

  /* the diagonal block survives re-preallocation; create it only the first time */
  if (!B->preallocated) {
    PetscCall(MatCreate(PETSC_COMM_SELF,&b->A));
    PetscCall(MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n));
    PetscCall(MatSetBlockSizesFromMats(b->A,B,B));
    PetscCall(MatSetType(b->A,MATSEQAIJ));
    PetscCall(PetscLogObjectParent((PetscObject)B,(PetscObject)b->A));
  }

  PetscCall(MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz));
  PetscCall(MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz));
  B->preallocated  = PETSC_TRUE;
  B->was_assembled = PETSC_FALSE;
  B->assembled     = PETSC_FALSE;
  PetscFunctionReturn(0);
}

/* Reset both local blocks to their freshly-preallocated state (pattern kept,
   assembly state cleared) so the matrix can be refilled from scratch. */
PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B)
{
  Mat_MPIAIJ *b;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(B,MAT_CLASSID,1);
  PetscCall(PetscLayoutSetUp(B->rmap));
  PetscCall(PetscLayoutSetUp(B->cmap));
  b = (Mat_MPIAIJ*)B->data;

  /* communication structures are rebuilt at the next assembly */
#if defined(PETSC_USE_CTABLE)
  PetscCall(PetscTableDestroy(&b->colmap));
#else
  PetscCall(PetscFree(b->colmap));
#endif
  PetscCall(PetscFree(b->garray));
  PetscCall(VecDestroy(&b->lvec));
  PetscCall(VecScatterDestroy(&b->Mvctx));

  PetscCall(MatResetPreallocation(b->A));
  PetscCall(MatResetPreallocation(b->B));
  B->preallocated  = PETSC_TRUE;
  B->was_assembled = PETSC_FALSE;
  B->assembled     = PETSC_FALSE;
  PetscFunctionReturn(0);
}

/* Deep-copy an MPIAIJ matrix: clones layouts, the column map, the ghost column
   array, the local scatter machinery, and finally both sequential blocks
   (values copied or not according to cpvalues). */
PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat        mat;
  Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data;

  PetscFunctionBegin;
  *newmat = NULL;
  PetscCall(MatCreate(PetscObjectComm((PetscObject)matin),&mat));
  PetscCall(MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N));
  PetscCall(MatSetBlockSizesFromMats(mat,matin,matin));
  PetscCall(MatSetType(mat,((PetscObject)matin)->type_name));
  a = (Mat_MPIAIJ*)mat->data;

  mat->factortype   = matin->factortype;
  mat->assembled    = matin->assembled;
  mat->insertmode   = NOT_SET_VALUES;
  mat->preallocated = matin->preallocated;

  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  a->donotstash   = oldmat->donotstash;
  a->roworiented  = oldmat->roworiented;
  a->rowindices   = NULL;  /* MatGetRow scratch state is not copied */
  a->rowvalues    = NULL;
  a->getrowactive = PETSC_FALSE;

  PetscCall(PetscLayoutReference(matin->rmap,&mat->rmap));
  PetscCall(PetscLayoutReference(matin->cmap,&mat->cmap));

  if (oldmat->colmap) {
#if defined(PETSC_USE_CTABLE)
    PetscCall(PetscTableCreateCopy(oldmat->colmap,&a->colmap));
#else
    PetscCall(PetscMalloc1(mat->cmap->N,&a->colmap));
    PetscCall(PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt)));
    PetscCall(PetscArraycpy(a->colmap,oldmat->colmap,mat->cmap->N));
#endif
  } else a->colmap = NULL;
  if (oldmat->garray) {
    PetscInt len;
    len = oldmat->B->cmap->n;
    PetscCall(PetscMalloc1(len+1,&a->garray));
    PetscCall(PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt)));
    if (len) PetscCall(PetscArraycpy(a->garray,oldmat->garray,len));
  } else a->garray = NULL;

  /* It may happen MatDuplicate is called with a non-assembled matrix
     In fact, MatDuplicate only requires the matrix to be preallocated
     This may happen inside a DMCreateMatrix_Shell */
  if (oldmat->lvec) {
    PetscCall(VecDuplicate(oldmat->lvec,&a->lvec));
    PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec));
  }
  if (oldmat->Mvctx) {
    PetscCall(VecScatterCopy(oldmat->Mvctx,&a->Mvctx));
    PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx));
  }
  PetscCall(MatDuplicate(oldmat->A,cpvalues,&a->A));
  PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A));
  PetscCall(MatDuplicate(oldmat->B,cpvalues,&a->B));
  PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B));
  /* carry over composed methods (e.g. MatMPIAIJSetPreallocation_C) */
  PetscCall(PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist));
  *newmat = mat;
  PetscFunctionReturn(0);
}

/* Dispatch matrix loading to the viewer-specific reader (binary or HDF5). */
PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
{
  PetscBool isbinary, ishdf5;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(newMat,MAT_CLASSID,1);
  PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2);
  /* force binary viewer to load .info file if it has not yet done so */
  PetscCall(PetscViewerSetUp(viewer));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERHDF5, &ishdf5));
  if (isbinary) {
    PetscCall(MatLoad_MPIAIJ_Binary(newMat,viewer));
  } else if (ishdf5) {
#if defined(PETSC_HAVE_HDF5)
    PetscCall(MatLoad_AIJ_HDF5(newMat,viewer));
#else
    SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"HDF5 not supported in this build.\nPlease reconfigure using --download-hdf5");
#endif
  } else {
    SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newMat)->type_name);
  }
  PetscFunctionReturn(0);
}

/* Read an MPIAIJ matrix from a PETSc binary viewer: header, per-row lengths
   (converted to CSR row offsets), then column indices and values, finishing
   with a CSR-based preallocation+fill. */
PetscErrorCode MatLoad_MPIAIJ_Binary(Mat mat, PetscViewer viewer)
{
  PetscInt    header[4],M,N,m,nz,rows,cols,sum,i;
  PetscInt    *rowidxs,*colidxs;
  PetscScalar *matvals;

  PetscFunctionBegin;
  PetscCall(PetscViewerSetUp(viewer));

  /* read in matrix header */
  PetscCall(PetscViewerBinaryRead(viewer,header,4,NULL,PETSC_INT));
  PetscCheck(header[0] == MAT_FILE_CLASSID,PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Not a matrix object in file");
  M = header[1]; N = header[2]; nz = header[3];
  PetscCheck(M >= 0,PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix row size (%" PetscInt_FMT ") in file is negative",M);
  PetscCheck(N >= 0,PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix column size (%" PetscInt_FMT ") in file is negative",N);
  /* nz < 0 flags a dense/special on-disk format that this reader cannot handle */
  PetscCheck(nz >= 0,PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk, cannot load as MPIAIJ");

  /* set block sizes from the viewer's .info file */
  PetscCall(MatLoad_Binary_BlockSizes(mat,viewer));
  /* set global sizes if not set already */
  if (mat->rmap->N < 0) mat->rmap->N = M;
  if (mat->cmap->N < 0) mat->cmap->N = N;
  PetscCall(PetscLayoutSetUp(mat->rmap));
  PetscCall(PetscLayoutSetUp(mat->cmap));

  /* check if the matrix sizes are correct */
  PetscCall(MatGetSize(mat,&rows,&cols));
  PetscCheck(M == rows && N == cols,PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%" PetscInt_FMT ", %" PetscInt_FMT ") than the input matrix (%" PetscInt_FMT ", %" PetscInt_FMT ")",M,N,rows,cols);

  /* read in row lengths and build row indices */
  PetscCall(MatGetLocalSize(mat,&m,NULL));
  PetscCall(PetscMalloc1(m+1,&rowidxs));
  PetscCall(PetscViewerBinaryReadAll(viewer,rowidxs+1,m,PETSC_DECIDE,M,PETSC_INT));
  /* prefix-sum the row lengths into CSR offsets */
  rowidxs[0] = 0; for (i=0; i<m; i++) rowidxs[i+1] += rowidxs[i];
  /* sanity check: the distributed row lengths must account for all nonzeros */
  PetscCall(MPIU_Allreduce(&rowidxs[m],&sum,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)viewer)));
  PetscCheck(sum == nz,PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Inconsistent matrix data in file: nonzeros = %" PetscInt_FMT ", sum-row-lengths = %" PetscInt_FMT,nz,sum);
  /* read in column indices and matrix values */
  PetscCall(PetscMalloc2(rowidxs[m],&colidxs,rowidxs[m],&matvals));
  PetscCall(PetscViewerBinaryReadAll(viewer,colidxs,rowidxs[m],PETSC_DETERMINE,PETSC_DETERMINE,PETSC_INT));
  PetscCall(PetscViewerBinaryReadAll(viewer,matvals,rowidxs[m],PETSC_DETERMINE,PETSC_DETERMINE,PETSC_SCALAR));
  /* store matrix indices and values */
  PetscCall(MatMPIAIJSetPreallocationCSR(mat,rowidxs,colidxs,matvals));
  PetscCall(PetscFree(rowidxs));
  PetscCall(PetscFree2(colidxs,matvals));
  PetscFunctionReturn(0);
}

/* Not scalable because of ISAllGather() unless getting all columns. */
/* Produce a sequential IS with the full (global) column selection; detects the
   common "all columns, natural order" case so the all-gather can be skipped. */
PetscErrorCode ISGetSeqIS_Private(Mat mat,IS iscol,IS *isseq)
{
  IS          iscol_local;
  PetscBool   isstride;
  PetscMPIInt lisstride=0,gisstride;

  PetscFunctionBegin;
  /* check if we are grabbing all columns*/
  PetscCall(PetscObjectTypeCompare((PetscObject)iscol,ISSTRIDE,&isstride));

  if (isstride) {
    PetscInt start,len,mstart,mlen;
    PetscCall(ISStrideGetInfo(iscol,&start,NULL));
    PetscCall(ISGetLocalSize(iscol,&len));
    PetscCall(MatGetOwnershipRangeColumn(mat,&mstart,&mlen));
    if (mstart == start && mlen-mstart == len) lisstride = 1;
  }

  /* MPI_MIN: all ranks must match their ownership range for the fast path */
  PetscCall(MPIU_Allreduce(&lisstride,&gisstride,1,MPI_INT,MPI_MIN,PetscObjectComm((PetscObject)mat)));
  if (gisstride) {
    PetscInt N;
    PetscCall(MatGetSize(mat,NULL,&N));
    PetscCall(ISCreateStride(PETSC_COMM_SELF,N,0,1,&iscol_local));
    PetscCall(ISSetIdentity(iscol_local));
    PetscCall(PetscInfo(mat,"Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n"));
  } else {
    PetscInt cbs;
    PetscCall(ISGetBlockSize(iscol,&cbs));
    PetscCall(ISAllGather(iscol,&iscol_local));
    PetscCall(ISSetBlockSize(iscol_local,cbs));
  }

  *isseq = iscol_local;
  PetscFunctionReturn(0);
}

/*
 Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and global size of iscol_local
 (see MatCreateSubMatrix_MPIAIJ_nonscalable)

 Input Parameters:
   mat - matrix
   isrow - parallel row index set; its local indices are a subset of local columns of mat,
           i.e., mat->rstart <= isrow[i] < mat->rend
   iscol - parallel column index set; its local indices are a subset of local columns of mat,
           i.e., mat->cstart <= iscol[i] < mat->cend
 Output Parameter:
   isrow_d,iscol_d - sequential row and column index sets for retrieving mat->A
   iscol_o - sequential column index set for retrieving mat->B
   garray - column map; garray[i] indicates global location of iscol_o[i] in iscol
 */
PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat,IS isrow,IS iscol,IS *isrow_d,IS *iscol_d,IS *iscol_o,const PetscInt *garray[])
{
  Vec            x,cmap;
  const PetscInt *is_idx;
  PetscScalar    *xarray,*cmaparray;
  PetscInt       ncols,isstart,*idx,m,rstart,*cmap1,count;
  Mat_MPIAIJ     *a=(Mat_MPIAIJ*)mat->data;
  Mat            B=a->B;
  Vec            lvec=a->lvec,lcmap;
  PetscInt       i,cstart,cend,Bn=B->cmap->N;
  MPI_Comm       comm;
  VecScatter     Mvctx=a->Mvctx;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)mat,&comm));
  PetscCall(ISGetLocalSize(iscol,&ncols));

  /* (1) iscol is a sub-column vector of mat, pad it with '-1.' to form a full vector x */
  PetscCall(MatCreateVecs(mat,&x,NULL));
  PetscCall(VecSet(x,-1.0));
  PetscCall(VecDuplicate(x,&cmap));
  PetscCall(VecSet(cmap,-1.0));

  /* Get start indices */
  /* exclusive prefix: MPI_Scan is inclusive, so subtract our own contribution */
  PetscCallMPI(MPI_Scan(&ncols,&isstart,1,MPIU_INT,MPI_SUM,comm));
  isstart -= ncols;
  PetscCall(MatGetOwnershipRangeColumn(mat,&cstart,&cend));

  PetscCall(ISGetIndices(iscol,&is_idx));
  PetscCall(VecGetArray(x,&xarray));
  PetscCall(VecGetArray(cmap,&cmaparray));
  PetscCall(PetscMalloc1(ncols,&idx));
  for (i=0; i<ncols; i++) {
    xarray[is_idx[i]-cstart]    = (PetscScalar)is_idx[i];
    cmaparray[is_idx[i]-cstart] = i + isstart;      /* global index of iscol[i] */
    idx[i]                      = is_idx[i]-cstart; /* local index of iscol[i]  */
  }
  PetscCall(VecRestoreArray(x,&xarray));
  PetscCall(VecRestoreArray(cmap,&cmaparray));
  PetscCall(ISRestoreIndices(iscol,&is_idx));

  /* Get iscol_d */
  PetscCall(ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,iscol_d));
  PetscCall(ISGetBlockSize(iscol,&i));
  PetscCall(ISSetBlockSize(*iscol_d,i));

  /* Get isrow_d */
  PetscCall(ISGetLocalSize(isrow,&m));
  rstart = mat->rmap->rstart;
  PetscCall(PetscMalloc1(m,&idx));
  PetscCall(ISGetIndices(isrow,&is_idx));
  for (i=0; i<m; i++) idx[i] = is_idx[i]-rstart;
  PetscCall(ISRestoreIndices(isrow,&is_idx));

  PetscCall(ISCreateGeneral(PETSC_COMM_SELF,m,idx,PETSC_OWN_POINTER,isrow_d));
  PetscCall(ISGetBlockSize(isrow,&i));
  PetscCall(ISSetBlockSize(*isrow_d,i));

  /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */
  PetscCall(VecScatterBegin(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD));
  PetscCall(VecScatterEnd(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD));

  PetscCall(VecDuplicate(lvec,&lcmap));

  PetscCall(VecScatterBegin(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD));
  PetscCall(VecScatterEnd(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD));

  /* (3) create sequential iscol_o (a subset of iscol) and isgarray */
  /* off-process column indices */
  count = 0;
  PetscCall(PetscMalloc1(Bn,&idx));
  PetscCall(PetscMalloc1(Bn,&cmap1));

  /* entries that survived the scatter (> -1) mark B columns selected by iscol */
  PetscCall(VecGetArray(lvec,&xarray));
  PetscCall(VecGetArray(lcmap,&cmaparray));
  for (i=0; i<Bn; i++) {
    if (PetscRealPart(xarray[i]) > -1.0) {
      idx[count]   = i;                                     /* local column index in off-diagonal part B */
      cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]); /* column index in submat */
      count++;
    }
  }
  PetscCall(VecRestoreArray(lvec,&xarray));
  PetscCall(VecRestoreArray(lcmap,&cmaparray));

  PetscCall(ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_COPY_VALUES,iscol_o));
  /* cannot ensure iscol_o has same blocksize as iscol! */

  PetscCall(PetscFree(idx));
  *garray = cmap1; /* caller takes ownership and must PetscFree() it */

  PetscCall(VecDestroy(&x));
  PetscCall(VecDestroy(&cmap));
  PetscCall(VecDestroy(&lcmap));
  PetscFunctionReturn(0);
}

/* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */
PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *submat)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)mat->data,*asub;
  Mat        M = NULL;
  MPI_Comm   comm;
  IS         iscol_d,isrow_d,iscol_o;
  Mat        Asub = NULL,Bsub = NULL;
  PetscInt   n;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)mat,&comm));

  if (call == MAT_REUSE_MATRIX) {
    /* Retrieve isrow_d, iscol_d and iscol_o from submat (stashed there at creation) */
    PetscCall(PetscObjectQuery((PetscObject)*submat,"isrow_d",(PetscObject*)&isrow_d));
    PetscCheck(isrow_d,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"isrow_d passed in was not used before, cannot reuse");

    PetscCall(PetscObjectQuery((PetscObject)*submat,"iscol_d",(PetscObject*)&iscol_d));
    PetscCheck(iscol_d,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_d passed in was not used before, cannot reuse");

    PetscCall(PetscObjectQuery((PetscObject)*submat,"iscol_o",(PetscObject*)&iscol_o));
    PetscCheck(iscol_o,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_o passed in was not used before, cannot reuse");

    /* Update diagonal and off-diagonal portions of submat */
    asub = (Mat_MPIAIJ*)(*submat)->data;
    PetscCall(MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->A));
    PetscCall(ISGetLocalSize(iscol_o,&n));
    if (n) {
      PetscCall(MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->B));
    }
    PetscCall(MatAssemblyBegin(*submat,MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(*submat,MAT_FINAL_ASSEMBLY));

  } else { /* call == MAT_INITIAL_MATRIX) */
    const PetscInt *garray;
    PetscInt       BsubN;

    /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) */
    PetscCall(ISGetSeqIS_SameColDist_Private(mat,isrow,iscol,&isrow_d,&iscol_d,&iscol_o,&garray));

    /* Create local submatrices Asub and Bsub */
    PetscCall(MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Asub));
    PetscCall(MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Bsub));

    /* Create submatrix M */
    PetscCall(MatCreateMPIAIJWithSeqAIJ(comm,Asub,Bsub,garray,&M));

    /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */
    asub = (Mat_MPIAIJ*)M->data;

    PetscCall(ISGetLocalSize(iscol_o,&BsubN));
    n = asub->B->cmap->N;
    if (BsubN > n) {
      /* This case can be tested using ~petsc/src/tao/bound/tutorials/runplate2_3 */
      const PetscInt *idx;
      PetscInt       i,j,*idx_new,*subgarray = asub->garray;
      PetscCall(PetscInfo(M,"submatrix Bn %" PetscInt_FMT " != BsubN %" PetscInt_FMT ", update iscol_o\n",n,BsubN));

      /* merge-walk both sorted ghost-column lists to keep only surviving columns */
      PetscCall(PetscMalloc1(n,&idx_new));
      j = 0;
      PetscCall(ISGetIndices(iscol_o,&idx));
      for (i=0; i<n; i++) {
        if (j >= BsubN) break;
        while (subgarray[i] > garray[j]) j++;

        if (subgarray[i] == garray[j]) {
          idx_new[i] = idx[j++];
        } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"subgarray[%" PetscInt_FMT "]=%" PetscInt_FMT " cannot < garray[%" PetscInt_FMT "]=%" PetscInt_FMT,i,subgarray[i],j,garray[j]);
      }
      PetscCall(ISRestoreIndices(iscol_o,&idx));

      PetscCall(ISDestroy(&iscol_o));
      PetscCall(ISCreateGeneral(PETSC_COMM_SELF,n,idx_new,PETSC_OWN_POINTER,&iscol_o));

    } else if (BsubN < n) {
      SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Columns of Bsub (%" PetscInt_FMT ") cannot be smaller than B's (%" PetscInt_FMT ")",BsubN,asub->B->cmap->N);
    }

    PetscCall(PetscFree(garray));
    *submat = M;

    /* Save isrow_d, iscol_d and iscol_o used in processor for next request
     */
    PetscCall(PetscObjectCompose((PetscObject)M,"isrow_d",(PetscObject)isrow_d));
    PetscCall(ISDestroy(&isrow_d));

    PetscCall(PetscObjectCompose((PetscObject)M,"iscol_d",(PetscObject)iscol_d));
    PetscCall(ISDestroy(&iscol_d));

    PetscCall(PetscObjectCompose((PetscObject)M,"iscol_o",(PetscObject)iscol_o));
    PetscCall(ISDestroy(&iscol_o));
  }
  PetscFunctionReturn(0);
}

/* Top-level submatrix extraction for MPIAIJ.  Chooses among three strategies:
   (a) rows AND columns match mat's distribution, (b) only rows match, and
   (c) the general non-scalable path that gathers iscol onto every process.
   The chosen path records state on the new matrix so MAT_REUSE_MATRIX can
   take the same path again. */
PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
{
  IS        iscol_local=NULL,isrow_d;
  PetscInt  csize;
  PetscInt  n,i,j,start,end;
  PetscBool sameRowDist=PETSC_FALSE,sameDist[2],tsameDist[2];
  MPI_Comm  comm;

  PetscFunctionBegin;
  /* If isrow has same processor distribution as mat,
     call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */
  if (call == MAT_REUSE_MATRIX) {
    /* which composed key is present tells us which path built *newmat */
    PetscCall(PetscObjectQuery((PetscObject)*newmat,"isrow_d",(PetscObject*)&isrow_d));
    if (isrow_d) {
      sameRowDist  = PETSC_TRUE;
      tsameDist[1] = PETSC_TRUE; /* sameColDist */
    } else {
      PetscCall(PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_local));
      if (iscol_local) {
        sameRowDist  = PETSC_TRUE;
        tsameDist[1] = PETSC_FALSE; /* !sameColDist */
      }
    }
  } else {
    /* Check if isrow has same processor distribution as mat */
    sameDist[0] = PETSC_FALSE;
    PetscCall(ISGetLocalSize(isrow,&n));
    if (!n) {
      sameDist[0] = PETSC_TRUE;
    } else {
      PetscCall(ISGetMinMax(isrow,&i,&j));
      PetscCall(MatGetOwnershipRange(mat,&start,&end));
      if (i >= start && j < end) {
        sameDist[0] = PETSC_TRUE;
      }
    }

    /* Check if iscol has same processor distribution as mat */
    sameDist[1] = PETSC_FALSE;
    PetscCall(ISGetLocalSize(iscol,&n));
    if (!n) {
      sameDist[1] = PETSC_TRUE;
    } else {
      PetscCall(ISGetMinMax(iscol,&i,&j));
      PetscCall(MatGetOwnershipRangeColumn(mat,&start,&end));
      if (i >= start && j < end) sameDist[1] = PETSC_TRUE;
    }

    /* MPI_LAND: the fast paths require the property to hold on every rank */
    PetscCall(PetscObjectGetComm((PetscObject)mat,&comm));
    PetscCall(MPIU_Allreduce(&sameDist,&tsameDist,2,MPIU_BOOL,MPI_LAND,comm));
    sameRowDist = tsameDist[0];
  }

  if (sameRowDist) {
    if (tsameDist[1]) { /* sameRowDist & sameColDist */
      /* isrow and iscol have same processor distribution as mat */
      PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat,isrow,iscol,call,newmat));
      PetscFunctionReturn(0);
    } else { /* sameRowDist */
      /* isrow has same processor distribution as mat */
      if (call == MAT_INITIAL_MATRIX) {
        PetscBool sorted;
        PetscCall(ISGetSeqIS_Private(mat,iscol,&iscol_local));
        PetscCall(ISGetLocalSize(iscol_local,&n)); /* local size of iscol_local = global columns of newmat */
        PetscCall(ISGetSize(iscol,&i));
        PetscCheck(n == i,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"n %" PetscInt_FMT " != size of iscol %" PetscInt_FMT,n,i);

        PetscCall(ISSorted(iscol_local,&sorted));
        if (sorted) {
          /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local be sorted; it can have duplicate indices */
          PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,iscol_local,MAT_INITIAL_MATRIX,newmat));
          PetscFunctionReturn(0);
        }
        /* unsorted iscol_local falls through to the general path below */
      } else { /* call == MAT_REUSE_MATRIX */
        IS iscol_sub;
        PetscCall(PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub));
        if (iscol_sub) {
          PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,NULL,call,newmat));
          PetscFunctionReturn(0);
        }
      }
    }
  }

  /* General case: iscol -> iscol_local which has global size of iscol */
  if (call == MAT_REUSE_MATRIX) {
    PetscCall(PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local));
    PetscCheck(iscol_local,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
  } else {
    if (!iscol_local) {
      PetscCall(ISGetSeqIS_Private(mat,iscol,&iscol_local));
    }
  }

  PetscCall(ISGetLocalSize(iscol,&csize));
  PetscCall(MatCreateSubMatrix_MPIAIJ_nonscalable(mat,isrow,iscol_local,csize,call,newmat));

  if (call == MAT_INITIAL_MATRIX) {
    PetscCall(PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local));
    PetscCall(ISDestroy(&iscol_local));
  }
  PetscFunctionReturn(0);
}

/*@C
     MatCreateMPIAIJWithSeqAIJ - creates a MPIAIJ matrix using SeqAIJ matrices that contain the "diagonal"
     and "off-diagonal" part of the matrix in CSR format.

   Collective

   Input Parameters:
+  comm - MPI communicator
.  A - "diagonal" portion of matrix
.  B - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine
-  garray - global index of B columns

   Output Parameter:
.   mat - the matrix, with input A as its local diagonal matrix
   Level: advanced

   Notes:
       See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix.
       A becomes part of output mat, B is destroyed by this routine. The user cannot use A and B anymore.
3416 3417 .seealso: `MatCreateMPIAIJWithSplitArrays()` 3418 @*/ 3419 PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm,Mat A,Mat B,const PetscInt garray[],Mat *mat) 3420 { 3421 Mat_MPIAIJ *maij; 3422 Mat_SeqAIJ *b=(Mat_SeqAIJ*)B->data,*bnew; 3423 PetscInt *oi=b->i,*oj=b->j,i,nz,col; 3424 const PetscScalar *oa; 3425 Mat Bnew; 3426 PetscInt m,n,N; 3427 3428 PetscFunctionBegin; 3429 PetscCall(MatCreate(comm,mat)); 3430 PetscCall(MatGetSize(A,&m,&n)); 3431 PetscCheck(m == B->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Am %" PetscInt_FMT " != Bm %" PetscInt_FMT,m,B->rmap->N); 3432 PetscCheck(A->rmap->bs == B->rmap->bs,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A row bs %" PetscInt_FMT " != B row bs %" PetscInt_FMT,A->rmap->bs,B->rmap->bs); 3433 /* remove check below; When B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be same as A */ 3434 /* PetscCheck(A->cmap->bs == B->cmap->bs,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %" PetscInt_FMT " != B column bs %" PetscInt_FMT,A->cmap->bs,B->cmap->bs); */ 3435 3436 /* Get global columns of mat */ 3437 PetscCall(MPIU_Allreduce(&n,&N,1,MPIU_INT,MPI_SUM,comm)); 3438 3439 PetscCall(MatSetSizes(*mat,m,n,PETSC_DECIDE,N)); 3440 PetscCall(MatSetType(*mat,MATMPIAIJ)); 3441 PetscCall(MatSetBlockSizes(*mat,A->rmap->bs,A->cmap->bs)); 3442 maij = (Mat_MPIAIJ*)(*mat)->data; 3443 3444 (*mat)->preallocated = PETSC_TRUE; 3445 3446 PetscCall(PetscLayoutSetUp((*mat)->rmap)); 3447 PetscCall(PetscLayoutSetUp((*mat)->cmap)); 3448 3449 /* Set A as diagonal portion of *mat */ 3450 maij->A = A; 3451 3452 nz = oi[m]; 3453 for (i=0; i<nz; i++) { 3454 col = oj[i]; 3455 oj[i] = garray[col]; 3456 } 3457 3458 /* Set Bnew as off-diagonal portion of *mat */ 3459 PetscCall(MatSeqAIJGetArrayRead(B,&oa)); 3460 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,N,oi,oj,(PetscScalar*)oa,&Bnew)); 3461 PetscCall(MatSeqAIJRestoreArrayRead(B,&oa)); 3462 bnew = (Mat_SeqAIJ*)Bnew->data; 3463 bnew->maxnz = 
b->maxnz; /* allocated nonzeros of B */ 3464 maij->B = Bnew; 3465 3466 PetscCheck(B->rmap->N == Bnew->rmap->N,PETSC_COMM_SELF,PETSC_ERR_PLIB,"BN %" PetscInt_FMT " != BnewN %" PetscInt_FMT,B->rmap->N,Bnew->rmap->N); 3467 3468 b->singlemalloc = PETSC_FALSE; /* B arrays are shared by Bnew */ 3469 b->free_a = PETSC_FALSE; 3470 b->free_ij = PETSC_FALSE; 3471 PetscCall(MatDestroy(&B)); 3472 3473 bnew->singlemalloc = PETSC_TRUE; /* arrays will be freed by MatDestroy(&Bnew) */ 3474 bnew->free_a = PETSC_TRUE; 3475 bnew->free_ij = PETSC_TRUE; 3476 3477 /* condense columns of maij->B */ 3478 PetscCall(MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE)); 3479 PetscCall(MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY)); 3480 PetscCall(MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY)); 3481 PetscCall(MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE)); 3482 PetscCall(MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE)); 3483 PetscFunctionReturn(0); 3484 } 3485 3486 extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool,Mat*); 3487 3488 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat,IS isrow,IS iscol,IS iscol_local,MatReuse call,Mat *newmat) 3489 { 3490 PetscInt i,m,n,rstart,row,rend,nz,j,bs,cbs; 3491 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 3492 Mat_MPIAIJ *a=(Mat_MPIAIJ*)mat->data; 3493 Mat M,Msub,B=a->B; 3494 MatScalar *aa; 3495 Mat_SeqAIJ *aij; 3496 PetscInt *garray = a->garray,*colsub,Ncols; 3497 PetscInt count,Bn=B->cmap->N,cstart=mat->cmap->rstart,cend=mat->cmap->rend; 3498 IS iscol_sub,iscmap; 3499 const PetscInt *is_idx,*cmap; 3500 PetscBool allcolumns=PETSC_FALSE; 3501 MPI_Comm comm; 3502 3503 PetscFunctionBegin; 3504 PetscCall(PetscObjectGetComm((PetscObject)mat,&comm)); 3505 if (call == MAT_REUSE_MATRIX) { 3506 PetscCall(PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub)); 3507 PetscCheck(iscol_sub,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"SubIScol 
passed in was not used before, cannot reuse"); 3508 PetscCall(ISGetLocalSize(iscol_sub,&count)); 3509 3510 PetscCall(PetscObjectQuery((PetscObject)*newmat,"Subcmap",(PetscObject*)&iscmap)); 3511 PetscCheck(iscmap,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Subcmap passed in was not used before, cannot reuse"); 3512 3513 PetscCall(PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Msub)); 3514 PetscCheck(Msub,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3515 3516 PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_REUSE_MATRIX,PETSC_FALSE,&Msub)); 3517 3518 } else { /* call == MAT_INITIAL_MATRIX) */ 3519 PetscBool flg; 3520 3521 PetscCall(ISGetLocalSize(iscol,&n)); 3522 PetscCall(ISGetSize(iscol,&Ncols)); 3523 3524 /* (1) iscol -> nonscalable iscol_local */ 3525 /* Check for special case: each processor gets entire matrix columns */ 3526 PetscCall(ISIdentity(iscol_local,&flg)); 3527 if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE; 3528 PetscCall(MPIU_Allreduce(MPI_IN_PLACE,&allcolumns,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)mat))); 3529 if (allcolumns) { 3530 iscol_sub = iscol_local; 3531 PetscCall(PetscObjectReference((PetscObject)iscol_local)); 3532 PetscCall(ISCreateStride(PETSC_COMM_SELF,n,0,1,&iscmap)); 3533 3534 } else { 3535 /* (2) iscol_local -> iscol_sub and iscmap. 
Implementation below requires iscol_local be sorted, it can have duplicate indices */ 3536 PetscInt *idx,*cmap1,k; 3537 PetscCall(PetscMalloc1(Ncols,&idx)); 3538 PetscCall(PetscMalloc1(Ncols,&cmap1)); 3539 PetscCall(ISGetIndices(iscol_local,&is_idx)); 3540 count = 0; 3541 k = 0; 3542 for (i=0; i<Ncols; i++) { 3543 j = is_idx[i]; 3544 if (j >= cstart && j < cend) { 3545 /* diagonal part of mat */ 3546 idx[count] = j; 3547 cmap1[count++] = i; /* column index in submat */ 3548 } else if (Bn) { 3549 /* off-diagonal part of mat */ 3550 if (j == garray[k]) { 3551 idx[count] = j; 3552 cmap1[count++] = i; /* column index in submat */ 3553 } else if (j > garray[k]) { 3554 while (j > garray[k] && k < Bn-1) k++; 3555 if (j == garray[k]) { 3556 idx[count] = j; 3557 cmap1[count++] = i; /* column index in submat */ 3558 } 3559 } 3560 } 3561 } 3562 PetscCall(ISRestoreIndices(iscol_local,&is_idx)); 3563 3564 PetscCall(ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_OWN_POINTER,&iscol_sub)); 3565 PetscCall(ISGetBlockSize(iscol,&cbs)); 3566 PetscCall(ISSetBlockSize(iscol_sub,cbs)); 3567 3568 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local),count,cmap1,PETSC_OWN_POINTER,&iscmap)); 3569 } 3570 3571 /* (3) Create sequential Msub */ 3572 PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_INITIAL_MATRIX,allcolumns,&Msub)); 3573 } 3574 3575 PetscCall(ISGetLocalSize(iscol_sub,&count)); 3576 aij = (Mat_SeqAIJ*)(Msub)->data; 3577 ii = aij->i; 3578 PetscCall(ISGetIndices(iscmap,&cmap)); 3579 3580 /* 3581 m - number of local rows 3582 Ncols - number of columns (same on all processors) 3583 rstart - first row in new global matrix generated 3584 */ 3585 PetscCall(MatGetSize(Msub,&m,NULL)); 3586 3587 if (call == MAT_INITIAL_MATRIX) { 3588 /* (4) Create parallel newmat */ 3589 PetscMPIInt rank,size; 3590 PetscInt csize; 3591 3592 PetscCallMPI(MPI_Comm_size(comm,&size)); 3593 PetscCallMPI(MPI_Comm_rank(comm,&rank)); 3594 3595 /* 3596 Determine the 
number of non-zeros in the diagonal and off-diagonal 3597 portions of the matrix in order to do correct preallocation 3598 */ 3599 3600 /* first get start and end of "diagonal" columns */ 3601 PetscCall(ISGetLocalSize(iscol,&csize)); 3602 if (csize == PETSC_DECIDE) { 3603 PetscCall(ISGetSize(isrow,&mglobal)); 3604 if (mglobal == Ncols) { /* square matrix */ 3605 nlocal = m; 3606 } else { 3607 nlocal = Ncols/size + ((Ncols % size) > rank); 3608 } 3609 } else { 3610 nlocal = csize; 3611 } 3612 PetscCallMPI(MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm)); 3613 rstart = rend - nlocal; 3614 PetscCheck(rank != size - 1 || rend == Ncols,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT,rend,Ncols); 3615 3616 /* next, compute all the lengths */ 3617 jj = aij->j; 3618 PetscCall(PetscMalloc1(2*m+1,&dlens)); 3619 olens = dlens + m; 3620 for (i=0; i<m; i++) { 3621 jend = ii[i+1] - ii[i]; 3622 olen = 0; 3623 dlen = 0; 3624 for (j=0; j<jend; j++) { 3625 if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++; 3626 else dlen++; 3627 jj++; 3628 } 3629 olens[i] = olen; 3630 dlens[i] = dlen; 3631 } 3632 3633 PetscCall(ISGetBlockSize(isrow,&bs)); 3634 PetscCall(ISGetBlockSize(iscol,&cbs)); 3635 3636 PetscCall(MatCreate(comm,&M)); 3637 PetscCall(MatSetSizes(M,m,nlocal,PETSC_DECIDE,Ncols)); 3638 PetscCall(MatSetBlockSizes(M,bs,cbs)); 3639 PetscCall(MatSetType(M,((PetscObject)mat)->type_name)); 3640 PetscCall(MatMPIAIJSetPreallocation(M,0,dlens,0,olens)); 3641 PetscCall(PetscFree(dlens)); 3642 3643 } else { /* call == MAT_REUSE_MATRIX */ 3644 M = *newmat; 3645 PetscCall(MatGetLocalSize(M,&i,NULL)); 3646 PetscCheck(i == m,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3647 PetscCall(MatZeroEntries(M)); 3648 /* 3649 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3650 rather than the slower MatSetValues(). 
3651 */ 3652 M->was_assembled = PETSC_TRUE; 3653 M->assembled = PETSC_FALSE; 3654 } 3655 3656 /* (5) Set values of Msub to *newmat */ 3657 PetscCall(PetscMalloc1(count,&colsub)); 3658 PetscCall(MatGetOwnershipRange(M,&rstart,NULL)); 3659 3660 jj = aij->j; 3661 PetscCall(MatSeqAIJGetArrayRead(Msub,(const PetscScalar**)&aa)); 3662 for (i=0; i<m; i++) { 3663 row = rstart + i; 3664 nz = ii[i+1] - ii[i]; 3665 for (j=0; j<nz; j++) colsub[j] = cmap[jj[j]]; 3666 PetscCall(MatSetValues_MPIAIJ(M,1,&row,nz,colsub,aa,INSERT_VALUES)); 3667 jj += nz; aa += nz; 3668 } 3669 PetscCall(MatSeqAIJRestoreArrayRead(Msub,(const PetscScalar**)&aa)); 3670 PetscCall(ISRestoreIndices(iscmap,&cmap)); 3671 3672 PetscCall(MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY)); 3673 PetscCall(MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY)); 3674 3675 PetscCall(PetscFree(colsub)); 3676 3677 /* save Msub, iscol_sub and iscmap used in processor for next request */ 3678 if (call == MAT_INITIAL_MATRIX) { 3679 *newmat = M; 3680 PetscCall(PetscObjectCompose((PetscObject)(*newmat),"SubMatrix",(PetscObject)Msub)); 3681 PetscCall(MatDestroy(&Msub)); 3682 3683 PetscCall(PetscObjectCompose((PetscObject)(*newmat),"SubIScol",(PetscObject)iscol_sub)); 3684 PetscCall(ISDestroy(&iscol_sub)); 3685 3686 PetscCall(PetscObjectCompose((PetscObject)(*newmat),"Subcmap",(PetscObject)iscmap)); 3687 PetscCall(ISDestroy(&iscmap)); 3688 3689 if (iscol_local) { 3690 PetscCall(PetscObjectCompose((PetscObject)(*newmat),"ISAllGather",(PetscObject)iscol_local)); 3691 PetscCall(ISDestroy(&iscol_local)); 3692 } 3693 } 3694 PetscFunctionReturn(0); 3695 } 3696 3697 /* 3698 Not great since it makes two copies of the submatrix, first an SeqAIJ 3699 in local and then by concatenating the local matrices the end result. 3700 Writing it directly would be much like MatCreateSubMatrices_MPIAIJ() 3701 3702 Note: This requires a sequential iscol with all indices. 
3703 */ 3704 PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 3705 { 3706 PetscMPIInt rank,size; 3707 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs; 3708 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 3709 Mat M,Mreuse; 3710 MatScalar *aa,*vwork; 3711 MPI_Comm comm; 3712 Mat_SeqAIJ *aij; 3713 PetscBool colflag,allcolumns=PETSC_FALSE; 3714 3715 PetscFunctionBegin; 3716 PetscCall(PetscObjectGetComm((PetscObject)mat,&comm)); 3717 PetscCallMPI(MPI_Comm_rank(comm,&rank)); 3718 PetscCallMPI(MPI_Comm_size(comm,&size)); 3719 3720 /* Check for special case: each processor gets entire matrix columns */ 3721 PetscCall(ISIdentity(iscol,&colflag)); 3722 PetscCall(ISGetLocalSize(iscol,&n)); 3723 if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE; 3724 PetscCall(MPIU_Allreduce(MPI_IN_PLACE,&allcolumns,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)mat))); 3725 3726 if (call == MAT_REUSE_MATRIX) { 3727 PetscCall(PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse)); 3728 PetscCheck(Mreuse,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3729 PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,allcolumns,&Mreuse)); 3730 } else { 3731 PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,allcolumns,&Mreuse)); 3732 } 3733 3734 /* 3735 m - number of local rows 3736 n - number of columns (same on all processors) 3737 rstart - first row in new global matrix generated 3738 */ 3739 PetscCall(MatGetSize(Mreuse,&m,&n)); 3740 PetscCall(MatGetBlockSizes(Mreuse,&bs,&cbs)); 3741 if (call == MAT_INITIAL_MATRIX) { 3742 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3743 ii = aij->i; 3744 jj = aij->j; 3745 3746 /* 3747 Determine the number of non-zeros in the diagonal and off-diagonal 3748 portions of the matrix in order to do correct preallocation 3749 */ 3750 3751 
    /* first get start and end of "diagonal" columns */
    if (csize == PETSC_DECIDE) {
      PetscCall(ISGetSize(isrow,&mglobal));
      if (mglobal == n) { /* square matrix */
        nlocal = m;
      } else {
        nlocal = n/size + ((n % size) > rank);
      }
    } else {
      nlocal = csize;
    }
    PetscCallMPI(MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm));
    rstart = rend - nlocal;
    PetscCheck(rank != size - 1 || rend == n,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT,rend,n);

    /* next, compute all the lengths */
    PetscCall(PetscMalloc1(2*m+1,&dlens));
    olens = dlens + m; /* olens shares the single allocation with dlens */
    for (i=0; i<m; i++) {
      jend = ii[i+1] - ii[i];
      olen = 0;
      dlen = 0;
      for (j=0; j<jend; j++) {
        if (*jj < rstart || *jj >= rend) olen++;
        else dlen++;
        jj++;
      }
      olens[i] = olen;
      dlens[i] = dlen;
    }
    PetscCall(MatCreate(comm,&M));
    PetscCall(MatSetSizes(M,m,nlocal,PETSC_DECIDE,n));
    PetscCall(MatSetBlockSizes(M,bs,cbs));
    PetscCall(MatSetType(M,((PetscObject)mat)->type_name));
    PetscCall(MatMPIAIJSetPreallocation(M,0,dlens,0,olens));
    PetscCall(PetscFree(dlens));
  } else {
    PetscInt ml,nl;

    M = *newmat;
    PetscCall(MatGetLocalSize(M,&ml,&nl));
    PetscCheck(ml == m,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
    PetscCall(MatZeroEntries(M));
    /*
        The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
        rather than the slower MatSetValues().
    */
    M->was_assembled = PETSC_TRUE;
    M->assembled     = PETSC_FALSE;
  }
  PetscCall(MatGetOwnershipRange(M,&rstart,&rend));
  aij = (Mat_SeqAIJ*)(Mreuse)->data;
  ii  = aij->i;
  jj  = aij->j;

  /* trigger copy to CPU if needed */
  PetscCall(MatSeqAIJGetArrayRead(Mreuse,(const PetscScalar**)&aa));
  for (i=0; i<m; i++) {
    row = rstart + i;
    nz  = ii[i+1] - ii[i];
    cwork = jj; jj += nz;
    vwork = aa; aa += nz;
    PetscCall(MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES));
  }
  PetscCall(MatSeqAIJRestoreArrayRead(Mreuse,(const PetscScalar**)&aa));

  PetscCall(MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY));
  *newmat = M;

  /* save submatrix used in processor for next request */
  if (call == MAT_INITIAL_MATRIX) {
    PetscCall(PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse));
    PetscCall(MatDestroy(&Mreuse));
  }
  PetscFunctionReturn(0);
}

/*
   Type-specific implementation behind MatMPIAIJSetPreallocationCSR(): counts the
   diagonal/off-diagonal nonzeros per local row, preallocates, then copies the values in.
*/
PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
{
  PetscInt       m,cstart, cend,j,nnz,i,d;
  PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
  const PetscInt *JJ;
  PetscBool      nooffprocentries;

  PetscFunctionBegin;
  PetscCheck(Ii[0] == 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %" PetscInt_FMT,Ii[0]);

  PetscCall(PetscLayoutSetUp(B->rmap));
  PetscCall(PetscLayoutSetUp(B->cmap));
  m      = B->rmap->n;
  cstart = B->cmap->rstart;
  cend   = B->cmap->rend;
  rstart = B->rmap->rstart;

  PetscCall(PetscCalloc2(m,&d_nnz,m,&o_nnz));

  if (PetscDefined(USE_DEBUG)) {
    /* validate the CSR structure only in debug builds; assumes column indices within a row are sorted */
    for (i=0; i<m; i++) {
      nnz = Ii[i+1]- Ii[i];
      JJ  = J + Ii[i];
      PetscCheck(nnz >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %" PetscInt_FMT " has a negative %" PetscInt_FMT " number of columns",i,nnz);
      PetscCheck(!nnz || !(JJ[0] < 0),PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %" PetscInt_FMT " starts with negative column index %" PetscInt_FMT,i,JJ[0]);
      PetscCheck(!nnz || !(JJ[nnz-1] >= B->cmap->N),PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %" PetscInt_FMT " ends with too large a column index %" PetscInt_FMT " (max allowed %" PetscInt_FMT ")",i,JJ[nnz-1],B->cmap->N);
    }
  }

  /* count diagonal-block entries per row; the rest go to the off-diagonal block */
  for (i=0; i<m; i++) {
    nnz = Ii[i+1]- Ii[i];
    JJ  = J + Ii[i];
    nnz_max = PetscMax(nnz_max,nnz);
    d = 0;
    for (j=0; j<nnz; j++) {
      if (cstart <= JJ[j] && JJ[j] < cend) d++;
    }
    d_nnz[i] = d;
    o_nnz[i] = nnz - d;
  }
  PetscCall(MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz));
  PetscCall(PetscFree2(d_nnz,o_nnz));

  for (i=0; i<m; i++) {
    ii = i + rstart;
    PetscCall(MatSetValues_MPIAIJ(B,1,&ii,Ii[i+1] - Ii[i],J+Ii[i], v ? v + Ii[i] : NULL,INSERT_VALUES));
  }
  /* all values are locally owned, so skip the off-process communication during assembly */
  nooffprocentries    = B->nooffprocentries;
  B->nooffprocentries = PETSC_TRUE;
  PetscCall(MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY));
  B->nooffprocentries = nooffprocentries;

  PetscCall(MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE));
  PetscFunctionReturn(0);
}

/*@
   MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
   (the default parallel PETSc format).

   Collective

   Input Parameters:
+  B - the matrix
.  i - the indices into j for the start of each local row (starts with zero)
.  j - the column indices for each local row (starts with zero)
-  v - optional values in the matrix

   Level: developer

   Notes:
   The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
   thus you CANNOT change the matrix entries by changing the values of v[] after you have
   called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.

   The i and j indices are 0 based, and i indices are indices corresponding to the local j array.

   The format which is used for the sparse matrix input, is equivalent to a
   row-major ordering, i.e. for the following matrix, the input data expected is
   as shown

$        1 0 0
$        2 0 3     P0
$       -------
$        4 5 6     P1
$
$     Process0 [P0]: rows_owned=[0,1]
$        i =  {0,1,3}  [size = nrow+1  = 2+1]
$        j =  {0,0,2}  [size = 3]
$        v =  {1,2,3}  [size = 3]
$
$     Process1 [P1]: rows_owned=[2]
$        i =  {0,3}    [size = nrow+1  = 1+1]
$        j =  {0,1,2}  [size = 3]
$        v =  {4,5,6}  [size = 3]

.seealso: `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatCreateAIJ()`, `MATMPIAIJ`,
          `MatCreateSeqAIJWithArrays()`, `MatCreateMPIAIJWithSplitArrays()`
@*/
PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
{
  PetscFunctionBegin;
  PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));
  PetscFunctionReturn(0);
}

/*@C
   MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
   (the default parallel PETSc format).  For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective

   Input Parameters:
+  B - the matrix
.  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
           (same value is used for all local rows)
.  d_nnz - array containing the number of nonzeros in the various rows of the
           DIAGONAL portion of the local submatrix (possibly different for each row)
           or NULL (PETSC_NULL_INTEGER in Fortran), if d_nz is used to specify the nonzero structure.
           The size of this array is equal to the number of local rows, i.e 'm'.
           For matrices that will be factored, you must leave room for (and set)
           the diagonal entry even if it is zero.
.  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
           submatrix (same value is used for all local rows).
-  o_nnz - array containing the number of nonzeros in the various rows of the
           OFF-DIAGONAL portion of the local submatrix (possibly different for
           each row) or NULL (PETSC_NULL_INTEGER in Fortran), if o_nz is used to specify the nonzero
           structure. The size of this array is equal to the number
           of local rows, i.e 'm'.

   If the *_nnz parameter is given then the *_nz parameter is ignored

   The AIJ format (also called the Yale sparse matrix format or
   compressed row storage (CSR)), is fully compatible with standard Fortran 77
   storage.  The stored row and column indices begin with zero.
   See Users-Manual: ch_mat for details.

   The parallel matrix is partitioned such that the first m0 rows belong to
   process 0, the next m1 rows belong to process 1, the next m2 rows belong
   to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.

   The DIAGONAL portion of the local submatrix of a processor can be defined
   as the submatrix which is obtained by extraction the part corresponding to
   the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
   first row that belongs to the processor, r2 is the last row belonging to
   this processor, and c1-c2 is range of indices of the local part of a
   vector suitable for applying the matrix to.  This is an mxn matrix.  In the
   common case of a square matrix, the row and column ranges are the same and
   the DIAGONAL part is also square. The remaining portion of the local
   submatrix (mxN) constitute the OFF-DIAGONAL portion.

   If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.

   You can call MatGetInfo() to get information on how effective the preallocation was;
   for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
   You can also run with the option -info and look for messages with the string
   malloc in them to see if additional memory allocation was needed.

   Example usage:

   Consider the following 8x8 matrix with 34 non-zero values, that is
   assembled across 3 processors. Lets assume that proc0 owns 3 rows,
   proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
   as follows:

.vb
            1  2  0  |  0  3  0  |  0  4
    Proc0   0  5  6  |  7  0  0  |  8  0
            9  0 10  | 11  0  0  | 12  0
    -------------------------------------
           13  0 14  | 15 16 17  |  0  0
    Proc1   0 18  0  | 19 20 21  |  0  0
            0  0  0  | 22 23  0  | 24  0
    -------------------------------------
    Proc2  25 26 27  |  0  0 28  | 29  0
           30  0  0  | 31 32 33  |  0 34
.ve

   This can be represented as a collection of submatrices as:

.vb
      A B C
      D E F
      G H I
.ve

   Where the submatrices A,B,C are owned by proc0, D,E,F are
   owned by proc1, G,H,I are owned by proc2.

   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'M','N' parameters are 8,8, and have the same values on all procs.

   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
   Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
   part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.

   When d_nz, o_nz parameters are specified, d_nz storage elements are
   allocated for every row of the local diagonal submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submat.
   One way to choose d_nz and o_nz is to use the max nonzeros per local
   rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
   In this case, the values of d_nz,o_nz are:
.vb
     proc0 : dnz = 2, o_nz = 2
     proc1 : dnz = 3, o_nz = 2
     proc2 : dnz = 1, o_nz = 4
.ve
   We are allocating m*(d_nz+o_nz) storage locations for every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc2. i.e we are using 12+15+10=37 storage locations to store
   34 values.

   When d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
   In the above case the values for d_nnz,o_nnz are:
.vb
     proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2: d_nnz = [1,1]   and o_nnz = [4,4]
.ve
   Here the space allocated is sum of all the above values i.e 34, and
   hence pre-allocation is perfect.

   Level: intermediate

.seealso: `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatCreateAIJ()`, `MatMPIAIJSetPreallocationCSR()`,
          `MATMPIAIJ`, `MatGetInfo()`, `PetscSplitOwnership()`
@*/
PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(B,MAT_CLASSID,1);
  PetscValidType(B,1);
  PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));
  PetscFunctionReturn(0);
}

/*@
   MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard
   CSR format for the local rows.

   Collective

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (Cannot be PETSC_DECIDE)
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
       calculated if N is given) For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.  i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
.  j - column indices
-  a - matrix values

   Output Parameter:
.  mat - the matrix

   Level: intermediate

   Notes:
   The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
   thus you CANNOT change the matrix entries by changing the values of a[] after you have
   called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.

   The i and j indices are 0 based, and i indices are indices corresponding to the local j array.

   The format which is used for the sparse matrix input, is equivalent to a
   row-major ordering, i.e. for the following matrix, the input data expected is
   as shown

   Once you have created the matrix you can update it with new numerical values using MatUpdateMPIAIJWithArrays

$        1 0 0
$        2 0 3     P0
$       -------
$        4 5 6     P1
$
$     Process0 [P0]: rows_owned=[0,1]
$        i =  {0,1,3}  [size = nrow+1  = 2+1]
$        j =  {0,0,2}  [size = 3]
$        v =  {1,2,3}  [size = 3]
$
$     Process1 [P1]: rows_owned=[2]
$        i =  {0,3}    [size = nrow+1  = 1+1]
$        j =  {0,1,2}  [size = 3]
$        v =  {4,5,6}  [size = 3]

.seealso: `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
          `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArrays()`
@*/
PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
{
  PetscFunctionBegin;
  PetscCheck(!i || !i[0],PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
  PetscCheck(m >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
  PetscCall(MatCreate(comm,mat));
  PetscCall(MatSetSizes(*mat,m,n,M,N));
  /* PetscCall(MatSetBlockSizes(M,bs,cbs)); */
  PetscCall(MatSetType(*mat,MATMPIAIJ));
  PetscCall(MatMPIAIJSetPreallocationCSR(*mat,i,j,a));
  PetscFunctionReturn(0);
}

/*@
   MatUpdateMPIAIJWithArrays - updates a MPI AIJ matrix using arrays that contain in standard
   CSR format for the local rows. Only the numerical values are updated the other arrays must be identical

   Collective

   Input Parameters:
+  mat - the matrix
.  m - number of local rows (Cannot be PETSC_DECIDE)
.
n - This value should be the same as the local size used in creating the 4151 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4152 calculated if N is given) For square matrices n is almost always m. 4153 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4154 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4155 . Ii - row indices; that is Ii[0] = 0, Ii[row] = Ii[row-1] + number of elements in that row of the matrix 4156 . J - column indices 4157 - v - matrix values 4158 4159 Level: intermediate 4160 4161 .seealso: `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 4162 `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArrays()` 4163 @*/ 4164 PetscErrorCode MatUpdateMPIAIJWithArrays(Mat mat,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[]) 4165 { 4166 PetscInt cstart,nnz,i,j; 4167 PetscInt *ld; 4168 PetscBool nooffprocentries; 4169 Mat_MPIAIJ *Aij = (Mat_MPIAIJ*)mat->data; 4170 Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)Aij->A->data; 4171 PetscScalar *ad,*ao; 4172 const PetscInt *Adi = Ad->i; 4173 PetscInt ldi,Iii,md; 4174 4175 PetscFunctionBegin; 4176 PetscCheck(Ii[0] == 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 4177 PetscCheck(m >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4178 PetscCheck(m == mat->rmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Local number of rows cannot change from call to MatUpdateMPIAIJWithArrays()"); 4179 PetscCheck(n == mat->cmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Local number of columns cannot change from call to MatUpdateMPIAIJWithArrays()"); 4180 4181 PetscCall(MatSeqAIJGetArrayWrite(Aij->A,&ad)); 4182 PetscCall(MatSeqAIJGetArrayWrite(Aij->B,&ao)); 4183 cstart = mat->cmap->rstart; 4184 if 
(!Aij->ld) { 4185 /* count number of entries below block diagonal */ 4186 PetscCall(PetscCalloc1(m,&ld)); 4187 Aij->ld = ld; 4188 for (i=0; i<m; i++) { 4189 nnz = Ii[i+1]- Ii[i]; 4190 j = 0; 4191 while (J[j] < cstart && j < nnz) {j++;} 4192 J += nnz; 4193 ld[i] = j; 4194 } 4195 } else { 4196 ld = Aij->ld; 4197 } 4198 4199 for (i=0; i<m; i++) { 4200 nnz = Ii[i+1]- Ii[i]; 4201 Iii = Ii[i]; 4202 ldi = ld[i]; 4203 md = Adi[i+1]-Adi[i]; 4204 PetscCall(PetscArraycpy(ao,v + Iii,ldi)); 4205 PetscCall(PetscArraycpy(ad,v + Iii + ldi,md)); 4206 PetscCall(PetscArraycpy(ao + ldi,v + Iii + ldi + md,nnz - ldi - md)); 4207 ad += md; 4208 ao += nnz - md; 4209 } 4210 nooffprocentries = mat->nooffprocentries; 4211 mat->nooffprocentries = PETSC_TRUE; 4212 PetscCall(MatSeqAIJRestoreArrayWrite(Aij->A,&ad)); 4213 PetscCall(MatSeqAIJRestoreArrayWrite(Aij->B,&ao)); 4214 PetscCall(PetscObjectStateIncrease((PetscObject)Aij->A)); 4215 PetscCall(PetscObjectStateIncrease((PetscObject)Aij->B)); 4216 PetscCall(PetscObjectStateIncrease((PetscObject)mat)); 4217 PetscCall(MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY)); 4218 PetscCall(MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY)); 4219 mat->nooffprocentries = nooffprocentries; 4220 PetscFunctionReturn(0); 4221 } 4222 4223 /*@C 4224 MatCreateAIJ - Creates a sparse parallel matrix in AIJ format 4225 (the default parallel PETSc format). For good matrix assembly performance 4226 the user should preallocate the matrix storage by setting the parameters 4227 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 4228 performance can be increased by more than a factor of 50. 4229 4230 Collective 4231 4232 Input Parameters: 4233 + comm - MPI communicator 4234 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 4235 This value should be the same as the local size used in creating the 4236 y vector for the matrix-vector product y = Ax. 4237 . 
n - This value should be the same as the local size used in creating the 4238 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4239 calculated if N is given) For square matrices n is almost always m. 4240 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4241 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4242 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 4243 (same value is used for all local rows) 4244 . d_nnz - array containing the number of nonzeros in the various rows of the 4245 DIAGONAL portion of the local submatrix (possibly different for each row) 4246 or NULL, if d_nz is used to specify the nonzero structure. 4247 The size of this array is equal to the number of local rows, i.e 'm'. 4248 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 4249 submatrix (same value is used for all local rows). 4250 - o_nnz - array containing the number of nonzeros in the various rows of the 4251 OFF-DIAGONAL portion of the local submatrix (possibly different for 4252 each row) or NULL, if o_nz is used to specify the nonzero 4253 structure. The size of this array is equal to the number 4254 of local rows, i.e 'm'. 4255 4256 Output Parameter: 4257 . A - the matrix 4258 4259 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 4260 MatXXXXSetPreallocation() paradigm instead of this routine directly. 4261 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 4262 4263 Notes: 4264 If the *_nnz parameter is given then the *_nz parameter is ignored 4265 4266 m,n,M,N parameters specify the size of the matrix, and its partitioning across 4267 processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 4268 storage requirements for this matrix. 

   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
   processor then it must be used on all processors that share the object for
   that argument.

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   The parallel matrix is partitioned across processors such that the
   first m0 rows belong to process 0, the next m1 rows belong to
   process 1, the next m2 rows belong to process 2 etc.. where
   m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores
   values corresponding to [m x N] submatrix.

   The columns are logically partitioned with the n0 columns belonging
   to 0th partition, the next n1 columns belonging to the next
   partition etc.. where n0,n1,n2... are the input parameter 'n'.

   The DIAGONAL portion of the local submatrix on any given processor
   is the submatrix corresponding to the rows and columns m,n
   corresponding to the given processor. i.e diagonal matrix on
   process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1]
   etc. The remaining portion of the local submatrix [m x (N-n)]
   constitute the OFF-DIAGONAL portion. The example below better
   illustrates this concept.

   For a square global matrix we define each processor's diagonal portion
   to be its local rows and the corresponding columns (a square submatrix);
   each processor's off-diagonal portion encompasses the remainder of the
   local matrix (a rectangular submatrix).

   If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.

   When calling this routine with a single process communicator, a matrix of
   type SEQAIJ is returned.
If a matrix of type MPIAIJ is desired for this 4304 type of communicator, use the construction mechanism 4305 .vb 4306 MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...); 4307 .ve 4308 4309 $ MatCreate(...,&A); 4310 $ MatSetType(A,MATMPIAIJ); 4311 $ MatSetSizes(A, m,n,M,N); 4312 $ MatMPIAIJSetPreallocation(A,...); 4313 4314 By default, this format uses inodes (identical nodes) when possible. 4315 We search for consecutive rows with the same nonzero structure, thereby 4316 reusing matrix information to achieve increased efficiency. 4317 4318 Options Database Keys: 4319 + -mat_no_inode - Do not use inodes 4320 . -mat_inode_limit <limit> - Sets inode limit (max limit=5) 4321 - -matmult_vecscatter_view <viewer> - View the vecscatter (i.e., communication pattern) used in MatMult() of sparse parallel matrices. 4322 See viewer types in manual of MatView(). Of them, ascii_matlab, draw or binary cause the vecscatter be viewed as a matrix. 4323 Entry (i,j) is the size of message (in bytes) rank i sends to rank j in one MatMult() call. 4324 4325 Example usage: 4326 4327 Consider the following 8x8 matrix with 34 non-zero values, that is 4328 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 4329 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 4330 as follows 4331 4332 .vb 4333 1 2 0 | 0 3 0 | 0 4 4334 Proc0 0 5 6 | 7 0 0 | 8 0 4335 9 0 10 | 11 0 0 | 12 0 4336 ------------------------------------- 4337 13 0 14 | 15 16 17 | 0 0 4338 Proc1 0 18 0 | 19 20 21 | 0 0 4339 0 0 0 | 22 23 0 | 24 0 4340 ------------------------------------- 4341 Proc2 25 26 27 | 0 0 28 | 29 0 4342 30 0 0 | 31 32 33 | 0 34 4343 .ve 4344 4345 This can be represented as a collection of submatrices as 4346 4347 .vb 4348 A B C 4349 D E F 4350 G H I 4351 .ve 4352 4353 Where the submatrices A,B,C are owned by proc0, D,E,F are 4354 owned by proc1, G,H,I are owned by proc2. 

   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'M','N' parameters are 8,8, and have the same values on all procs.

   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
   Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
   part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.

   When d_nz, o_nz parameters are specified, d_nz storage elements are
   allocated for every row of the local diagonal submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submat.
   One way to choose d_nz and o_nz is to use the max nonzeros per local
   rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
   In this case, the values of d_nz,o_nz are
.vb
     proc0 : dnz = 2, o_nz = 2
     proc1 : dnz = 3, o_nz = 2
     proc2 : dnz = 1, o_nz = 4
.ve
   We are allocating m*(d_nz+o_nz) storage locations for every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc2. i.e we are using 12+15+10=37 storage locations to store
   34 values.

   When d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
   In the above case the values for d_nnz,o_nnz are
.vb
     proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2: d_nnz = [1,1]   and o_nnz = [4,4]
.ve
   Here the space allocated is sum of all the above values i.e 34, and
   hence pre-allocation is perfect.
4393 4394 Level: intermediate 4395 4396 .seealso: `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 4397 `MATMPIAIJ`, `MatCreateMPIAIJWithArrays()` 4398 @*/ 4399 PetscErrorCode MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 4400 { 4401 PetscMPIInt size; 4402 4403 PetscFunctionBegin; 4404 PetscCall(MatCreate(comm,A)); 4405 PetscCall(MatSetSizes(*A,m,n,M,N)); 4406 PetscCallMPI(MPI_Comm_size(comm,&size)); 4407 if (size > 1) { 4408 PetscCall(MatSetType(*A,MATMPIAIJ)); 4409 PetscCall(MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz)); 4410 } else { 4411 PetscCall(MatSetType(*A,MATSEQAIJ)); 4412 PetscCall(MatSeqAIJSetPreallocation(*A,d_nz,d_nnz)); 4413 } 4414 PetscFunctionReturn(0); 4415 } 4416 4417 /*@C 4418 MatMPIAIJGetSeqAIJ - Returns the local piece of this distributed matrix 4419 4420 Not collective 4421 4422 Input Parameter: 4423 . A - The MPIAIJ matrix 4424 4425 Output Parameters: 4426 + Ad - The local diagonal block as a SeqAIJ matrix 4427 . Ao - The local off-diagonal block as a SeqAIJ matrix 4428 - colmap - An array mapping local column numbers of Ao to global column numbers of the parallel matrix 4429 4430 Note: The rows in Ad and Ao are in [0, Nr), where Nr is the number of local rows on this process. The columns 4431 in Ad are in [0, Nc) where Nc is the number of local columns. The columns are Ao are in [0, Nco), where Nco is 4432 the number of nonzero columns in the local off-diagonal piece of the matrix A. The array colmap maps these 4433 local column numbers to global column numbers in the original matrix. 
   Level: intermediate

.seealso: `MatMPIAIJGetLocalMat()`, `MatMPIAIJGetLocalMatCondensed()`, `MatCreateAIJ()`, `MATMPIAIJ`, `MATSEQAIJ`
@*/
PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
  PetscBool  flg;

  PetscFunctionBegin;
  /* prefix match (not exact compare) so subclasses whose type name begins with "mpiaij" are accepted */
  PetscCall(PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&flg));
  PetscCheck(flg,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MATMPIAIJ matrix as input");
  if (Ad) *Ad = a->A;                /* diagonal block */
  if (Ao) *Ao = a->B;                /* off-diagonal block */
  if (colmap) *colmap = a->garray;   /* local->global column map for the off-diagonal block */
  PetscFunctionReturn(0);
}

/* Builds a parallel MPIAIJ matrix on comm whose local rows are the rows of the sequential
   matrix inmat on each rank. n is the local column count (or PETSC_DECIDE). With
   MAT_INITIAL_MATRIX a symbolic (preallocation) pass runs first; with reuse only values
   are reinserted into *outmat. */
PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
{
  PetscInt    m,N,i,rstart,nnz,Ii;
  PetscInt    *indx;
  PetscScalar *values;
  MatType     rootType;

  PetscFunctionBegin;
  PetscCall(MatGetSize(inmat,&m,&N));
  if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
    PetscInt *dnz,*onz,sum,bs,cbs;

    if (n == PETSC_DECIDE) {
      PetscCall(PetscSplitOwnership(comm,&n,&N));
    }
    /* Check sum(n) = N */
    PetscCall(MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm));
    PetscCheck(sum == N,PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %" PetscInt_FMT " != global columns %" PetscInt_FMT,sum,N);

    /* exclusive prefix sum of local row counts gives this rank's first global row */
    PetscCallMPI(MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm));
    rstart -= m;

    MatPreallocateBegin(comm,m,n,dnz,onz);
    for (i=0; i<m; i++) {
      PetscCall(MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL));
      PetscCall(MatPreallocateSet(i+rstart,nnz,indx,dnz,onz));
      PetscCall(MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL));
    }

    PetscCall(MatCreate(comm,outmat));
    PetscCall(MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE));
    PetscCall(MatGetBlockSizes(inmat,&bs,&cbs));
    PetscCall(MatSetBlockSizes(*outmat,bs,cbs));
    PetscCall(MatGetRootType_Private(inmat,&rootType));
    PetscCall(MatSetType(*outmat,rootType));
    /* both preallocations are set; only the one matching the actual type takes effect */
    PetscCall(MatSeqAIJSetPreallocation(*outmat,0,dnz));
    PetscCall(MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz));
    MatPreallocateEnd(dnz,onz);
    PetscCall(MatSetOption(*outmat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE));
  }

  /* numeric phase: copy each local row of inmat into the owned rows of *outmat */
  PetscCall(MatGetOwnershipRange(*outmat,&rstart,NULL));
  for (i=0; i<m; i++) {
    PetscCall(MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values));
    Ii = i + rstart;
    PetscCall(MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES));
    PetscCall(MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values));
  }
  PetscCall(MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY));
  PetscFunctionReturn(0);
}

/* Writes each rank's local rows of A as a standalone SeqAIJ matrix to the binary
   file "<outfile>.<rank>" (one file per rank). */
PetscErrorCode MatFileSplit(Mat A,char *outfile)
{
  PetscMPIInt       rank;
  PetscInt          m,N,i,rstart,nnz;
  size_t            len;
  const PetscInt    *indx;
  PetscViewer       out;
  char              *name;
  Mat               B;
  const PetscScalar *values;

  PetscFunctionBegin;
  PetscCall(MatGetLocalSize(A,&m,NULL));
  PetscCall(MatGetSize(A,NULL,&N));
  /* Should this be the type of the diagonal block of A? */
  PetscCall(MatCreate(PETSC_COMM_SELF,&B));
  PetscCall(MatSetSizes(B,m,N,m,N));
  PetscCall(MatSetBlockSizesFromMats(B,A,A));
  PetscCall(MatSetType(B,MATSEQAIJ));
  PetscCall(MatSeqAIJSetPreallocation(B,0,NULL));
  PetscCall(MatGetOwnershipRange(A,&rstart,NULL));
  for (i=0; i<m; i++) {
    PetscCall(MatGetRow(A,i+rstart,&nnz,&indx,&values));
    PetscCall(MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES));
    PetscCall(MatRestoreRow(A,i+rstart,&nnz,&indx,&values));
  }
  PetscCall(MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY));

  PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank));
  PetscCall(PetscStrlen(outfile,&len));
  /* NOTE(review): len+6 leaves room for ".", up to 4 rank digits, and the NUL;
     ranks >= 10000 would truncate — confirm acceptable */
  PetscCall(PetscMalloc1(len+6,&name));
  PetscCall(PetscSNPrintf(name,len+6,"%s.%d",outfile,rank));
  PetscCall(PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out));
  PetscCall(PetscFree(name));
  PetscCall(MatView(B,out));
  PetscCall(PetscViewerDestroy(&out));
  PetscCall(MatDestroy(&B));
  PetscFunctionReturn(0);
}

/* PetscContainer destructor for the Mat_Merge_SeqsToMPI support structure composed
   on matrices built by MatCreateMPIAIJSumSeqAIJSymbolic(). Frees every owned array;
   buf_ri/buf_rj are arrays of pointers whose storage is one contiguous allocation
   anchored at element [0]. */
static PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(void *data)
{
  Mat_Merge_SeqsToMPI *merge = (Mat_Merge_SeqsToMPI *)data;

  PetscFunctionBegin;
  if (!merge) PetscFunctionReturn(0);
  PetscCall(PetscFree(merge->id_r));
  PetscCall(PetscFree(merge->len_s));
  PetscCall(PetscFree(merge->len_r));
  PetscCall(PetscFree(merge->bi));
  PetscCall(PetscFree(merge->bj));
  PetscCall(PetscFree(merge->buf_ri[0]));
  PetscCall(PetscFree(merge->buf_ri));
  PetscCall(PetscFree(merge->buf_rj[0]));
  PetscCall(PetscFree(merge->buf_rj));
  PetscCall(PetscFree(merge->coi));
  PetscCall(PetscFree(merge->coj));
  PetscCall(PetscFree(merge->owners_co));
  PetscCall(PetscLayoutDestroy(&merge->rowmap));
  PetscCall(PetscFree(merge));
  PetscFunctionReturn(0);
}

#include <../src/mat/utils/freespace.h>
#include
<petscbt.h>

/* Numeric phase of summing per-rank sequential matrices into an MPIAIJ matrix.
   mpimat must have been created by MatCreateMPIAIJSumSeqAIJSymbolic(), which composed
   the Mat_Merge_SeqsToMPI support structure (row layout, merged ij-structure, message
   metadata) on it; this routine only exchanges and accumulates the numerical values. */
PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat)
{
  MPI_Comm            comm;
  Mat_SeqAIJ          *a =(Mat_SeqAIJ*)seqmat->data;
  PetscMPIInt         size,rank,taga,*len_s;
  PetscInt            N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj;
  PetscInt            proc,m;
  PetscInt            **buf_ri,**buf_rj;
  PetscInt            k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
  PetscInt            nrows,**buf_ri_k,**nextrow,**nextai;
  MPI_Request         *s_waits,*r_waits;
  MPI_Status          *status;
  const MatScalar     *aa,*a_a;
  MatScalar           **abuf_r,*ba_i;
  Mat_Merge_SeqsToMPI *merge;
  PetscContainer      container;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)mpimat,&comm));
  PetscCall(PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0));

  PetscCallMPI(MPI_Comm_size(comm,&size));
  PetscCallMPI(MPI_Comm_rank(comm,&rank));

  /* retrieve the support structure left by the symbolic phase */
  PetscCall(PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container));
  PetscCheck(container,PetscObjectComm((PetscObject)mpimat),PETSC_ERR_PLIB,"Mat not created from MatCreateMPIAIJSumSeqAIJSymbolic");
  PetscCall(PetscContainerGetPointer(container,(void**)&merge));
  PetscCall(MatSeqAIJGetArrayRead(seqmat,&a_a));
  aa = a_a;

  bi     = merge->bi;
  bj     = merge->bj;
  buf_ri = merge->buf_ri;
  buf_rj = merge->buf_rj;

  PetscCall(PetscMalloc1(size,&status));
  owners = merge->rowmap->range;
  len_s  = merge->len_s;

  /* send and recv matrix values */
  /*-----------------------------*/
  PetscCall(PetscObjectGetNewTag((PetscObject)mpimat,&taga));
  PetscCall(PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits));

  PetscCall(PetscMalloc1(merge->nsend+1,&s_waits));
  for (proc=0,k=0; proc<size; proc++) {
    if (!len_s[proc]) continue;
    /* ship the values of all rows this rank holds that proc owns, as one contiguous chunk */
    i = owners[proc];
    PetscCallMPI(MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k));
    k++;
  }

  if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv,r_waits,status));
  if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend,s_waits,status));
  PetscCall(PetscFree(status));

  PetscCall(PetscFree(s_waits));
  PetscCall(PetscFree(r_waits));

  /* insert mat values of mpimat */
  /*----------------------------*/
  PetscCall(PetscMalloc1(N,&ba_i));
  PetscCall(PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai));

  for (k=0; k<merge->nrecv; k++) {
    buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
    nrows       = *(buf_ri_k[k]);
    nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th recved i-structure */
    nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure */
  }

  /* set values of ba */
  m = merge->rowmap->n;
  for (i=0; i<m; i++) {
    arow = owners[rank] + i;
    bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
    bnzi = bi[i+1] - bi[i];
    PetscCall(PetscArrayzero(ba_i,bnzi));

    /* add local non-zero vals of this proc's seqmat into ba */
    anzi   = ai[arow+1] - ai[arow];
    aj     = a->j + ai[arow];
    aa     = a_a + ai[arow];
    nextaj = 0;
    /* merged row bj_i is a superset of aj; walk bj_i, consuming matching entries of aj */
    for (j=0; nextaj<anzi; j++) {
      if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
        ba_i[j] += aa[nextaj++];
      }
    }

    /* add received vals into ba */
    for (k=0; k<merge->nrecv; k++) { /* k-th received message */
      /* i-th row */
      if (i == *nextrow[k]) {
        anzi   = *(nextai[k]+1) - *nextai[k];
        aj     = buf_rj[k] + *(nextai[k]);
        aa     = abuf_r[k] + *(nextai[k]);
        nextaj = 0;
        for (j=0; nextaj<anzi; j++) {
          if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
            ba_i[j] += aa[nextaj++];
          }
        }
        nextrow[k]++; nextai[k]++;
      }
    }
    PetscCall(MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES));
  }
  PetscCall(MatSeqAIJRestoreArrayRead(seqmat,&a_a));
  PetscCall(MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY));

  PetscCall(PetscFree(abuf_r[0]));
  PetscCall(PetscFree(abuf_r));
  PetscCall(PetscFree(ba_i));
  PetscCall(PetscFree3(buf_ri_k,nextrow,nextai));
  PetscCall(PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0));
  PetscFunctionReturn(0);
}

/* Symbolic phase: determines the parallel row layout, exchanges the ij-structure of
   off-process rows, merges it into the local nonzero pattern, preallocates the result
   matrix, and composes the Mat_Merge_SeqsToMPI support structure on it for the numeric
   phase. */
PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
{
  Mat                 B_mpi;
  Mat_SeqAIJ          *a=(Mat_SeqAIJ*)seqmat->data;
  PetscMPIInt         size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
  PetscInt            **buf_rj,**buf_ri,**buf_ri_k;
  PetscInt            M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
  PetscInt            len,proc,*dnz,*onz,bs,cbs;
  PetscInt            k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
  PetscInt            nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
  MPI_Request         *si_waits,*sj_waits,*ri_waits,*rj_waits;
  MPI_Status          *status;
  PetscFreeSpaceList  free_space=NULL,current_space=NULL;
  PetscBT             lnkbt;
  Mat_Merge_SeqsToMPI *merge;
  PetscContainer      container;

  PetscFunctionBegin;
  PetscCall(PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0));

  /* make sure it is a PETSc comm */
  PetscCall(PetscCommDuplicate(comm,&comm,NULL));
  PetscCallMPI(MPI_Comm_size(comm,&size));
  PetscCallMPI(MPI_Comm_rank(comm,&rank));

  PetscCall(PetscNew(&merge));
  PetscCall(PetscMalloc1(size,&status));

  /* determine row ownership */
  /*---------------------------------------------------------*/
  PetscCall(PetscLayoutCreate(comm,&merge->rowmap));
  PetscCall(PetscLayoutSetLocalSize(merge->rowmap,m));
  PetscCall(PetscLayoutSetSize(merge->rowmap,M));
  PetscCall(PetscLayoutSetBlockSize(merge->rowmap,1));
  PetscCall(PetscLayoutSetUp(merge->rowmap));
  PetscCall(PetscMalloc1(size,&len_si));
  PetscCall(PetscMalloc1(size,&merge->len_s));
  m      = merge->rowmap->n;
  owners = merge->rowmap->range;

  /* determine the number of messages to send, their lengths */
  /*---------------------------------------------------------*/
  len_s = merge->len_s;

  len          = 0; /* length of buf_si[] */
  merge->nsend = 0;
  for (proc=0; proc<size; proc++) {
    len_si[proc] = 0;
    if (proc == rank) {
      len_s[proc] = 0; /* nothing is sent to self */
    } else {
      len_si[proc] = owners[proc+1] - owners[proc] + 1;
      len_s[proc]  = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
    }
    if (len_s[proc]) {
      merge->nsend++;
      nrows = 0;
      /* count only nonempty rows; empty rows are omitted from the i-structure message */
      for (i=owners[proc]; i<owners[proc+1]; i++) {
        if (ai[i+1] > ai[i]) nrows++;
      }
      len_si[proc] = 2*(nrows+1);
      len         += len_si[proc];
    }
  }

  /* determine the number and length of messages to receive for ij-structure */
  /*-------------------------------------------------------------------------*/
  PetscCall(PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv));
  PetscCall(PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri));

  /* post the Irecv of j-structure */
  /*-------------------------------*/
  PetscCall(PetscCommGetNewTag(comm,&tagj));
  PetscCall(PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits));

  /* post the Isend of j-structure */
  /*--------------------------------*/
  PetscCall(PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits));

  for (proc=0, k=0; proc<size; proc++) {
    if (!len_s[proc]) continue;
    i = owners[proc];
    PetscCallMPI(MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k));
    k++;
  }

  /* receives and sends of j-structure are complete */
  /*------------------------------------------------*/
  if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv,rj_waits,status));
  if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend,sj_waits,status));

  /* send and recv i-structure */
  /*---------------------------*/
  PetscCall(PetscCommGetNewTag(comm,&tagi));
  PetscCall(PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits));

  PetscCall(PetscMalloc1(len+1,&buf_s));
  buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
  for (proc=0,k=0; proc<size; proc++) {
    if (!len_s[proc]) continue;
    /* form outgoing message for i-structure:
         buf_si[0]:                 nrows to be sent
               [1:nrows]:           row index (global)
               [nrows+1:2*nrows+1]: i-structure index
    */
    /*-------------------------------------------*/
    nrows       = len_si[proc]/2 - 1;
    buf_si_i    = buf_si + nrows+1;
    buf_si[0]   = nrows;
    buf_si_i[0] = 0;
    nrows       = 0;
    for (i=owners[proc]; i<owners[proc+1]; i++) {
      anzi = ai[i+1] - ai[i];
      if (anzi) {
        buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
        buf_si[nrows+1]   = i-owners[proc]; /* local row index */
        nrows++;
      }
    }
    PetscCallMPI(MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k));
    k++;
    buf_si += len_si[proc];
  }

  if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv,ri_waits,status));
  if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend,si_waits,status));

  PetscCall(PetscInfo(seqmat,"nsend: %d, nrecv: %d\n",merge->nsend,merge->nrecv));
  for (i=0; i<merge->nrecv; i++) {
    PetscCall(PetscInfo(seqmat,"recv len_ri=%d, len_rj=%d from [%d]\n",len_ri[i],merge->len_r[i],merge->id_r[i]));
  }

  PetscCall(PetscFree(len_si));
  PetscCall(PetscFree(len_ri));
  PetscCall(PetscFree(rj_waits));
  PetscCall(PetscFree2(si_waits,sj_waits));
  PetscCall(PetscFree(ri_waits));
  PetscCall(PetscFree(buf_s));
  PetscCall(PetscFree(status));

  /* compute a local seq matrix in each processor */
  /*----------------------------------------------*/
  /* allocate bi array and free space for accumulating nonzero column info */
  PetscCall(PetscMalloc1(m+1,&bi));
  bi[0] = 0;

  /* create and initialize a linked list */
  nlnk = N+1;
  PetscCall(PetscLLCreate(N,N,nlnk,lnk,lnkbt));

  /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
  len = ai[owners[rank+1]] - ai[owners[rank]];
  PetscCall(PetscFreeSpaceGet(PetscIntMultTruncate(2,len)+1,&free_space));

  current_space = free_space;

  /* determine symbolic info for each local row */
  PetscCall(PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai));

  for (k=0; k<merge->nrecv; k++) {
    buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
    nrows       = *buf_ri_k[k];
    nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th recved i-structure */
    nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure */
  }

  MatPreallocateBegin(comm,m,n,dnz,onz);
  len = 0;
  for (i=0; i<m; i++) {
    bnzi = 0;
    /* add local non-zero cols of this proc's seqmat into lnk */
    arow = owners[rank] + i;
    anzi = ai[arow+1] - ai[arow];
    aj   = a->j + ai[arow];
    PetscCall(PetscLLAddSorted(anzi,aj,N,&nlnk,lnk,lnkbt));
    bnzi += nlnk;
    /* add received col data into lnk */
    for (k=0; k<merge->nrecv; k++) { /* k-th received message */
      if (i == *nextrow[k]) { /* i-th row */
        anzi = *(nextai[k]+1) - *nextai[k];
        aj   = buf_rj[k] + *nextai[k];
        PetscCall(PetscLLAddSorted(anzi,aj,N,&nlnk,lnk,lnkbt));
        bnzi += nlnk;
        nextrow[k]++; nextai[k]++;
      }
    }
    if (len < bnzi) len = bnzi;  /* =max(bnzi) */

    /* if free space is not available, make more free space */
    if (current_space->local_remaining<bnzi) {
      PetscCall(PetscFreeSpaceGet(PetscIntSumTruncate(bnzi,current_space->total_array_size),&current_space));
      nspacedouble++;
    }
    /* copy data into free space, then initialize lnk */
    PetscCall(PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt));
    PetscCall(MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz));

    current_space->array           += bnzi;
    current_space->local_used      += bnzi;
    current_space->local_remaining -= bnzi;

    bi[i+1] = bi[i] + bnzi;
  }

  PetscCall(PetscFree3(buf_ri_k,nextrow,nextai));

  PetscCall(PetscMalloc1(bi[m]+1,&bj));
  PetscCall(PetscFreeSpaceContiguous(&free_space,bj));
  PetscCall(PetscLLDestroy(lnk,lnkbt));

  /* create symbolic parallel matrix B_mpi */
  /*---------------------------------------*/
  PetscCall(MatGetBlockSizes(seqmat,&bs,&cbs));
  PetscCall(MatCreate(comm,&B_mpi));
  if (n==PETSC_DECIDE) {
    PetscCall(MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N));
  } else {
    PetscCall(MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE));
  }
  PetscCall(MatSetBlockSizes(B_mpi,bs,cbs));
  PetscCall(MatSetType(B_mpi,MATMPIAIJ));
  PetscCall(MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz));
  MatPreallocateEnd(dnz,onz);
  PetscCall(MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE));

  /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
  B_mpi->assembled = PETSC_FALSE;
  merge->bi        = bi;
  merge->bj        = bj;
  merge->buf_ri    = buf_ri;
  merge->buf_rj    = buf_rj;
  merge->coi       = NULL;
  merge->coj       = NULL;
  merge->owners_co = NULL;

  PetscCall(PetscCommDestroy(&comm));

  /* attach the supporting struct to B_mpi for reuse */
  PetscCall(PetscContainerCreate(PETSC_COMM_SELF,&container));
  PetscCall(PetscContainerSetPointer(container,merge));
  PetscCall(PetscContainerSetUserDestroy(container,MatDestroy_MPIAIJ_SeqsToMPI));
  PetscCall(PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container));
  PetscCall(PetscContainerDestroy(&container));
  *mpimat = B_mpi;

  PetscCall(PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0));
  PetscFunctionReturn(0);
}

/*@C
      MatCreateMPIAIJSumSeqAIJ - Creates a MATMPIAIJ matrix by adding sequential
                 matrices from each processor

    Collective

   Input Parameters:
+    comm - the communicators the parallel matrix will live on
.    seqmat - the input sequential matrices
.    m - number of local rows (or PETSC_DECIDE)
.    n - number of local columns (or PETSC_DECIDE)
-    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX

   Output Parameter:
.    mpimat - the parallel matrix generated

    Level: advanced

   Notes:
     The dimensions of the sequential matrix in each processor MUST be the same.
     The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be
     destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
4966 @*/ 4967 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat) 4968 { 4969 PetscMPIInt size; 4970 4971 PetscFunctionBegin; 4972 PetscCallMPI(MPI_Comm_size(comm,&size)); 4973 if (size == 1) { 4974 PetscCall(PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0)); 4975 if (scall == MAT_INITIAL_MATRIX) { 4976 PetscCall(MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat)); 4977 } else { 4978 PetscCall(MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN)); 4979 } 4980 PetscCall(PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0)); 4981 PetscFunctionReturn(0); 4982 } 4983 PetscCall(PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0)); 4984 if (scall == MAT_INITIAL_MATRIX) { 4985 PetscCall(MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat)); 4986 } 4987 PetscCall(MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat)); 4988 PetscCall(PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0)); 4989 PetscFunctionReturn(0); 4990 } 4991 4992 /*@ 4993 MatAIJGetLocalMat - Creates a SeqAIJ from a MATAIJ matrix by taking all its local rows and putting them into a sequential matrix with 4994 mlocal rows and n columns. Where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained 4995 with MatGetSize() 4996 4997 Not Collective 4998 4999 Input Parameters: 5000 + A - the matrix 5001 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5002 5003 Output Parameter: 5004 . A_loc - the local sequential matrix generated 5005 5006 Level: developer 5007 5008 Notes: 5009 In other words combines the two parts of a parallel MPIAIJ matrix on each process to a single matrix. 
5010 5011 Destroy the matrix with MatDestroy() 5012 5013 .seealso: MatMPIAIJGetLocalMat() 5014 5015 @*/ 5016 PetscErrorCode MatAIJGetLocalMat(Mat A,Mat *A_loc) 5017 { 5018 PetscBool mpi; 5019 5020 PetscFunctionBegin; 5021 PetscCall(PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&mpi)); 5022 if (mpi) { 5023 PetscCall(MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,A_loc)); 5024 } else { 5025 *A_loc = A; 5026 PetscCall(PetscObjectReference((PetscObject)*A_loc)); 5027 } 5028 PetscFunctionReturn(0); 5029 } 5030 5031 /*@ 5032 MatMPIAIJGetLocalMat - Creates a SeqAIJ from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with 5033 mlocal rows and n columns. Where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained 5034 with MatGetSize() 5035 5036 Not Collective 5037 5038 Input Parameters: 5039 + A - the matrix 5040 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5041 5042 Output Parameter: 5043 . A_loc - the local sequential matrix generated 5044 5045 Level: developer 5046 5047 Notes: 5048 In other words combines the two parts of a parallel MPIAIJ matrix on each process to a single matrix. 5049 5050 When the communicator associated with A has size 1 and MAT_INITIAL_MATRIX is requested, the matrix returned is the diagonal part of A. 5051 If MAT_REUSE_MATRIX is requested with comm size 1, MatCopy(Adiag,*A_loc,SAME_NONZERO_PATTERN) is called. 5052 This means that one can preallocate the proper sequential matrix first and then call this routine with MAT_REUSE_MATRIX to safely 5053 modify the values of the returned A_loc. 
5054 5055 .seealso: `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMatCondensed()`, `MatMPIAIJGetLocalMatMerge()` 5056 @*/ 5057 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc) 5058 { 5059 Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data; 5060 Mat_SeqAIJ *mat,*a,*b; 5061 PetscInt *ai,*aj,*bi,*bj,*cmap=mpimat->garray; 5062 const PetscScalar *aa,*ba,*aav,*bav; 5063 PetscScalar *ca,*cam; 5064 PetscMPIInt size; 5065 PetscInt am=A->rmap->n,i,j,k,cstart=A->cmap->rstart; 5066 PetscInt *ci,*cj,col,ncols_d,ncols_o,jo; 5067 PetscBool match; 5068 5069 PetscFunctionBegin; 5070 PetscCall(PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&match)); 5071 PetscCheck(match,PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input"); 5072 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A),&size)); 5073 if (size == 1) { 5074 if (scall == MAT_INITIAL_MATRIX) { 5075 PetscCall(PetscObjectReference((PetscObject)mpimat->A)); 5076 *A_loc = mpimat->A; 5077 } else if (scall == MAT_REUSE_MATRIX) { 5078 PetscCall(MatCopy(mpimat->A,*A_loc,SAME_NONZERO_PATTERN)); 5079 } 5080 PetscFunctionReturn(0); 5081 } 5082 5083 PetscCall(PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0)); 5084 a = (Mat_SeqAIJ*)(mpimat->A)->data; 5085 b = (Mat_SeqAIJ*)(mpimat->B)->data; 5086 ai = a->i; aj = a->j; bi = b->i; bj = b->j; 5087 PetscCall(MatSeqAIJGetArrayRead(mpimat->A,&aav)); 5088 PetscCall(MatSeqAIJGetArrayRead(mpimat->B,&bav)); 5089 aa = aav; 5090 ba = bav; 5091 if (scall == MAT_INITIAL_MATRIX) { 5092 PetscCall(PetscMalloc1(1+am,&ci)); 5093 ci[0] = 0; 5094 for (i=0; i<am; i++) { 5095 ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]); 5096 } 5097 PetscCall(PetscMalloc1(1+ci[am],&cj)); 5098 PetscCall(PetscMalloc1(1+ci[am],&ca)); 5099 k = 0; 5100 for (i=0; i<am; i++) { 5101 ncols_o = bi[i+1] - bi[i]; 5102 ncols_d = ai[i+1] - ai[i]; 5103 /* off-diagonal portion of A */ 5104 for (jo=0; jo<ncols_o; jo++) { 5105 col = cmap[*bj]; 5106 if (col >= cstart) break; 5107 cj[k] = 
col; bj++; 5108 ca[k++] = *ba++; 5109 } 5110 /* diagonal portion of A */ 5111 for (j=0; j<ncols_d; j++) { 5112 cj[k] = cstart + *aj++; 5113 ca[k++] = *aa++; 5114 } 5115 /* off-diagonal portion of A */ 5116 for (j=jo; j<ncols_o; j++) { 5117 cj[k] = cmap[*bj++]; 5118 ca[k++] = *ba++; 5119 } 5120 } 5121 /* put together the new matrix */ 5122 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc)); 5123 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5124 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5125 mat = (Mat_SeqAIJ*)(*A_loc)->data; 5126 mat->free_a = PETSC_TRUE; 5127 mat->free_ij = PETSC_TRUE; 5128 mat->nonew = 0; 5129 } else if (scall == MAT_REUSE_MATRIX) { 5130 mat =(Mat_SeqAIJ*)(*A_loc)->data; 5131 ci = mat->i; 5132 cj = mat->j; 5133 PetscCall(MatSeqAIJGetArrayWrite(*A_loc,&cam)); 5134 for (i=0; i<am; i++) { 5135 /* off-diagonal portion of A */ 5136 ncols_o = bi[i+1] - bi[i]; 5137 for (jo=0; jo<ncols_o; jo++) { 5138 col = cmap[*bj]; 5139 if (col >= cstart) break; 5140 *cam++ = *ba++; bj++; 5141 } 5142 /* diagonal portion of A */ 5143 ncols_d = ai[i+1] - ai[i]; 5144 for (j=0; j<ncols_d; j++) *cam++ = *aa++; 5145 /* off-diagonal portion of A */ 5146 for (j=jo; j<ncols_o; j++) { 5147 *cam++ = *ba++; bj++; 5148 } 5149 } 5150 PetscCall(MatSeqAIJRestoreArrayWrite(*A_loc,&cam)); 5151 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 5152 PetscCall(MatSeqAIJRestoreArrayRead(mpimat->A,&aav)); 5153 PetscCall(MatSeqAIJRestoreArrayRead(mpimat->B,&bav)); 5154 PetscCall(PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0)); 5155 PetscFunctionReturn(0); 5156 } 5157 5158 /*@ 5159 MatMPIAIJGetLocalMatMerge - Creates a SeqAIJ from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with 5160 mlocal rows and n columns. 
Where n is the sum of the number of columns of the diagonal and offdiagonal part 5161 5162 Not Collective 5163 5164 Input Parameters: 5165 + A - the matrix 5166 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5167 5168 Output Parameters: 5169 + glob - sequential IS with global indices associated with the columns of the local sequential matrix generated (can be NULL) 5170 - A_loc - the local sequential matrix generated 5171 5172 Level: developer 5173 5174 Notes: 5175 This is different from MatMPIAIJGetLocalMat() since the first columns in the returning matrix are those associated with the diagonal part, then those associated with the offdiagonal part (in its local ordering) 5176 5177 .seealso: `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMat()`, `MatMPIAIJGetLocalMatCondensed()` 5178 5179 @*/ 5180 PetscErrorCode MatMPIAIJGetLocalMatMerge(Mat A,MatReuse scall,IS *glob,Mat *A_loc) 5181 { 5182 Mat Ao,Ad; 5183 const PetscInt *cmap; 5184 PetscMPIInt size; 5185 PetscErrorCode (*f)(Mat,MatReuse,IS*,Mat*); 5186 5187 PetscFunctionBegin; 5188 PetscCall(MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&cmap)); 5189 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A),&size)); 5190 if (size == 1) { 5191 if (scall == MAT_INITIAL_MATRIX) { 5192 PetscCall(PetscObjectReference((PetscObject)Ad)); 5193 *A_loc = Ad; 5194 } else if (scall == MAT_REUSE_MATRIX) { 5195 PetscCall(MatCopy(Ad,*A_loc,SAME_NONZERO_PATTERN)); 5196 } 5197 if (glob) PetscCall(ISCreateStride(PetscObjectComm((PetscObject)Ad),Ad->cmap->n,Ad->cmap->rstart,1,glob)); 5198 PetscFunctionReturn(0); 5199 } 5200 PetscCall(PetscObjectQueryFunction((PetscObject)A,"MatMPIAIJGetLocalMatMerge_C",&f)); 5201 PetscCall(PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0)); 5202 if (f) { 5203 PetscCall((*f)(A,scall,glob,A_loc)); 5204 } else { 5205 Mat_SeqAIJ *a = (Mat_SeqAIJ*)Ad->data; 5206 Mat_SeqAIJ *b = (Mat_SeqAIJ*)Ao->data; 5207 Mat_SeqAIJ *c; 5208 PetscInt *ai = a->i, *aj = a->j; 5209 PetscInt *bi = b->i, *bj = b->j; 5210 PetscInt *ci,*cj; 
5211 const PetscScalar *aa,*ba; 5212 PetscScalar *ca; 5213 PetscInt i,j,am,dn,on; 5214 5215 PetscCall(MatGetLocalSize(Ad,&am,&dn)); 5216 PetscCall(MatGetLocalSize(Ao,NULL,&on)); 5217 PetscCall(MatSeqAIJGetArrayRead(Ad,&aa)); 5218 PetscCall(MatSeqAIJGetArrayRead(Ao,&ba)); 5219 if (scall == MAT_INITIAL_MATRIX) { 5220 PetscInt k; 5221 PetscCall(PetscMalloc1(1+am,&ci)); 5222 PetscCall(PetscMalloc1(ai[am]+bi[am],&cj)); 5223 PetscCall(PetscMalloc1(ai[am]+bi[am],&ca)); 5224 ci[0] = 0; 5225 for (i=0,k=0; i<am; i++) { 5226 const PetscInt ncols_o = bi[i+1] - bi[i]; 5227 const PetscInt ncols_d = ai[i+1] - ai[i]; 5228 ci[i+1] = ci[i] + ncols_o + ncols_d; 5229 /* diagonal portion of A */ 5230 for (j=0; j<ncols_d; j++,k++) { 5231 cj[k] = *aj++; 5232 ca[k] = *aa++; 5233 } 5234 /* off-diagonal portion of A */ 5235 for (j=0; j<ncols_o; j++,k++) { 5236 cj[k] = dn + *bj++; 5237 ca[k] = *ba++; 5238 } 5239 } 5240 /* put together the new matrix */ 5241 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,dn+on,ci,cj,ca,A_loc)); 5242 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5243 /* Since these are PETSc arrays, change flags to free them as necessary. 
*/ 5244 c = (Mat_SeqAIJ*)(*A_loc)->data; 5245 c->free_a = PETSC_TRUE; 5246 c->free_ij = PETSC_TRUE; 5247 c->nonew = 0; 5248 PetscCall(MatSetType(*A_loc,((PetscObject)Ad)->type_name)); 5249 } else if (scall == MAT_REUSE_MATRIX) { 5250 PetscCall(MatSeqAIJGetArrayWrite(*A_loc,&ca)); 5251 for (i=0; i<am; i++) { 5252 const PetscInt ncols_d = ai[i+1] - ai[i]; 5253 const PetscInt ncols_o = bi[i+1] - bi[i]; 5254 /* diagonal portion of A */ 5255 for (j=0; j<ncols_d; j++) *ca++ = *aa++; 5256 /* off-diagonal portion of A */ 5257 for (j=0; j<ncols_o; j++) *ca++ = *ba++; 5258 } 5259 PetscCall(MatSeqAIJRestoreArrayWrite(*A_loc,&ca)); 5260 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 5261 PetscCall(MatSeqAIJRestoreArrayRead(Ad,&aa)); 5262 PetscCall(MatSeqAIJRestoreArrayRead(Ao,&aa)); 5263 if (glob) { 5264 PetscInt cst, *gidx; 5265 5266 PetscCall(MatGetOwnershipRangeColumn(A,&cst,NULL)); 5267 PetscCall(PetscMalloc1(dn+on,&gidx)); 5268 for (i=0; i<dn; i++) gidx[i] = cst + i; 5269 for (i=0; i<on; i++) gidx[i+dn] = cmap[i]; 5270 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)Ad),dn+on,gidx,PETSC_OWN_POINTER,glob)); 5271 } 5272 } 5273 PetscCall(PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0)); 5274 PetscFunctionReturn(0); 5275 } 5276 5277 /*@C 5278 MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MATMPIAIJ matrix by taking all its local rows and NON-ZERO columns 5279 5280 Not Collective 5281 5282 Input Parameters: 5283 + A - the matrix 5284 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5285 - row, col - index sets of rows and columns to extract (or NULL) 5286 5287 Output Parameter: 5288 . 
A_loc - the local sequential matrix generated 5289 5290 Level: developer 5291 5292 .seealso: `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMat()` 5293 5294 @*/ 5295 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc) 5296 { 5297 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5298 PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx; 5299 IS isrowa,iscola; 5300 Mat *aloc; 5301 PetscBool match; 5302 5303 PetscFunctionBegin; 5304 PetscCall(PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match)); 5305 PetscCheck(match,PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input"); 5306 PetscCall(PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0)); 5307 if (!row) { 5308 start = A->rmap->rstart; end = A->rmap->rend; 5309 PetscCall(ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa)); 5310 } else { 5311 isrowa = *row; 5312 } 5313 if (!col) { 5314 start = A->cmap->rstart; 5315 cmap = a->garray; 5316 nzA = a->A->cmap->n; 5317 nzB = a->B->cmap->n; 5318 PetscCall(PetscMalloc1(nzA+nzB, &idx)); 5319 ncols = 0; 5320 for (i=0; i<nzB; i++) { 5321 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5322 else break; 5323 } 5324 imark = i; 5325 for (i=0; i<nzA; i++) idx[ncols++] = start + i; 5326 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; 5327 PetscCall(ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola)); 5328 } else { 5329 iscola = *col; 5330 } 5331 if (scall != MAT_INITIAL_MATRIX) { 5332 PetscCall(PetscMalloc1(1,&aloc)); 5333 aloc[0] = *A_loc; 5334 } 5335 PetscCall(MatCreateSubMatrices(A,1,&isrowa,&iscola,scall,&aloc)); 5336 if (!col) { /* attach global id of condensed columns */ 5337 PetscCall(PetscObjectCompose((PetscObject)aloc[0],"_petsc_GetLocalMatCondensed_iscol",(PetscObject)iscola)); 5338 } 5339 *A_loc = aloc[0]; 5340 PetscCall(PetscFree(aloc)); 5341 if (!row) { 5342 PetscCall(ISDestroy(&isrowa)); 5343 } 5344 if (!col) { 5345 PetscCall(ISDestroy(&iscola)); 5346 } 5347 
PetscCall(PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0)); 5348 PetscFunctionReturn(0); 5349 } 5350 5351 /* 5352 * Create a sequential AIJ matrix based on row indices. a whole column is extracted once a row is matched. 5353 * Row could be local or remote.The routine is designed to be scalable in memory so that nothing is based 5354 * on a global size. 5355 * */ 5356 PetscErrorCode MatCreateSeqSubMatrixWithRows_Private(Mat P,IS rows,Mat *P_oth) 5357 { 5358 Mat_MPIAIJ *p=(Mat_MPIAIJ*)P->data; 5359 Mat_SeqAIJ *pd=(Mat_SeqAIJ*)(p->A)->data,*po=(Mat_SeqAIJ*)(p->B)->data,*p_oth; 5360 PetscInt plocalsize,nrows,*ilocal,*oilocal,i,lidx,*nrcols,*nlcols,ncol; 5361 PetscMPIInt owner; 5362 PetscSFNode *iremote,*oiremote; 5363 const PetscInt *lrowindices; 5364 PetscSF sf,osf; 5365 PetscInt pcstart,*roffsets,*loffsets,*pnnz,j; 5366 PetscInt ontotalcols,dntotalcols,ntotalcols,nout; 5367 MPI_Comm comm; 5368 ISLocalToGlobalMapping mapping; 5369 const PetscScalar *pd_a,*po_a; 5370 5371 PetscFunctionBegin; 5372 PetscCall(PetscObjectGetComm((PetscObject)P,&comm)); 5373 /* plocalsize is the number of roots 5374 * nrows is the number of leaves 5375 * */ 5376 PetscCall(MatGetLocalSize(P,&plocalsize,NULL)); 5377 PetscCall(ISGetLocalSize(rows,&nrows)); 5378 PetscCall(PetscCalloc1(nrows,&iremote)); 5379 PetscCall(ISGetIndices(rows,&lrowindices)); 5380 for (i=0;i<nrows;i++) { 5381 /* Find a remote index and an owner for a row 5382 * The row could be local or remote 5383 * */ 5384 owner = 0; 5385 lidx = 0; 5386 PetscCall(PetscLayoutFindOwnerIndex(P->rmap,lrowindices[i],&owner,&lidx)); 5387 iremote[i].index = lidx; 5388 iremote[i].rank = owner; 5389 } 5390 /* Create SF to communicate how many nonzero columns for each row */ 5391 PetscCall(PetscSFCreate(comm,&sf)); 5392 /* SF will figure out the number of nonzero colunms for each row, and their 5393 * offsets 5394 * */ 5395 PetscCall(PetscSFSetGraph(sf,plocalsize,nrows,NULL,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER)); 5396 
PetscCall(PetscSFSetFromOptions(sf)); 5397 PetscCall(PetscSFSetUp(sf)); 5398 5399 PetscCall(PetscCalloc1(2*(plocalsize+1),&roffsets)); 5400 PetscCall(PetscCalloc1(2*plocalsize,&nrcols)); 5401 PetscCall(PetscCalloc1(nrows,&pnnz)); 5402 roffsets[0] = 0; 5403 roffsets[1] = 0; 5404 for (i=0;i<plocalsize;i++) { 5405 /* diag */ 5406 nrcols[i*2+0] = pd->i[i+1] - pd->i[i]; 5407 /* off diag */ 5408 nrcols[i*2+1] = po->i[i+1] - po->i[i]; 5409 /* compute offsets so that we relative location for each row */ 5410 roffsets[(i+1)*2+0] = roffsets[i*2+0] + nrcols[i*2+0]; 5411 roffsets[(i+1)*2+1] = roffsets[i*2+1] + nrcols[i*2+1]; 5412 } 5413 PetscCall(PetscCalloc1(2*nrows,&nlcols)); 5414 PetscCall(PetscCalloc1(2*nrows,&loffsets)); 5415 /* 'r' means root, and 'l' means leaf */ 5416 PetscCall(PetscSFBcastBegin(sf,MPIU_2INT,nrcols,nlcols,MPI_REPLACE)); 5417 PetscCall(PetscSFBcastBegin(sf,MPIU_2INT,roffsets,loffsets,MPI_REPLACE)); 5418 PetscCall(PetscSFBcastEnd(sf,MPIU_2INT,nrcols,nlcols,MPI_REPLACE)); 5419 PetscCall(PetscSFBcastEnd(sf,MPIU_2INT,roffsets,loffsets,MPI_REPLACE)); 5420 PetscCall(PetscSFDestroy(&sf)); 5421 PetscCall(PetscFree(roffsets)); 5422 PetscCall(PetscFree(nrcols)); 5423 dntotalcols = 0; 5424 ontotalcols = 0; 5425 ncol = 0; 5426 for (i=0;i<nrows;i++) { 5427 pnnz[i] = nlcols[i*2+0] + nlcols[i*2+1]; 5428 ncol = PetscMax(pnnz[i],ncol); 5429 /* diag */ 5430 dntotalcols += nlcols[i*2+0]; 5431 /* off diag */ 5432 ontotalcols += nlcols[i*2+1]; 5433 } 5434 /* We do not need to figure the right number of columns 5435 * since all the calculations will be done by going through the raw data 5436 * */ 5437 PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF,nrows,ncol,0,pnnz,P_oth)); 5438 PetscCall(MatSetUp(*P_oth)); 5439 PetscCall(PetscFree(pnnz)); 5440 p_oth = (Mat_SeqAIJ*) (*P_oth)->data; 5441 /* diag */ 5442 PetscCall(PetscCalloc1(dntotalcols,&iremote)); 5443 /* off diag */ 5444 PetscCall(PetscCalloc1(ontotalcols,&oiremote)); 5445 /* diag */ 5446 
PetscCall(PetscCalloc1(dntotalcols,&ilocal)); 5447 /* off diag */ 5448 PetscCall(PetscCalloc1(ontotalcols,&oilocal)); 5449 dntotalcols = 0; 5450 ontotalcols = 0; 5451 ntotalcols = 0; 5452 for (i=0;i<nrows;i++) { 5453 owner = 0; 5454 PetscCall(PetscLayoutFindOwnerIndex(P->rmap,lrowindices[i],&owner,NULL)); 5455 /* Set iremote for diag matrix */ 5456 for (j=0;j<nlcols[i*2+0];j++) { 5457 iremote[dntotalcols].index = loffsets[i*2+0] + j; 5458 iremote[dntotalcols].rank = owner; 5459 /* P_oth is seqAIJ so that ilocal need to point to the first part of memory */ 5460 ilocal[dntotalcols++] = ntotalcols++; 5461 } 5462 /* off diag */ 5463 for (j=0;j<nlcols[i*2+1];j++) { 5464 oiremote[ontotalcols].index = loffsets[i*2+1] + j; 5465 oiremote[ontotalcols].rank = owner; 5466 oilocal[ontotalcols++] = ntotalcols++; 5467 } 5468 } 5469 PetscCall(ISRestoreIndices(rows,&lrowindices)); 5470 PetscCall(PetscFree(loffsets)); 5471 PetscCall(PetscFree(nlcols)); 5472 PetscCall(PetscSFCreate(comm,&sf)); 5473 /* P serves as roots and P_oth is leaves 5474 * Diag matrix 5475 * */ 5476 PetscCall(PetscSFSetGraph(sf,pd->i[plocalsize],dntotalcols,ilocal,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER)); 5477 PetscCall(PetscSFSetFromOptions(sf)); 5478 PetscCall(PetscSFSetUp(sf)); 5479 5480 PetscCall(PetscSFCreate(comm,&osf)); 5481 /* Off diag */ 5482 PetscCall(PetscSFSetGraph(osf,po->i[plocalsize],ontotalcols,oilocal,PETSC_OWN_POINTER,oiremote,PETSC_OWN_POINTER)); 5483 PetscCall(PetscSFSetFromOptions(osf)); 5484 PetscCall(PetscSFSetUp(osf)); 5485 PetscCall(MatSeqAIJGetArrayRead(p->A,&pd_a)); 5486 PetscCall(MatSeqAIJGetArrayRead(p->B,&po_a)); 5487 /* We operate on the matrix internal data for saving memory */ 5488 PetscCall(PetscSFBcastBegin(sf,MPIU_SCALAR,pd_a,p_oth->a,MPI_REPLACE)); 5489 PetscCall(PetscSFBcastBegin(osf,MPIU_SCALAR,po_a,p_oth->a,MPI_REPLACE)); 5490 PetscCall(MatGetOwnershipRangeColumn(P,&pcstart,NULL)); 5491 /* Convert to global indices for diag matrix */ 5492 for 
(i=0;i<pd->i[plocalsize];i++) pd->j[i] += pcstart; 5493 PetscCall(PetscSFBcastBegin(sf,MPIU_INT,pd->j,p_oth->j,MPI_REPLACE)); 5494 /* We want P_oth store global indices */ 5495 PetscCall(ISLocalToGlobalMappingCreate(comm,1,p->B->cmap->n,p->garray,PETSC_COPY_VALUES,&mapping)); 5496 /* Use memory scalable approach */ 5497 PetscCall(ISLocalToGlobalMappingSetType(mapping,ISLOCALTOGLOBALMAPPINGHASH)); 5498 PetscCall(ISLocalToGlobalMappingApply(mapping,po->i[plocalsize],po->j,po->j)); 5499 PetscCall(PetscSFBcastBegin(osf,MPIU_INT,po->j,p_oth->j,MPI_REPLACE)); 5500 PetscCall(PetscSFBcastEnd(sf,MPIU_INT,pd->j,p_oth->j,MPI_REPLACE)); 5501 /* Convert back to local indices */ 5502 for (i=0;i<pd->i[plocalsize];i++) pd->j[i] -= pcstart; 5503 PetscCall(PetscSFBcastEnd(osf,MPIU_INT,po->j,p_oth->j,MPI_REPLACE)); 5504 nout = 0; 5505 PetscCall(ISGlobalToLocalMappingApply(mapping,IS_GTOLM_DROP,po->i[plocalsize],po->j,&nout,po->j)); 5506 PetscCheck(nout == po->i[plocalsize],comm,PETSC_ERR_ARG_INCOMP,"n %" PetscInt_FMT " does not equal to nout %" PetscInt_FMT " ",po->i[plocalsize],nout); 5507 PetscCall(ISLocalToGlobalMappingDestroy(&mapping)); 5508 /* Exchange values */ 5509 PetscCall(PetscSFBcastEnd(sf,MPIU_SCALAR,pd_a,p_oth->a,MPI_REPLACE)); 5510 PetscCall(PetscSFBcastEnd(osf,MPIU_SCALAR,po_a,p_oth->a,MPI_REPLACE)); 5511 PetscCall(MatSeqAIJRestoreArrayRead(p->A,&pd_a)); 5512 PetscCall(MatSeqAIJRestoreArrayRead(p->B,&po_a)); 5513 /* Stop PETSc from shrinking memory */ 5514 for (i=0;i<nrows;i++) p_oth->ilen[i] = p_oth->imax[i]; 5515 PetscCall(MatAssemblyBegin(*P_oth,MAT_FINAL_ASSEMBLY)); 5516 PetscCall(MatAssemblyEnd(*P_oth,MAT_FINAL_ASSEMBLY)); 5517 /* Attach PetscSF objects to P_oth so that we can reuse it later */ 5518 PetscCall(PetscObjectCompose((PetscObject)*P_oth,"diagsf",(PetscObject)sf)); 5519 PetscCall(PetscObjectCompose((PetscObject)*P_oth,"offdiagsf",(PetscObject)osf)); 5520 PetscCall(PetscSFDestroy(&sf)); 5521 PetscCall(PetscSFDestroy(&osf)); 5522 PetscFunctionReturn(0); 
5523 } 5524 5525 /* 5526 * Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 5527 * This supports MPIAIJ and MAIJ 5528 * */ 5529 PetscErrorCode MatGetBrowsOfAcols_MPIXAIJ(Mat A,Mat P,PetscInt dof,MatReuse reuse,Mat *P_oth) 5530 { 5531 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data,*p=(Mat_MPIAIJ*)P->data; 5532 Mat_SeqAIJ *p_oth; 5533 IS rows,map; 5534 PetscHMapI hamp; 5535 PetscInt i,htsize,*rowindices,off,*mapping,key,count; 5536 MPI_Comm comm; 5537 PetscSF sf,osf; 5538 PetscBool has; 5539 5540 PetscFunctionBegin; 5541 PetscCall(PetscObjectGetComm((PetscObject)A,&comm)); 5542 PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAocols,A,P,0,0)); 5543 /* If it is the first time, create an index set of off-diag nonzero columns of A, 5544 * and then create a submatrix (that often is an overlapping matrix) 5545 * */ 5546 if (reuse == MAT_INITIAL_MATRIX) { 5547 /* Use a hash table to figure out unique keys */ 5548 PetscCall(PetscHMapICreate(&hamp)); 5549 PetscCall(PetscHMapIResize(hamp,a->B->cmap->n)); 5550 PetscCall(PetscCalloc1(a->B->cmap->n,&mapping)); 5551 count = 0; 5552 /* Assume that a->g is sorted, otherwise the following does not make sense */ 5553 for (i=0;i<a->B->cmap->n;i++) { 5554 key = a->garray[i]/dof; 5555 PetscCall(PetscHMapIHas(hamp,key,&has)); 5556 if (!has) { 5557 mapping[i] = count; 5558 PetscCall(PetscHMapISet(hamp,key,count++)); 5559 } else { 5560 /* Current 'i' has the same value the previous step */ 5561 mapping[i] = count-1; 5562 } 5563 } 5564 PetscCall(ISCreateGeneral(comm,a->B->cmap->n,mapping,PETSC_OWN_POINTER,&map)); 5565 PetscCall(PetscHMapIGetSize(hamp,&htsize)); 5566 PetscCheck(htsize==count,comm,PETSC_ERR_ARG_INCOMP," Size of hash map %" PetscInt_FMT " is inconsistent with count %" PetscInt_FMT " ",htsize,count); 5567 PetscCall(PetscCalloc1(htsize,&rowindices)); 5568 off = 0; 5569 PetscCall(PetscHMapIGetKeys(hamp,&off,rowindices)); 5570 PetscCall(PetscHMapIDestroy(&hamp)); 5571 PetscCall(PetscSortInt(htsize,rowindices)); 
5572 PetscCall(ISCreateGeneral(comm,htsize,rowindices,PETSC_OWN_POINTER,&rows)); 5573 /* In case, the matrix was already created but users want to recreate the matrix */ 5574 PetscCall(MatDestroy(P_oth)); 5575 PetscCall(MatCreateSeqSubMatrixWithRows_Private(P,rows,P_oth)); 5576 PetscCall(PetscObjectCompose((PetscObject)*P_oth,"aoffdiagtopothmapping",(PetscObject)map)); 5577 PetscCall(ISDestroy(&map)); 5578 PetscCall(ISDestroy(&rows)); 5579 } else if (reuse == MAT_REUSE_MATRIX) { 5580 /* If matrix was already created, we simply update values using SF objects 5581 * that as attached to the matrix ealier. 5582 */ 5583 const PetscScalar *pd_a,*po_a; 5584 5585 PetscCall(PetscObjectQuery((PetscObject)*P_oth,"diagsf",(PetscObject*)&sf)); 5586 PetscCall(PetscObjectQuery((PetscObject)*P_oth,"offdiagsf",(PetscObject*)&osf)); 5587 PetscCheck(sf && osf,comm,PETSC_ERR_ARG_NULL,"Matrix is not initialized yet"); 5588 p_oth = (Mat_SeqAIJ*) (*P_oth)->data; 5589 /* Update values in place */ 5590 PetscCall(MatSeqAIJGetArrayRead(p->A,&pd_a)); 5591 PetscCall(MatSeqAIJGetArrayRead(p->B,&po_a)); 5592 PetscCall(PetscSFBcastBegin(sf,MPIU_SCALAR,pd_a,p_oth->a,MPI_REPLACE)); 5593 PetscCall(PetscSFBcastBegin(osf,MPIU_SCALAR,po_a,p_oth->a,MPI_REPLACE)); 5594 PetscCall(PetscSFBcastEnd(sf,MPIU_SCALAR,pd_a,p_oth->a,MPI_REPLACE)); 5595 PetscCall(PetscSFBcastEnd(osf,MPIU_SCALAR,po_a,p_oth->a,MPI_REPLACE)); 5596 PetscCall(MatSeqAIJRestoreArrayRead(p->A,&pd_a)); 5597 PetscCall(MatSeqAIJRestoreArrayRead(p->B,&po_a)); 5598 } else SETERRQ(comm,PETSC_ERR_ARG_UNKNOWN_TYPE,"Unknown reuse type"); 5599 PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAocols,A,P,0,0)); 5600 PetscFunctionReturn(0); 5601 } 5602 5603 /*@C 5604 MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 5605 5606 Collective on Mat 5607 5608 Input Parameters: 5609 + A - the first matrix in mpiaij format 5610 . 
B - the second matrix in mpiaij format 5611 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5612 5613 Output Parameters: 5614 + rowb - On input index sets of rows of B to extract (or NULL), modified on output 5615 . colb - On input index sets of columns of B to extract (or NULL), modified on output 5616 - B_seq - the sequential matrix generated 5617 5618 Level: developer 5619 5620 @*/ 5621 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq) 5622 { 5623 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5624 PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark; 5625 IS isrowb,iscolb; 5626 Mat *bseq=NULL; 5627 5628 PetscFunctionBegin; 5629 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 5630 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5631 } 5632 PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0)); 5633 5634 if (scall == MAT_INITIAL_MATRIX) { 5635 start = A->cmap->rstart; 5636 cmap = a->garray; 5637 nzA = a->A->cmap->n; 5638 nzB = a->B->cmap->n; 5639 PetscCall(PetscMalloc1(nzA+nzB, &idx)); 5640 ncols = 0; 5641 for (i=0; i<nzB; i++) { /* row < local row index */ 5642 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5643 else break; 5644 } 5645 imark = i; 5646 for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */ 5647 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 5648 PetscCall(ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb)); 5649 PetscCall(ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb)); 5650 } else { 5651 PetscCheck(rowb && colb,PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 5652 isrowb = *rowb; iscolb = *colb; 5653 PetscCall(PetscMalloc1(1,&bseq)); 5654 bseq[0] = *B_seq; 5655 } 5656 
PetscCall(MatCreateSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq)); 5657 *B_seq = bseq[0]; 5658 PetscCall(PetscFree(bseq)); 5659 if (!rowb) { 5660 PetscCall(ISDestroy(&isrowb)); 5661 } else { 5662 *rowb = isrowb; 5663 } 5664 if (!colb) { 5665 PetscCall(ISDestroy(&iscolb)); 5666 } else { 5667 *colb = iscolb; 5668 } 5669 PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0)); 5670 PetscFunctionReturn(0); 5671 } 5672 5673 /* 5674 MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns 5675 of the OFF-DIAGONAL portion of local A 5676 5677 Collective on Mat 5678 5679 Input Parameters: 5680 + A,B - the matrices in mpiaij format 5681 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5682 5683 Output Parameter: 5684 + startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL) 5685 . startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL) 5686 . bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL) 5687 - B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N 5688 5689 Developer Notes: This directly accesses information inside the VecScatter associated with the matrix-vector product 5690 for this matrix. This is not desirable.. 
5691 5692 Level: developer 5693 5694 */ 5695 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth) 5696 { 5697 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5698 Mat_SeqAIJ *b_oth; 5699 VecScatter ctx; 5700 MPI_Comm comm; 5701 const PetscMPIInt *rprocs,*sprocs; 5702 const PetscInt *srow,*rstarts,*sstarts; 5703 PetscInt *rowlen,*bufj,*bufJ,ncols = 0,aBn=a->B->cmap->n,row,*b_othi,*b_othj,*rvalues=NULL,*svalues=NULL,*cols,sbs,rbs; 5704 PetscInt i,j,k=0,l,ll,nrecvs,nsends,nrows,*rstartsj = NULL,*sstartsj,len; 5705 PetscScalar *b_otha,*bufa,*bufA,*vals = NULL; 5706 MPI_Request *reqs = NULL,*rwaits = NULL,*swaits = NULL; 5707 PetscMPIInt size,tag,rank,nreqs; 5708 5709 PetscFunctionBegin; 5710 PetscCall(PetscObjectGetComm((PetscObject)A,&comm)); 5711 PetscCallMPI(MPI_Comm_size(comm,&size)); 5712 5713 if (PetscUnlikely(A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend)) { 5714 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5715 } 5716 PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0)); 5717 PetscCallMPI(MPI_Comm_rank(comm,&rank)); 5718 5719 if (size == 1) { 5720 startsj_s = NULL; 5721 bufa_ptr = NULL; 5722 *B_oth = NULL; 5723 PetscFunctionReturn(0); 5724 } 5725 5726 ctx = a->Mvctx; 5727 tag = ((PetscObject)ctx)->tag; 5728 5729 PetscCall(VecScatterGetRemote_Private(ctx,PETSC_TRUE/*send*/,&nsends,&sstarts,&srow,&sprocs,&sbs)); 5730 /* rprocs[] must be ordered so that indices received from them are ordered in rvalues[], which is key to algorithms used in this subroutine */ 5731 PetscCall(VecScatterGetRemoteOrdered_Private(ctx,PETSC_FALSE/*recv*/,&nrecvs,&rstarts,NULL/*indices not needed*/,&rprocs,&rbs)); 5732 PetscCall(PetscMPIIntCast(nsends+nrecvs,&nreqs)); 5733 
PetscCall(PetscMalloc1(nreqs,&reqs)); 5734 rwaits = reqs; 5735 swaits = reqs + nrecvs; 5736 5737 if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 5738 if (scall == MAT_INITIAL_MATRIX) { 5739 /* i-array */ 5740 /*---------*/ 5741 /* post receives */ 5742 if (nrecvs) PetscCall(PetscMalloc1(rbs*(rstarts[nrecvs] - rstarts[0]),&rvalues)); /* rstarts can be NULL when nrecvs=0 */ 5743 for (i=0; i<nrecvs; i++) { 5744 rowlen = rvalues + rstarts[i]*rbs; 5745 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */ 5746 PetscCallMPI(MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i)); 5747 } 5748 5749 /* pack the outgoing message */ 5750 PetscCall(PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj)); 5751 5752 sstartsj[0] = 0; 5753 rstartsj[0] = 0; 5754 len = 0; /* total length of j or a array to be sent */ 5755 if (nsends) { 5756 k = sstarts[0]; /* ATTENTION: sstarts[0] and rstarts[0] are not necessarily zero */ 5757 PetscCall(PetscMalloc1(sbs*(sstarts[nsends]-sstarts[0]),&svalues)); 5758 } 5759 for (i=0; i<nsends; i++) { 5760 rowlen = svalues + (sstarts[i]-sstarts[0])*sbs; 5761 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5762 for (j=0; j<nrows; j++) { 5763 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 5764 for (l=0; l<sbs; l++) { 5765 PetscCall(MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL)); /* rowlength */ 5766 5767 rowlen[j*sbs+l] = ncols; 5768 5769 len += ncols; 5770 PetscCall(MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL)); 5771 } 5772 k++; 5773 } 5774 PetscCallMPI(MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i)); 5775 5776 sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */ 5777 } 5778 /* recvs and sends of i-array are completed */ 5779 if (nreqs) PetscCallMPI(MPI_Waitall(nreqs,reqs,MPI_STATUSES_IGNORE)); 5780 PetscCall(PetscFree(svalues)); 5781 5782 /* allocate buffers for sending j and a arrays */ 5783 PetscCall(PetscMalloc1(len+1,&bufj)); 5784 
PetscCall(PetscMalloc1(len+1,&bufa)); 5785 5786 /* create i-array of B_oth */ 5787 PetscCall(PetscMalloc1(aBn+2,&b_othi)); 5788 5789 b_othi[0] = 0; 5790 len = 0; /* total length of j or a array to be received */ 5791 k = 0; 5792 for (i=0; i<nrecvs; i++) { 5793 rowlen = rvalues + (rstarts[i]-rstarts[0])*rbs; 5794 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of rows to be received */ 5795 for (j=0; j<nrows; j++) { 5796 b_othi[k+1] = b_othi[k] + rowlen[j]; 5797 PetscCall(PetscIntSumError(rowlen[j],len,&len)); 5798 k++; 5799 } 5800 rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 5801 } 5802 PetscCall(PetscFree(rvalues)); 5803 5804 /* allocate space for j and a arrays of B_oth */ 5805 PetscCall(PetscMalloc1(b_othi[aBn]+1,&b_othj)); 5806 PetscCall(PetscMalloc1(b_othi[aBn]+1,&b_otha)); 5807 5808 /* j-array */ 5809 /*---------*/ 5810 /* post receives of j-array */ 5811 for (i=0; i<nrecvs; i++) { 5812 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5813 PetscCallMPI(MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i)); 5814 } 5815 5816 /* pack the outgoing message j-array */ 5817 if (nsends) k = sstarts[0]; 5818 for (i=0; i<nsends; i++) { 5819 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5820 bufJ = bufj+sstartsj[i]; 5821 for (j=0; j<nrows; j++) { 5822 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5823 for (ll=0; ll<sbs; ll++) { 5824 PetscCall(MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL)); 5825 for (l=0; l<ncols; l++) { 5826 *bufJ++ = cols[l]; 5827 } 5828 PetscCall(MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL)); 5829 } 5830 } 5831 PetscCallMPI(MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i)); 5832 } 5833 5834 /* recvs and sends of j-array are completed */ 5835 if (nreqs) PetscCallMPI(MPI_Waitall(nreqs,reqs,MPI_STATUSES_IGNORE)); 5836 } else if (scall == MAT_REUSE_MATRIX) { 5837 sstartsj = *startsj_s; 5838 rstartsj = 
*startsj_r; 5839 bufa = *bufa_ptr; 5840 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5841 PetscCall(MatSeqAIJGetArrayWrite(*B_oth,&b_otha)); 5842 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not possess an object container"); 5843 5844 /* a-array */ 5845 /*---------*/ 5846 /* post receives of a-array */ 5847 for (i=0; i<nrecvs; i++) { 5848 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5849 PetscCallMPI(MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i)); 5850 } 5851 5852 /* pack the outgoing message a-array */ 5853 if (nsends) k = sstarts[0]; 5854 for (i=0; i<nsends; i++) { 5855 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5856 bufA = bufa+sstartsj[i]; 5857 for (j=0; j<nrows; j++) { 5858 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5859 for (ll=0; ll<sbs; ll++) { 5860 PetscCall(MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals)); 5861 for (l=0; l<ncols; l++) { 5862 *bufA++ = vals[l]; 5863 } 5864 PetscCall(MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals)); 5865 } 5866 } 5867 PetscCallMPI(MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i)); 5868 } 5869 /* recvs and sends of a-array are completed */ 5870 if (nreqs) PetscCallMPI(MPI_Waitall(nreqs,reqs,MPI_STATUSES_IGNORE)); 5871 PetscCall(PetscFree(reqs)); 5872 5873 if (scall == MAT_INITIAL_MATRIX) { 5874 /* put together the new matrix */ 5875 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth)); 5876 5877 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5878 /* Since these are PETSc arrays, change flags to free them as necessary. 
*/ 5879 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5880 b_oth->free_a = PETSC_TRUE; 5881 b_oth->free_ij = PETSC_TRUE; 5882 b_oth->nonew = 0; 5883 5884 PetscCall(PetscFree(bufj)); 5885 if (!startsj_s || !bufa_ptr) { 5886 PetscCall(PetscFree2(sstartsj,rstartsj)); 5887 PetscCall(PetscFree(bufa_ptr)); 5888 } else { 5889 *startsj_s = sstartsj; 5890 *startsj_r = rstartsj; 5891 *bufa_ptr = bufa; 5892 } 5893 } else if (scall == MAT_REUSE_MATRIX) { 5894 PetscCall(MatSeqAIJRestoreArrayWrite(*B_oth,&b_otha)); 5895 } 5896 5897 PetscCall(VecScatterRestoreRemote_Private(ctx,PETSC_TRUE,&nsends,&sstarts,&srow,&sprocs,&sbs)); 5898 PetscCall(VecScatterRestoreRemoteOrdered_Private(ctx,PETSC_FALSE,&nrecvs,&rstarts,NULL,&rprocs,&rbs)); 5899 PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0)); 5900 PetscFunctionReturn(0); 5901 } 5902 5903 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*); 5904 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*); 5905 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJSELL(Mat,MatType,MatReuse,Mat*); 5906 #if defined(PETSC_HAVE_MKL_SPARSE) 5907 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat,MatType,MatReuse,Mat*); 5908 #endif 5909 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIBAIJ(Mat,MatType,MatReuse,Mat*); 5910 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*); 5911 #if defined(PETSC_HAVE_ELEMENTAL) 5912 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat,MatType,MatReuse,Mat*); 5913 #endif 5914 #if defined(PETSC_HAVE_SCALAPACK) 5915 PETSC_INTERN PetscErrorCode MatConvert_AIJ_ScaLAPACK(Mat,MatType,MatReuse,Mat*); 5916 #endif 5917 #if defined(PETSC_HAVE_HYPRE) 5918 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*); 5919 #endif 5920 #if defined(PETSC_HAVE_CUDA) 5921 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCUSPARSE(Mat,MatType,MatReuse,Mat*); 5922 #endif 5923 #if defined(PETSC_HAVE_KOKKOS_KERNELS) 
5924 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJKokkos(Mat,MatType,MatReuse,Mat*); 5925 #endif 5926 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat,MatType,MatReuse,Mat*); 5927 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat,MatType,MatReuse,Mat*); 5928 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_IS_XAIJ(Mat); 5929 5930 /* 5931 Computes (B'*A')' since computing B*A directly is untenable 5932 5933 n p p 5934 [ ] [ ] [ ] 5935 m [ A ] * n [ B ] = m [ C ] 5936 [ ] [ ] [ ] 5937 5938 */ 5939 static PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C) 5940 { 5941 Mat At,Bt,Ct; 5942 5943 PetscFunctionBegin; 5944 PetscCall(MatTranspose(A,MAT_INITIAL_MATRIX,&At)); 5945 PetscCall(MatTranspose(B,MAT_INITIAL_MATRIX,&Bt)); 5946 PetscCall(MatMatMult(Bt,At,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&Ct)); 5947 PetscCall(MatDestroy(&At)); 5948 PetscCall(MatDestroy(&Bt)); 5949 PetscCall(MatTranspose(Ct,MAT_REUSE_MATRIX,&C)); 5950 PetscCall(MatDestroy(&Ct)); 5951 PetscFunctionReturn(0); 5952 } 5953 5954 static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat C) 5955 { 5956 PetscBool cisdense; 5957 5958 PetscFunctionBegin; 5959 PetscCheck(A->cmap->n == B->rmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %" PetscInt_FMT " != B->rmap->n %" PetscInt_FMT,A->cmap->n,B->rmap->n); 5960 PetscCall(MatSetSizes(C,A->rmap->n,B->cmap->n,A->rmap->N,B->cmap->N)); 5961 PetscCall(MatSetBlockSizesFromMats(C,A,B)); 5962 PetscCall(PetscObjectTypeCompareAny((PetscObject)C,&cisdense,MATMPIDENSE,MATMPIDENSECUDA,"")); 5963 if (!cisdense) { 5964 PetscCall(MatSetType(C,((PetscObject)A)->type_name)); 5965 } 5966 PetscCall(MatSetUp(C)); 5967 5968 C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ; 5969 PetscFunctionReturn(0); 5970 } 5971 5972 /* ----------------------------------------------------------------*/ 5973 static PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ_AB(Mat C) 5974 { 5975 Mat_Product *product = 
C->product; 5976 Mat A = product->A,B=product->B; 5977 5978 PetscFunctionBegin; 5979 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) 5980 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5981 5982 C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIAIJ; 5983 C->ops->productsymbolic = MatProductSymbolic_AB; 5984 PetscFunctionReturn(0); 5985 } 5986 5987 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ(Mat C) 5988 { 5989 Mat_Product *product = C->product; 5990 5991 PetscFunctionBegin; 5992 if (product->type == MATPRODUCT_AB) { 5993 PetscCall(MatProductSetFromOptions_MPIDense_MPIAIJ_AB(C)); 5994 } 5995 PetscFunctionReturn(0); 5996 } 5997 5998 /* Merge two sets of sorted nonzeros and return a CSR for the merged (sequential) matrix 5999 6000 Input Parameters: 6001 6002 j1,rowBegin1,rowEnd1,perm1,jmap1: describe the first set of nonzeros (Set1) 6003 j2,rowBegin2,rowEnd2,perm2,jmap2: describe the second set of nonzeros (Set2) 6004 6005 mat: both sets' nonzeros are on m rows, where m is the number of local rows of the matrix mat 6006 6007 For Set1, j1[] contains column indices of the nonzeros. 6008 For the k-th row (0<=k<m), [rowBegin1[k],rowEnd1[k]) index into j1[] and point to the begin/end nonzero in row k 6009 respectively (note rowEnd1[k] is not necessarily equal to rwoBegin1[k+1]). Indices in this range of j1[] are sorted, 6010 but might have repeats. jmap1[t+1] - jmap1[t] is the number of repeats for the t-th unique nonzero in Set1. 6011 6012 Similar for Set2. 6013 6014 This routine merges the two sets of nonzeros row by row and removes repeats. 6015 6016 Output Parameters: (memory is allocated by the caller) 6017 6018 i[],j[]: the CSR of the merged matrix, which has m rows. 6019 imap1[]: the k-th unique nonzero in Set1 (k=0,1,...) 
corresponds to imap1[k]-th unique nonzero in the merged matrix. 6020 imap2[]: similar to imap1[], but for Set2. 6021 Note we order nonzeros row-by-row and from left to right. 6022 */ 6023 static PetscErrorCode MatMergeEntries_Internal(Mat mat,const PetscInt j1[],const PetscInt j2[],const PetscCount rowBegin1[],const PetscCount rowEnd1[], 6024 const PetscCount rowBegin2[],const PetscCount rowEnd2[],const PetscCount jmap1[],const PetscCount jmap2[], 6025 PetscCount imap1[],PetscCount imap2[],PetscInt i[],PetscInt j[]) 6026 { 6027 PetscInt r,m; /* Row index of mat */ 6028 PetscCount t,t1,t2,b1,e1,b2,e2; 6029 6030 PetscFunctionBegin; 6031 PetscCall(MatGetLocalSize(mat,&m,NULL)); 6032 t1 = t2 = t = 0; /* Count unique nonzeros of in Set1, Set1 and the merged respectively */ 6033 i[0] = 0; 6034 for (r=0; r<m; r++) { /* Do row by row merging */ 6035 b1 = rowBegin1[r]; 6036 e1 = rowEnd1[r]; 6037 b2 = rowBegin2[r]; 6038 e2 = rowEnd2[r]; 6039 while (b1 < e1 && b2 < e2) { 6040 if (j1[b1] == j2[b2]) { /* Same column index and hence same nonzero */ 6041 j[t] = j1[b1]; 6042 imap1[t1] = t; 6043 imap2[t2] = t; 6044 b1 += jmap1[t1+1] - jmap1[t1]; /* Jump to next unique local nonzero */ 6045 b2 += jmap2[t2+1] - jmap2[t2]; /* Jump to next unique remote nonzero */ 6046 t1++; t2++; t++; 6047 } else if (j1[b1] < j2[b2]) { 6048 j[t] = j1[b1]; 6049 imap1[t1] = t; 6050 b1 += jmap1[t1+1] - jmap1[t1]; 6051 t1++; t++; 6052 } else { 6053 j[t] = j2[b2]; 6054 imap2[t2] = t; 6055 b2 += jmap2[t2+1] - jmap2[t2]; 6056 t2++; t++; 6057 } 6058 } 6059 /* Merge the remaining in either j1[] or j2[] */ 6060 while (b1 < e1) { 6061 j[t] = j1[b1]; 6062 imap1[t1] = t; 6063 b1 += jmap1[t1+1] - jmap1[t1]; 6064 t1++; t++; 6065 } 6066 while (b2 < e2) { 6067 j[t] = j2[b2]; 6068 imap2[t2] = t; 6069 b2 += jmap2[t2+1] - jmap2[t2]; 6070 t2++; t++; 6071 } 6072 i[r+1] = t; 6073 } 6074 PetscFunctionReturn(0); 6075 } 6076 6077 /* Split nonzeros in a block of local rows into two subsets: those in the diagonal block and those 
in the off-diagonal block 6078 6079 Input Parameters: 6080 mat: an MPI matrix that provides row and column layout information for splitting. Let's say its number of local rows is m. 6081 n,i[],j[],perm[]: there are n input entries, belonging to m rows. Row/col indices of the entries are stored in i[] and j[] 6082 respectively, along with a permutation array perm[]. Length of the i[],j[],perm[] arrays is n. 6083 6084 i[] is already sorted, but within a row, j[] is not sorted and might have repeats. 6085 i[] might contain negative indices at the beginning, which means the corresponding entries should be ignored in the splitting. 6086 6087 Output Parameters: 6088 j[],perm[]: the routine needs to sort j[] within each row along with perm[]. 6089 rowBegin[],rowMid[],rowEnd[]: of length m, and the memory is preallocated and zeroed by the caller. 6090 They contain indices pointing to j[]. For 0<=r<m, [rowBegin[r],rowMid[r]) point to begin/end entries of row r of the diagonal block, 6091 and [rowMid[r],rowEnd[r]) point to begin/end entries of row r of the off-diagonal block. 6092 6093 Aperm[],Ajmap[],Atot,Annz: Arrays are allocated by this routine. 6094 Atot: number of entries belonging to the diagonal block. 6095 Annz: number of unique nonzeros belonging to the diagonal block. 6096 Aperm[Atot] stores values from perm[] for entries belonging to the diagonal block. Length of Aperm[] is Atot, though it may also count 6097 repeats (i.e., same 'i,j' pair). 6098 Ajmap[Annz+1] stores the number of repeats of each unique entry belonging to the diagonal block. More precisely, Ajmap[t+1] - Ajmap[t] 6099 is the number of repeats for the t-th unique entry in the diagonal block. Ajmap[0] is always 0. 6100 6101 Atot: number of entries belonging to the diagonal block 6102 Annz: number of unique nonzeros belonging to the diagonal block. 6103 6104 Bperm[], Bjmap[], Btot, Bnnz are similar but for the off-diagonal block. 
6105 6106 Aperm[],Bperm[],Ajmap[] and Bjmap[] are allocated separately by this routine with PetscMalloc1(). 6107 */ 6108 static PetscErrorCode MatSplitEntries_Internal(Mat mat,PetscCount n,const PetscInt i[],PetscInt j[], 6109 PetscCount perm[],PetscCount rowBegin[],PetscCount rowMid[],PetscCount rowEnd[], 6110 PetscCount *Atot_,PetscCount **Aperm_,PetscCount *Annz_,PetscCount **Ajmap_, 6111 PetscCount *Btot_,PetscCount **Bperm_,PetscCount *Bnnz_,PetscCount **Bjmap_) 6112 { 6113 PetscInt cstart,cend,rstart,rend,row,col; 6114 PetscCount Atot=0,Btot=0; /* Total number of nonzeros in the diagonal and off-diagonal blocks */ 6115 PetscCount Annz=0,Bnnz=0; /* Number of unique nonzeros in the diagonal and off-diagonal blocks */ 6116 PetscCount k,m,p,q,r,s,mid; 6117 PetscCount *Aperm,*Bperm,*Ajmap,*Bjmap; 6118 6119 PetscFunctionBegin; 6120 PetscCall(PetscLayoutGetRange(mat->rmap,&rstart,&rend)); 6121 PetscCall(PetscLayoutGetRange(mat->cmap,&cstart,&cend)); 6122 m = rend - rstart; 6123 6124 for (k=0; k<n; k++) {if (i[k]>=0) break;} /* Skip negative rows */ 6125 6126 /* Process [k,n): sort and partition each local row into diag and offdiag portions, 6127 fill rowBegin[], rowMid[], rowEnd[], and count Atot, Btot, Annz, Bnnz. 6128 */ 6129 while (k<n) { 6130 row = i[k]; 6131 /* Entries in [k,s) are in one row. 
Shift diagonal block col indices so that diag is ahead of offdiag after sorting the row */ 6132 for (s=k; s<n; s++) if (i[s] != row) break; 6133 for (p=k; p<s; p++) { 6134 if (j[p] >= cstart && j[p] < cend) j[p] -= PETSC_MAX_INT; /* Shift diag columns to range of [-PETSC_MAX_INT, -1] */ 6135 else PetscAssert((j[p] >= 0) && (j[p] <= mat->cmap->N),PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column index %" PetscInt_FMT " is out of range",j[p]); 6136 } 6137 PetscCall(PetscSortIntWithCountArray(s-k,j+k,perm+k)); 6138 PetscCall(PetscSortedIntUpperBound(j,k,s,-1,&mid)); /* Separate [k,s) into [k,mid) for diag and [mid,s) for offdiag */ 6139 rowBegin[row-rstart] = k; 6140 rowMid[row-rstart] = mid; 6141 rowEnd[row-rstart] = s; 6142 6143 /* Count nonzeros of this diag/offdiag row, which might have repeats */ 6144 Atot += mid - k; 6145 Btot += s - mid; 6146 6147 /* Count unique nonzeros of this diag/offdiag row */ 6148 for (p=k; p<mid;) { 6149 col = j[p]; 6150 do {j[p] += PETSC_MAX_INT; p++;} while (p<mid && j[p] == col); /* Revert the modified diagonal indices */ 6151 Annz++; 6152 } 6153 6154 for (p=mid; p<s;) { 6155 col = j[p]; 6156 do {p++;} while (p<s && j[p] == col); 6157 Bnnz++; 6158 } 6159 k = s; 6160 } 6161 6162 /* Allocation according to Atot, Btot, Annz, Bnnz */ 6163 PetscCall(PetscMalloc1(Atot,&Aperm)); 6164 PetscCall(PetscMalloc1(Btot,&Bperm)); 6165 PetscCall(PetscMalloc1(Annz+1,&Ajmap)); 6166 PetscCall(PetscMalloc1(Bnnz+1,&Bjmap)); 6167 6168 /* Re-scan indices and copy diag/offdiag permutation indices to Aperm, Bperm and also fill Ajmap and Bjmap */ 6169 Ajmap[0] = Bjmap[0] = Atot = Btot = Annz = Bnnz = 0; 6170 for (r=0; r<m; r++) { 6171 k = rowBegin[r]; 6172 mid = rowMid[r]; 6173 s = rowEnd[r]; 6174 PetscCall(PetscArraycpy(Aperm+Atot,perm+k, mid-k)); 6175 PetscCall(PetscArraycpy(Bperm+Btot,perm+mid,s-mid)); 6176 Atot += mid - k; 6177 Btot += s - mid; 6178 6179 /* Scan column indices in this row and find out how many repeats each unique nonzero has */ 6180 for 
(p=k; p<mid;) { 6181 col = j[p]; 6182 q = p; 6183 do {p++;} while (p<mid && j[p] == col); 6184 Ajmap[Annz+1] = Ajmap[Annz] + (p - q); 6185 Annz++; 6186 } 6187 6188 for (p=mid; p<s;) { 6189 col = j[p]; 6190 q = p; 6191 do {p++;} while (p<s && j[p] == col); 6192 Bjmap[Bnnz+1] = Bjmap[Bnnz] + (p - q); 6193 Bnnz++; 6194 } 6195 } 6196 /* Output */ 6197 *Aperm_ = Aperm; 6198 *Annz_ = Annz; 6199 *Atot_ = Atot; 6200 *Ajmap_ = Ajmap; 6201 *Bperm_ = Bperm; 6202 *Bnnz_ = Bnnz; 6203 *Btot_ = Btot; 6204 *Bjmap_ = Bjmap; 6205 PetscFunctionReturn(0); 6206 } 6207 6208 /* Expand the jmap[] array to make a new one in view of nonzeros in the merged matrix 6209 6210 Input Parameters: 6211 nnz1: number of unique nonzeros in a set that was used to produce imap[], jmap[] 6212 nnz: number of unique nonzeros in the merged matrix 6213 imap[nnz1]: i-th nonzero in the set is the imap[i]-th nonzero in the merged matrix 6214 jmap[nnz1+1]: i-th nonzeron in the set has jmap[i+1] - jmap[i] repeats in the set 6215 6216 Output Parameter: (memory is allocated by the caller) 6217 jmap_new[nnz+1]: i-th nonzero in the merged matrix has jmap_new[i+1] - jmap_new[i] repeats in the set 6218 6219 Example: 6220 nnz1 = 4 6221 nnz = 6 6222 imap = [1,3,4,5] 6223 jmap = [0,3,5,6,7] 6224 then, 6225 jmap_new = [0,0,3,3,5,6,7] 6226 */ 6227 static PetscErrorCode ExpandJmap_Internal(PetscCount nnz1,PetscCount nnz,const PetscCount imap[],const PetscCount jmap[],PetscCount jmap_new[]) 6228 { 6229 PetscCount k,p; 6230 6231 PetscFunctionBegin; 6232 jmap_new[0] = 0; 6233 p = nnz; /* p loops over jmap_new[] backwards */ 6234 for (k=nnz1-1; k>=0; k--) { /* k loops over imap[] */ 6235 for (; p > imap[k]; p--) jmap_new[p] = jmap[k+1]; 6236 } 6237 for (; p >= 0; p--) jmap_new[p] = jmap[0]; 6238 PetscFunctionReturn(0); 6239 } 6240 6241 PetscErrorCode MatSetPreallocationCOO_MPIAIJ(Mat mat, PetscCount coo_n, const PetscInt coo_i[], const PetscInt coo_j[]) 6242 { 6243 MPI_Comm comm; 6244 PetscMPIInt rank,size; 6245 PetscInt 
m,n,M,N,rstart,rend,cstart,cend; /* Sizes, indices of row/col, therefore with type PetscInt */ 6246 PetscCount k,p,q,rem; /* Loop variables over coo arrays */ 6247 Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ*)mat->data; 6248 6249 PetscFunctionBegin; 6250 PetscCall(PetscFree(mpiaij->garray)); 6251 PetscCall(VecDestroy(&mpiaij->lvec)); 6252 #if defined(PETSC_USE_CTABLE) 6253 PetscCall(PetscTableDestroy(&mpiaij->colmap)); 6254 #else 6255 PetscCall(PetscFree(mpiaij->colmap)); 6256 #endif 6257 PetscCall(VecScatterDestroy(&mpiaij->Mvctx)); 6258 mat->assembled = PETSC_FALSE; 6259 mat->was_assembled = PETSC_FALSE; 6260 PetscCall(MatResetPreallocationCOO_MPIAIJ(mat)); 6261 6262 PetscCall(PetscObjectGetComm((PetscObject)mat,&comm)); 6263 PetscCallMPI(MPI_Comm_size(comm,&size)); 6264 PetscCallMPI(MPI_Comm_rank(comm,&rank)); 6265 PetscCall(PetscLayoutSetUp(mat->rmap)); 6266 PetscCall(PetscLayoutSetUp(mat->cmap)); 6267 PetscCall(PetscLayoutGetRange(mat->rmap,&rstart,&rend)); 6268 PetscCall(PetscLayoutGetRange(mat->cmap,&cstart,&cend)); 6269 PetscCall(MatGetLocalSize(mat,&m,&n)); 6270 PetscCall(MatGetSize(mat,&M,&N)); 6271 6272 /* ---------------------------------------------------------------------------*/ 6273 /* Sort (i,j) by row along with a permutation array, so that the to-be-ignored */ 6274 /* entries come first, then local rows, then remote rows. 
*/ 6275 /* ---------------------------------------------------------------------------*/ 6276 PetscCount n1 = coo_n,*perm1; 6277 PetscInt *i1,*j1; /* Copies of input COOs along with a permutation array */ 6278 PetscCall(PetscMalloc3(n1,&i1,n1,&j1,n1,&perm1)); 6279 PetscCall(PetscArraycpy(i1,coo_i,n1)); /* Make a copy since we'll modify it */ 6280 PetscCall(PetscArraycpy(j1,coo_j,n1)); 6281 for (k=0; k<n1; k++) perm1[k] = k; 6282 6283 /* Manipulate indices so that entries with negative row or col indices will have smallest 6284 row indices, local entries will have greater but negative row indices, and remote entries 6285 will have positive row indices. 6286 */ 6287 for (k=0; k<n1; k++) { 6288 if (i1[k] < 0 || j1[k] < 0) i1[k] = PETSC_MIN_INT; /* e.g., -2^31, minimal to move them ahead */ 6289 else if (i1[k] >= rstart && i1[k] < rend) i1[k] -= PETSC_MAX_INT; /* e.g., minus 2^31-1 to shift local rows to range of [-PETSC_MAX_INT, -1] */ 6290 else PetscCheck(!mat->nooffprocentries,PETSC_COMM_SELF,PETSC_ERR_USER_INPUT,"MAT_NO_OFF_PROC_ENTRIES is set but insert to remote rows"); 6291 else if (mpiaij->donotstash) i1[k] = PETSC_MIN_INT; /* Ignore offproc entries as if they had negative indices */ 6292 } 6293 6294 /* Sort by row; after that, [0,k) have ignored entires, [k,rem) have local rows and [rem,n1) have remote rows */ 6295 PetscCall(PetscSortIntWithIntCountArrayPair(n1,i1,j1,perm1)); 6296 for (k=0; k<n1; k++) {if (i1[k] > PETSC_MIN_INT) break;} /* Advance k to the first entry we need to take care of */ 6297 PetscCall(PetscSortedIntUpperBound(i1,k,n1,rend-1-PETSC_MAX_INT,&rem)); /* rem is upper bound of the last local row */ 6298 for (; k<rem; k++) i1[k] += PETSC_MAX_INT; /* Revert row indices of local rows*/ 6299 6300 /* ---------------------------------------------------------------------------*/ 6301 /* Split local rows into diag/offdiag portions */ 6302 /* ---------------------------------------------------------------------------*/ 6303 PetscCount 
*rowBegin1,*rowMid1,*rowEnd1; 6304 PetscCount *Ajmap1,*Aperm1,*Bjmap1,*Bperm1,*Cperm1; 6305 PetscCount Annz1,Bnnz1,Atot1,Btot1; 6306 6307 PetscCall(PetscCalloc3(m,&rowBegin1,m,&rowMid1,m,&rowEnd1)); 6308 PetscCall(PetscMalloc1(n1-rem,&Cperm1)); 6309 PetscCall(MatSplitEntries_Internal(mat,rem,i1,j1,perm1,rowBegin1,rowMid1,rowEnd1,&Atot1,&Aperm1,&Annz1,&Ajmap1,&Btot1,&Bperm1,&Bnnz1,&Bjmap1)); 6310 6311 /* ---------------------------------------------------------------------------*/ 6312 /* Send remote rows to their owner */ 6313 /* ---------------------------------------------------------------------------*/ 6314 /* Find which rows should be sent to which remote ranks*/ 6315 PetscInt nsend = 0; /* Number of MPI ranks to send data to */ 6316 PetscMPIInt *sendto; /* [nsend], storing remote ranks */ 6317 PetscInt *nentries; /* [nsend], storing number of entries sent to remote ranks; Assume PetscInt is big enough for this count, and error if not */ 6318 const PetscInt *ranges; 6319 PetscInt maxNsend = size >= 128? 128 : size; /* Assume max 128 neighbors; realloc when needed */ 6320 6321 PetscCall(PetscLayoutGetRanges(mat->rmap,&ranges)); 6322 PetscCall(PetscMalloc2(maxNsend,&sendto,maxNsend,&nentries)); 6323 for (k=rem; k<n1;) { 6324 PetscMPIInt owner; 6325 PetscInt firstRow,lastRow; 6326 6327 /* Locate a row range */ 6328 firstRow = i1[k]; /* first row of this owner */ 6329 PetscCall(PetscLayoutFindOwner(mat->rmap,firstRow,&owner)); 6330 lastRow = ranges[owner+1]-1; /* last row of this owner */ 6331 6332 /* Find the first index 'p' in [k,n) with i[p] belonging to next owner */ 6333 PetscCall(PetscSortedIntUpperBound(i1,k,n1,lastRow,&p)); 6334 6335 /* All entries in [k,p) belong to this remote owner */ 6336 if (nsend >= maxNsend) { /* Double the remote ranks arrays if not long enough */ 6337 PetscMPIInt *sendto2; 6338 PetscInt *nentries2; 6339 PetscInt maxNsend2 = (maxNsend <= size/2) ? 
maxNsend*2 : size; 6340 6341 PetscCall(PetscMalloc2(maxNsend2,&sendto2,maxNsend2,&nentries2)); 6342 PetscCall(PetscArraycpy(sendto2,sendto,maxNsend)); 6343 PetscCall(PetscArraycpy(nentries2,nentries2,maxNsend+1)); 6344 PetscCall(PetscFree2(sendto,nentries2)); 6345 sendto = sendto2; 6346 nentries = nentries2; 6347 maxNsend = maxNsend2; 6348 } 6349 sendto[nsend] = owner; 6350 nentries[nsend] = p - k; 6351 PetscCall(PetscCountCast(p-k,&nentries[nsend])); 6352 nsend++; 6353 k = p; 6354 } 6355 6356 /* Build 1st SF to know offsets on remote to send data */ 6357 PetscSF sf1; 6358 PetscInt nroots = 1,nroots2 = 0; 6359 PetscInt nleaves = nsend,nleaves2 = 0; 6360 PetscInt *offsets; 6361 PetscSFNode *iremote; 6362 6363 PetscCall(PetscSFCreate(comm,&sf1)); 6364 PetscCall(PetscMalloc1(nsend,&iremote)); 6365 PetscCall(PetscMalloc1(nsend,&offsets)); 6366 for (k=0; k<nsend; k++) { 6367 iremote[k].rank = sendto[k]; 6368 iremote[k].index = 0; 6369 nleaves2 += nentries[k]; 6370 PetscCheck(nleaves2 >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Number of SF leaves is too large for PetscInt"); 6371 } 6372 PetscCall(PetscSFSetGraph(sf1,nroots,nleaves,NULL,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER)); 6373 PetscCall(PetscSFFetchAndOpWithMemTypeBegin(sf1,MPIU_INT,PETSC_MEMTYPE_HOST,&nroots2/*rootdata*/,PETSC_MEMTYPE_HOST,nentries/*leafdata*/,PETSC_MEMTYPE_HOST,offsets/*leafupdate*/,MPI_SUM)); 6374 PetscCall(PetscSFFetchAndOpEnd(sf1,MPIU_INT,&nroots2,nentries,offsets,MPI_SUM)); /* Would nroots2 overflow, we check offsets[] below */ 6375 PetscCall(PetscSFDestroy(&sf1)); 6376 PetscAssert(nleaves2 == n1-rem,PETSC_COMM_SELF,PETSC_ERR_PLIB,"nleaves2 %" PetscInt_FMT " != number of remote entries %" PetscCount_FMT "",nleaves2,n1-rem); 6377 6378 /* Build 2nd SF to send remote COOs to their owner */ 6379 PetscSF sf2; 6380 nroots = nroots2; 6381 nleaves = nleaves2; 6382 PetscCall(PetscSFCreate(comm,&sf2)); 6383 PetscCall(PetscSFSetFromOptions(sf2)); 6384 PetscCall(PetscMalloc1(nleaves,&iremote)); 
6385 p = 0; 6386 for (k=0; k<nsend; k++) { 6387 PetscCheck(offsets[k] >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Number of SF roots is too large for PetscInt"); 6388 for (q=0; q<nentries[k]; q++,p++) { 6389 iremote[p].rank = sendto[k]; 6390 iremote[p].index = offsets[k] + q; 6391 } 6392 } 6393 PetscCall(PetscSFSetGraph(sf2,nroots,nleaves,NULL,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER)); 6394 6395 /* sf2 only sends contiguous leafdata to contiguous rootdata. We record the permutation which will be used to fill leafdata */ 6396 PetscCall(PetscArraycpy(Cperm1,perm1+rem,n1-rem)); 6397 6398 /* Send the remote COOs to their owner */ 6399 PetscInt n2 = nroots,*i2,*j2; /* Buffers for received COOs from other ranks, along with a permutation array */ 6400 PetscCount *perm2; /* Though PetscInt is enough for remote entries, we use PetscCount here as we want to reuse MatSplitEntries_Internal() */ 6401 PetscCall(PetscMalloc3(n2,&i2,n2,&j2,n2,&perm2)); 6402 PetscCall(PetscSFReduceWithMemTypeBegin(sf2,MPIU_INT,PETSC_MEMTYPE_HOST,i1+rem,PETSC_MEMTYPE_HOST,i2,MPI_REPLACE)); 6403 PetscCall(PetscSFReduceEnd(sf2,MPIU_INT,i1+rem,i2,MPI_REPLACE)); 6404 PetscCall(PetscSFReduceWithMemTypeBegin(sf2,MPIU_INT,PETSC_MEMTYPE_HOST,j1+rem,PETSC_MEMTYPE_HOST,j2,MPI_REPLACE)); 6405 PetscCall(PetscSFReduceEnd(sf2,MPIU_INT,j1+rem,j2,MPI_REPLACE)); 6406 6407 PetscCall(PetscFree(offsets)); 6408 PetscCall(PetscFree2(sendto,nentries)); 6409 6410 /* ---------------------------------------------------------------*/ 6411 /* Sort received COOs by row along with the permutation array */ 6412 /* ---------------------------------------------------------------*/ 6413 for (k=0; k<n2; k++) perm2[k] = k; 6414 PetscCall(PetscSortIntWithIntCountArrayPair(n2,i2,j2,perm2)); 6415 6416 /* ---------------------------------------------------------------*/ 6417 /* Split received COOs into diag/offdiag portions */ 6418 /* ---------------------------------------------------------------*/ 6419 PetscCount 
*rowBegin2,*rowMid2,*rowEnd2; 6420 PetscCount *Ajmap2,*Aperm2,*Bjmap2,*Bperm2; 6421 PetscCount Annz2,Bnnz2,Atot2,Btot2; 6422 6423 PetscCall(PetscCalloc3(m,&rowBegin2,m,&rowMid2,m,&rowEnd2)); 6424 PetscCall(MatSplitEntries_Internal(mat,n2,i2,j2,perm2,rowBegin2,rowMid2,rowEnd2,&Atot2,&Aperm2,&Annz2,&Ajmap2,&Btot2,&Bperm2,&Bnnz2,&Bjmap2)); 6425 6426 /* --------------------------------------------------------------------------*/ 6427 /* Merge local COOs with received COOs: diag with diag, offdiag with offdiag */ 6428 /* --------------------------------------------------------------------------*/ 6429 PetscInt *Ai,*Bi; 6430 PetscInt *Aj,*Bj; 6431 6432 PetscCall(PetscMalloc1(m+1,&Ai)); 6433 PetscCall(PetscMalloc1(m+1,&Bi)); 6434 PetscCall(PetscMalloc1(Annz1+Annz2,&Aj)); /* Since local and remote entries might have dups, we might allocate excess memory */ 6435 PetscCall(PetscMalloc1(Bnnz1+Bnnz2,&Bj)); 6436 6437 PetscCount *Aimap1,*Bimap1,*Aimap2,*Bimap2; 6438 PetscCall(PetscMalloc1(Annz1,&Aimap1)); 6439 PetscCall(PetscMalloc1(Bnnz1,&Bimap1)); 6440 PetscCall(PetscMalloc1(Annz2,&Aimap2)); 6441 PetscCall(PetscMalloc1(Bnnz2,&Bimap2)); 6442 6443 PetscCall(MatMergeEntries_Internal(mat,j1,j2,rowBegin1,rowMid1,rowBegin2,rowMid2,Ajmap1,Ajmap2,Aimap1,Aimap2,Ai,Aj)); 6444 PetscCall(MatMergeEntries_Internal(mat,j1,j2,rowMid1, rowEnd1,rowMid2, rowEnd2,Bjmap1,Bjmap2,Bimap1,Bimap2,Bi,Bj)); 6445 6446 /* --------------------------------------------------------------------------*/ 6447 /* Expand Ajmap1/Bjmap1 to make them based off nonzeros in A/B, since we */ 6448 /* expect nonzeros in A/B most likely have local contributing entries */ 6449 /* --------------------------------------------------------------------------*/ 6450 PetscInt Annz = Ai[m]; 6451 PetscInt Bnnz = Bi[m]; 6452 PetscCount *Ajmap1_new,*Bjmap1_new; 6453 6454 PetscCall(PetscMalloc1(Annz+1,&Ajmap1_new)); 6455 PetscCall(PetscMalloc1(Bnnz+1,&Bjmap1_new)); 6456 6457 
PetscCall(ExpandJmap_Internal(Annz1,Annz,Aimap1,Ajmap1,Ajmap1_new)); 6458 PetscCall(ExpandJmap_Internal(Bnnz1,Bnnz,Bimap1,Bjmap1,Bjmap1_new)); 6459 6460 PetscCall(PetscFree(Aimap1)); 6461 PetscCall(PetscFree(Ajmap1)); 6462 PetscCall(PetscFree(Bimap1)); 6463 PetscCall(PetscFree(Bjmap1)); 6464 PetscCall(PetscFree3(rowBegin1,rowMid1,rowEnd1)); 6465 PetscCall(PetscFree3(rowBegin2,rowMid2,rowEnd2)); 6466 PetscCall(PetscFree3(i1,j1,perm1)); 6467 PetscCall(PetscFree3(i2,j2,perm2)); 6468 6469 Ajmap1 = Ajmap1_new; 6470 Bjmap1 = Bjmap1_new; 6471 6472 /* Reallocate Aj, Bj once we know actual numbers of unique nonzeros in A and B */ 6473 if (Annz < Annz1 + Annz2) { 6474 PetscInt *Aj_new; 6475 PetscCall(PetscMalloc1(Annz,&Aj_new)); 6476 PetscCall(PetscArraycpy(Aj_new,Aj,Annz)); 6477 PetscCall(PetscFree(Aj)); 6478 Aj = Aj_new; 6479 } 6480 6481 if (Bnnz < Bnnz1 + Bnnz2) { 6482 PetscInt *Bj_new; 6483 PetscCall(PetscMalloc1(Bnnz,&Bj_new)); 6484 PetscCall(PetscArraycpy(Bj_new,Bj,Bnnz)); 6485 PetscCall(PetscFree(Bj)); 6486 Bj = Bj_new; 6487 } 6488 6489 /* --------------------------------------------------------------------------------*/ 6490 /* Create new submatrices for on-process and off-process coupling */ 6491 /* --------------------------------------------------------------------------------*/ 6492 PetscScalar *Aa,*Ba; 6493 MatType rtype; 6494 Mat_SeqAIJ *a,*b; 6495 PetscCall(PetscCalloc1(Annz,&Aa)); /* Zero matrix on device */ 6496 PetscCall(PetscCalloc1(Bnnz,&Ba)); 6497 /* make Aj[] local, i.e, based off the start column of the diagonal portion */ 6498 if (cstart) {for (k=0; k<Annz; k++) Aj[k] -= cstart;} 6499 PetscCall(MatDestroy(&mpiaij->A)); 6500 PetscCall(MatDestroy(&mpiaij->B)); 6501 PetscCall(MatGetRootType_Private(mat,&rtype)); 6502 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,Ai,Aj,Aa,&mpiaij->A)); 6503 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,mat->cmap->N,Bi,Bj,Ba,&mpiaij->B)); 6504 PetscCall(MatSetUpMultiply_MPIAIJ(mat)); 6505 6506 a = 
(Mat_SeqAIJ*)mpiaij->A->data; 6507 b = (Mat_SeqAIJ*)mpiaij->B->data; 6508 a->singlemalloc = b->singlemalloc = PETSC_FALSE; /* Let newmat own Ai,Aj,Aa,Bi,Bj,Ba */ 6509 a->free_a = b->free_a = PETSC_TRUE; 6510 a->free_ij = b->free_ij = PETSC_TRUE; 6511 6512 /* conversion must happen AFTER multiply setup */ 6513 PetscCall(MatConvert(mpiaij->A,rtype,MAT_INPLACE_MATRIX,&mpiaij->A)); 6514 PetscCall(MatConvert(mpiaij->B,rtype,MAT_INPLACE_MATRIX,&mpiaij->B)); 6515 PetscCall(VecDestroy(&mpiaij->lvec)); 6516 PetscCall(MatCreateVecs(mpiaij->B,&mpiaij->lvec,NULL)); 6517 PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)mpiaij->lvec)); 6518 6519 mpiaij->coo_n = coo_n; 6520 mpiaij->coo_sf = sf2; 6521 mpiaij->sendlen = nleaves; 6522 mpiaij->recvlen = nroots; 6523 6524 mpiaij->Annz = Annz; 6525 mpiaij->Bnnz = Bnnz; 6526 6527 mpiaij->Annz2 = Annz2; 6528 mpiaij->Bnnz2 = Bnnz2; 6529 6530 mpiaij->Atot1 = Atot1; 6531 mpiaij->Atot2 = Atot2; 6532 mpiaij->Btot1 = Btot1; 6533 mpiaij->Btot2 = Btot2; 6534 6535 mpiaij->Ajmap1 = Ajmap1; 6536 mpiaij->Aperm1 = Aperm1; 6537 6538 mpiaij->Bjmap1 = Bjmap1; 6539 mpiaij->Bperm1 = Bperm1; 6540 6541 mpiaij->Aimap2 = Aimap2; 6542 mpiaij->Ajmap2 = Ajmap2; 6543 mpiaij->Aperm2 = Aperm2; 6544 6545 mpiaij->Bimap2 = Bimap2; 6546 mpiaij->Bjmap2 = Bjmap2; 6547 mpiaij->Bperm2 = Bperm2; 6548 6549 mpiaij->Cperm1 = Cperm1; 6550 6551 /* Allocate in preallocation. 
If not used, it has zero cost on host */ 6552 PetscCall(PetscMalloc2(mpiaij->sendlen,&mpiaij->sendbuf,mpiaij->recvlen,&mpiaij->recvbuf)); 6553 PetscFunctionReturn(0); 6554 } 6555 6556 static PetscErrorCode MatSetValuesCOO_MPIAIJ(Mat mat,const PetscScalar v[],InsertMode imode) 6557 { 6558 Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ*)mat->data; 6559 Mat A = mpiaij->A,B = mpiaij->B; 6560 PetscCount Annz = mpiaij->Annz,Annz2 = mpiaij->Annz2,Bnnz = mpiaij->Bnnz,Bnnz2 = mpiaij->Bnnz2; 6561 PetscScalar *Aa,*Ba; 6562 PetscScalar *sendbuf = mpiaij->sendbuf; 6563 PetscScalar *recvbuf = mpiaij->recvbuf; 6564 const PetscCount *Ajmap1 = mpiaij->Ajmap1,*Ajmap2 = mpiaij->Ajmap2,*Aimap2 = mpiaij->Aimap2; 6565 const PetscCount *Bjmap1 = mpiaij->Bjmap1,*Bjmap2 = mpiaij->Bjmap2,*Bimap2 = mpiaij->Bimap2; 6566 const PetscCount *Aperm1 = mpiaij->Aperm1,*Aperm2 = mpiaij->Aperm2,*Bperm1 = mpiaij->Bperm1,*Bperm2 = mpiaij->Bperm2; 6567 const PetscCount *Cperm1 = mpiaij->Cperm1; 6568 6569 PetscFunctionBegin; 6570 PetscCall(MatSeqAIJGetArray(A,&Aa)); /* Might read and write matrix values */ 6571 PetscCall(MatSeqAIJGetArray(B,&Ba)); 6572 6573 /* Pack entries to be sent to remote */ 6574 for (PetscCount i=0; i<mpiaij->sendlen; i++) sendbuf[i] = v[Cperm1[i]]; 6575 6576 /* Send remote entries to their owner and overlap the communication with local computation */ 6577 PetscCall(PetscSFReduceWithMemTypeBegin(mpiaij->coo_sf,MPIU_SCALAR,PETSC_MEMTYPE_HOST,sendbuf,PETSC_MEMTYPE_HOST,recvbuf,MPI_REPLACE)); 6578 /* Add local entries to A and B */ 6579 for (PetscCount i=0; i<Annz; i++) { /* All nonzeros in A are either zero'ed or added with a value (i.e., initialized) */ 6580 PetscScalar sum = 0.0; /* Do partial summation first to improve numerical stablility */ 6581 for (PetscCount k=Ajmap1[i]; k<Ajmap1[i+1]; k++) sum += v[Aperm1[k]]; 6582 Aa[i] = (imode == INSERT_VALUES? 
0.0 : Aa[i]) + sum; 6583 } 6584 for (PetscCount i=0; i<Bnnz; i++) { 6585 PetscScalar sum = 0.0; 6586 for (PetscCount k=Bjmap1[i]; k<Bjmap1[i+1]; k++) sum += v[Bperm1[k]]; 6587 Ba[i] = (imode == INSERT_VALUES? 0.0 : Ba[i]) + sum; 6588 } 6589 PetscCall(PetscSFReduceEnd(mpiaij->coo_sf,MPIU_SCALAR,sendbuf,recvbuf,MPI_REPLACE)); 6590 6591 /* Add received remote entries to A and B */ 6592 for (PetscCount i=0; i<Annz2; i++) { 6593 for (PetscCount k=Ajmap2[i]; k<Ajmap2[i+1]; k++) Aa[Aimap2[i]] += recvbuf[Aperm2[k]]; 6594 } 6595 for (PetscCount i=0; i<Bnnz2; i++) { 6596 for (PetscCount k=Bjmap2[i]; k<Bjmap2[i+1]; k++) Ba[Bimap2[i]] += recvbuf[Bperm2[k]]; 6597 } 6598 PetscCall(MatSeqAIJRestoreArray(A,&Aa)); 6599 PetscCall(MatSeqAIJRestoreArray(B,&Ba)); 6600 PetscFunctionReturn(0); 6601 } 6602 6603 /* ----------------------------------------------------------------*/ 6604 6605 /*MC 6606 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 6607 6608 Options Database Keys: 6609 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 6610 6611 Level: beginner 6612 6613 Notes: 6614 MatSetValues() may be called for this matrix type with a NULL argument for the numerical values, 6615 in this case the values associated with the rows and columns one passes in are set to zero 6616 in the matrix 6617 6618 MatSetOptions(,MAT_STRUCTURE_ONLY,PETSC_TRUE) may be called for this matrix type. 
In this case, no
   space is allocated for the nonzero entries and any entries passed with MatSetValues() are ignored

.seealso: `MatCreateAIJ()`
M*/

/* Type constructor registered for MATMPIAIJ: allocates the Mat_MPIAIJ implementation
   data, installs the MATMPIAIJ function table, and composes the type-specific methods
   and conversion routines looked up by name elsewhere in PETSc. */
PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
{
  Mat_MPIAIJ  *b;
  PetscMPIInt size;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B),&size));

  PetscCall(PetscNewLog(B,&b));
  B->data       = (void*)b;
  PetscCall(PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps)));
  B->assembled  = PETSC_FALSE;
  B->insertmode = NOT_SET_VALUES;
  b->size       = size;

  PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank));

  /* build cache for off array entries formed */
  PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash));

  b->donotstash  = PETSC_FALSE;
  b->colmap      = NULL;
  b->garray      = NULL;
  b->roworiented = PETSC_TRUE;

  /* stuff used for matrix vector multiply */
  b->lvec  = NULL;
  b->Mvctx = NULL;

  /* stuff for MatGetRow() */
  b->rowindices   = NULL;
  b->rowvalues    = NULL;
  b->getrowactive = PETSC_FALSE;

  /* flexible pointer used in CUSPARSE classes */
  b->spptr = NULL;

  /* compose the methods queried by name via PetscObjectQueryFunction();
     the string keys are part of PETSc's internal API and must not change */
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetUseScalableIncreaseOverlap_C",MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatResetPreallocation_C",MatResetPreallocation_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ));
  /* conversions to sibling AIJ formats; device/back-end ones are conditionally compiled */
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijsell_C",MatConvert_MPIAIJ_MPIAIJSELL));
#if defined(PETSC_HAVE_CUDA)
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcusparse_C",MatConvert_MPIAIJ_MPIAIJCUSPARSE));
#endif
#if defined(PETSC_HAVE_KOKKOS_KERNELS)
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijkokkos_C",MatConvert_MPIAIJ_MPIAIJKokkos));
#endif
#if defined(PETSC_HAVE_MKL_SPARSE)
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijmkl_C",MatConvert_MPIAIJ_MPIAIJMKL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpibaij_C",MatConvert_MPIAIJ_MPIBAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpidense_C",MatConvert_MPIAIJ_MPIDense));
#if defined(PETSC_HAVE_ELEMENTAL)
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_elemental_C",MatConvert_MPIAIJ_Elemental));
#endif
#if defined(PETSC_HAVE_SCALAPACK)
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_scalapack_C",MatConvert_AIJ_ScaLAPACK));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_is_C",MatConvert_XAIJ_IS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisell_C",MatConvert_MPIAIJ_MPISELL));
#if defined(PETSC_HAVE_HYPRE)
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_hypre_C",MatConvert_AIJ_HYPRE));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_transpose_mpiaij_mpiaij_C",MatProductSetFromOptions_Transpose_AIJ_AIJ));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_is_mpiaij_C",MatProductSetFromOptions_IS_XAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaij_mpiaij_C",MatProductSetFromOptions_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatSetPreallocationCOO_C",MatSetPreallocationCOO_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatSetValuesCOO_C",MatSetValuesCOO_MPIAIJ));
  PetscCall(PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ));
  PetscFunctionReturn(0);
}

/*@C
     MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal"
         and "off-diagonal" part of the matrix in CSR format.

   Collective

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (Cannot be PETSC_DECIDE)
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
       calculated if N is given) For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.  i - row indices for "diagonal" portion of matrix; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
.  j - column indices, which must be local, i.e., based off the start column of the diagonal portion
.  a - matrix values
.  oi - row indices for "off-diagonal" portion of matrix; that is oi[0] = 0, oi[row] = oi[row-1] + number of elements in that row of the matrix
.  oj - column indices, which must be global, representing global columns in the MPIAIJ matrix
-  oa - matrix values

   Output Parameter:
.  mat - the matrix

   Level: advanced

   Notes:
       The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
       must free the arrays once the matrix has been destroyed and not before.

       The i and j indices are 0 based

       See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix

       This sets local rows and cannot be used to set off-processor values.

       Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
       legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
       not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because
       the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
       keep track of the underlying array. Use MatSetOption(A,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all
       communication if it is known that only local entries will be set.

.seealso: `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
          `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithArrays()`
@*/
PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
{
  Mat_MPIAIJ *maij;

  PetscFunctionBegin;
  PetscCheck(m >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
  PetscCheck(i[0] == 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
  PetscCheck(oi[0] == 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
  PetscCall(MatCreate(comm,mat));
  PetscCall(MatSetSizes(*mat,m,n,M,N));
  PetscCall(MatSetType(*mat,MATMPIAIJ));
  maij = (Mat_MPIAIJ*) (*mat)->data;

  /* mark preallocated so the assembly below does not complain about missing preallocation */
  (*mat)->preallocated = PETSC_TRUE;

  PetscCall(PetscLayoutSetUp((*mat)->rmap));
  PetscCall(PetscLayoutSetUp((*mat)->cmap));

  /* wrap the user's arrays (no copy) as the diagonal and off-diagonal sequential blocks */
  PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A));
  PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B));

  /* all entries are local by construction, so skip the off-process stash exchange
     during assembly, then restore the option for subsequent user calls */
  PetscCall(MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE));
  PetscCall(MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY));
  PetscCall(MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE));
  PetscCall(MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE));
  PetscFunctionReturn(0);
}

/* Per-product state attached to C->product->data by MatProductSymbolic_MPIAIJBACKEND() */
typedef struct {
  Mat       *mp;    /* intermediate products */
  PetscBool *mptmp; /* is the intermediate product temporary ?
                       */
  PetscInt  cp;     /* number of intermediate products */

  /* support for MatGetBrowsOfAoCols_MPIAIJ for P_oth */
  PetscInt    *startsj_s,*startsj_r;
  PetscScalar *bufa;
  Mat         P_oth;

  /* may take advantage of merging product->B */
  Mat Bloc; /* B-local by merging diag and off-diag */

  /* cusparse does not have support to split between symbolic and numeric phases.
     When api_user is true, we don't need to update the numerical values
     of the temporary storage */
  PetscBool reusesym;

  /* support for COO values insertion */
  PetscScalar  *coo_v,*coo_w;  /* store on-process and off-process COO scalars, and used as MPI recv/send buffers respectively */
  PetscInt    **own;           /* own[i] points to address of on-process COO indices for Mat mp[i] */
  PetscInt    **off;           /* off[i] points to address of off-process COO indices for Mat mp[i] */
  PetscBool     hasoffproc;    /* if true, have off-process values insertion (i.e. AtB or PtAP) */
  PetscSF       sf;            /* used for non-local values insertion and memory malloc */
  PetscMemType  mtype;

  /* customization */
  PetscBool abmerge;           /* for AB: merge P's diag/off-diag blocks before multiplying */
  PetscBool P_oth_bind;        /* bind P_oth to the CPU */
} MatMatMPIAIJBACKEND;

/* Destructor for the MatMatMPIAIJBACKEND product data; installed as C->product->destroy */
PetscErrorCode MatDestroy_MatMatMPIAIJBACKEND(void *data)
{
  MatMatMPIAIJBACKEND *mmdata = (MatMatMPIAIJBACKEND*)data;
  PetscInt            i;

  PetscFunctionBegin;
  PetscCall(PetscFree2(mmdata->startsj_s,mmdata->startsj_r));
  PetscCall(PetscFree(mmdata->bufa));
  /* coo_v/coo_w were allocated with PetscSFMalloc and must be freed with the matching routine */
  PetscCall(PetscSFFree(mmdata->sf,mmdata->mtype,mmdata->coo_v));
  PetscCall(PetscSFFree(mmdata->sf,mmdata->mtype,mmdata->coo_w));
  PetscCall(MatDestroy(&mmdata->P_oth));
  PetscCall(MatDestroy(&mmdata->Bloc));
  PetscCall(PetscSFDestroy(&mmdata->sf));
  for (i = 0; i < mmdata->cp; i++) {
    PetscCall(MatDestroy(&mmdata->mp[i]));
  }
  PetscCall(PetscFree2(mmdata->mp,mmdata->mptmp));
  /* own[1..cp] are cursors into the single array rooted at own[0]; only own[0] is freed */
  PetscCall(PetscFree(mmdata->own[0]));
  PetscCall(PetscFree(mmdata->own));
  /* likewise, off[1..cp] alias the array rooted at off[0] */
  PetscCall(PetscFree(mmdata->off[0]));
  PetscCall(PetscFree(mmdata->off));
  PetscCall(PetscFree(mmdata));
  PetscFunctionReturn(0);
}

/* Copy selected n entries with indices in idx[] of A to v[].
   If idx is NULL, copy the whole data array of A to v[]
*/
static PetscErrorCode MatSeqAIJCopySubArray(Mat A, PetscInt n, const PetscInt idx[], PetscScalar v[])
{
  PetscErrorCode (*f)(Mat,PetscInt,const PetscInt[],PetscScalar[]);

  PetscFunctionBegin;
  /* prefer a type-specific implementation when one has been composed on A */
  PetscCall(PetscObjectQueryFunction((PetscObject)A,"MatSeqAIJCopySubArray_C",&f));
  if (f) {
    PetscCall((*f)(A,n,idx,v));
  } else {
    const PetscScalar *vv;

    PetscCall(MatSeqAIJGetArrayRead(A,&vv));
    if (n && idx) {
      /* gather the selected entries */
      PetscScalar    *w  = v;
      const PetscInt *oi = idx;
      PetscInt       j;

      for (j = 0; j < n; j++) *w++ = vv[*oi++];
    } else {
      /* idx == NULL (or n == 0): contiguous copy of the first n values */
      PetscCall(PetscArraycpy(v,vv,n));
    }
    PetscCall(MatSeqAIJRestoreArrayRead(A,&vv));
  }
  PetscFunctionReturn(0);
}

/* Numeric phase of the backend matrix product: refresh the temporary matrices
   (unless the symbolic phase already left them current), run the numeric op of
   each intermediate product, gather their values into the COO buffers, and
   insert them into C via MatSetValuesCOO() */
static PetscErrorCode MatProductNumeric_MPIAIJBACKEND(Mat C)
{
  MatMatMPIAIJBACKEND *mmdata;
  PetscInt            i,n_d,n_o;

  PetscFunctionBegin;
  MatCheckProduct(C,1);
  PetscCheck(C->product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty");
  mmdata = (MatMatMPIAIJBACKEND*)C->product->data;
  if (!mmdata->reusesym) { /* update temporary matrices */
    if (mmdata->P_oth) {
      PetscCall(MatGetBrowsOfAoCols_MPIAIJ(C->product->A,C->product->B,MAT_REUSE_MATRIX,&mmdata->startsj_s,&mmdata->startsj_r,&mmdata->bufa,&mmdata->P_oth));
    }
    if (mmdata->Bloc) {
      PetscCall(MatMPIAIJGetLocalMatMerge(C->product->B,MAT_REUSE_MATRIX,NULL,&mmdata->Bloc));
    }
  }
  mmdata->reusesym = PETSC_FALSE; /* reusesym only skips the first numeric call after symbolic */

  for (i = 0; i < mmdata->cp; i++) {
    PetscCheck(mmdata->mp[i]->ops->productnumeric,PetscObjectComm((PetscObject)mmdata->mp[i]),PETSC_ERR_PLIB,"Missing numeric op for 
%s",MatProductTypes[mmdata->mp[i]->product->type]); 6885 PetscCall((*mmdata->mp[i]->ops->productnumeric)(mmdata->mp[i])); 6886 } 6887 for (i = 0, n_d = 0, n_o = 0; i < mmdata->cp; i++) { 6888 PetscInt noff = mmdata->off[i+1] - mmdata->off[i]; 6889 6890 if (mmdata->mptmp[i]) continue; 6891 if (noff) { 6892 PetscInt nown = mmdata->own[i+1] - mmdata->own[i]; 6893 6894 PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i],noff,mmdata->off[i],mmdata->coo_w + n_o)); 6895 PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i],nown,mmdata->own[i],mmdata->coo_v + n_d)); 6896 n_o += noff; 6897 n_d += nown; 6898 } else { 6899 Mat_SeqAIJ *mm = (Mat_SeqAIJ*)mmdata->mp[i]->data; 6900 6901 PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i],mm->nz,NULL,mmdata->coo_v + n_d)); 6902 n_d += mm->nz; 6903 } 6904 } 6905 if (mmdata->hasoffproc) { /* offprocess insertion */ 6906 PetscCall(PetscSFGatherBegin(mmdata->sf,MPIU_SCALAR,mmdata->coo_w,mmdata->coo_v+n_d)); 6907 PetscCall(PetscSFGatherEnd(mmdata->sf,MPIU_SCALAR,mmdata->coo_w,mmdata->coo_v+n_d)); 6908 } 6909 PetscCall(MatSetValuesCOO(C,mmdata->coo_v,INSERT_VALUES)); 6910 PetscFunctionReturn(0); 6911 } 6912 6913 /* Support for Pt * A, A * P, or Pt * A * P */ 6914 #define MAX_NUMBER_INTERMEDIATE 4 6915 PetscErrorCode MatProductSymbolic_MPIAIJBACKEND(Mat C) 6916 { 6917 Mat_Product *product = C->product; 6918 Mat A,P,mp[MAX_NUMBER_INTERMEDIATE]; /* A, P and a series of intermediate matrices */ 6919 Mat_MPIAIJ *a,*p; 6920 MatMatMPIAIJBACKEND *mmdata; 6921 ISLocalToGlobalMapping P_oth_l2g = NULL; 6922 IS glob = NULL; 6923 const char *prefix; 6924 char pprefix[256]; 6925 const PetscInt *globidx,*P_oth_idx; 6926 PetscInt i,j,cp,m,n,M,N,*coo_i,*coo_j; 6927 PetscCount ncoo,ncoo_d,ncoo_o,ncoo_oown; 6928 PetscInt cmapt[MAX_NUMBER_INTERMEDIATE],rmapt[MAX_NUMBER_INTERMEDIATE]; /* col/row map type for each Mat in mp[]. 
*/ 6929 /* type-0: consecutive, start from 0; type-1: consecutive with */ 6930 /* a base offset; type-2: sparse with a local to global map table */ 6931 const PetscInt *cmapa[MAX_NUMBER_INTERMEDIATE],*rmapa[MAX_NUMBER_INTERMEDIATE]; /* col/row local to global map array (table) for type-2 map type */ 6932 6933 MatProductType ptype; 6934 PetscBool mptmp[MAX_NUMBER_INTERMEDIATE],hasoffproc = PETSC_FALSE,iscuda,iskokk; 6935 PetscMPIInt size; 6936 6937 PetscFunctionBegin; 6938 MatCheckProduct(C,1); 6939 PetscCheck(!product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data not empty"); 6940 ptype = product->type; 6941 if (product->A->symmetric && ptype == MATPRODUCT_AtB) { 6942 ptype = MATPRODUCT_AB; 6943 product->symbolic_used_the_fact_A_is_symmetric = PETSC_TRUE; 6944 } 6945 switch (ptype) { 6946 case MATPRODUCT_AB: 6947 A = product->A; 6948 P = product->B; 6949 m = A->rmap->n; 6950 n = P->cmap->n; 6951 M = A->rmap->N; 6952 N = P->cmap->N; 6953 hasoffproc = PETSC_FALSE; /* will not scatter mat product values to other processes */ 6954 break; 6955 case MATPRODUCT_AtB: 6956 P = product->A; 6957 A = product->B; 6958 m = P->cmap->n; 6959 n = A->cmap->n; 6960 M = P->cmap->N; 6961 N = A->cmap->N; 6962 hasoffproc = PETSC_TRUE; 6963 break; 6964 case MATPRODUCT_PtAP: 6965 A = product->A; 6966 P = product->B; 6967 m = P->cmap->n; 6968 n = P->cmap->n; 6969 M = P->cmap->N; 6970 N = P->cmap->N; 6971 hasoffproc = PETSC_TRUE; 6972 break; 6973 default: 6974 SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Not for product type %s",MatProductTypes[ptype]); 6975 } 6976 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)C),&size)); 6977 if (size == 1) hasoffproc = PETSC_FALSE; 6978 6979 /* defaults */ 6980 for (i=0;i<MAX_NUMBER_INTERMEDIATE;i++) { 6981 mp[i] = NULL; 6982 mptmp[i] = PETSC_FALSE; 6983 rmapt[i] = -1; 6984 cmapt[i] = -1; 6985 rmapa[i] = NULL; 6986 cmapa[i] = NULL; 6987 } 6988 6989 /* customization */ 6990 PetscCall(PetscNew(&mmdata)); 6991 
mmdata->reusesym = product->api_user; 6992 if (ptype == MATPRODUCT_AB) { 6993 if (product->api_user) { 6994 PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatMatMult","Mat"); 6995 PetscCall(PetscOptionsBool("-matmatmult_backend_mergeB","Merge product->B local matrices","MatMatMult",mmdata->abmerge,&mmdata->abmerge,NULL)); 6996 PetscCall(PetscOptionsBool("-matmatmult_backend_pothbind","Bind P_oth to CPU","MatBindToCPU",mmdata->P_oth_bind,&mmdata->P_oth_bind,NULL)); 6997 PetscOptionsEnd(); 6998 } else { 6999 PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_AB","Mat"); 7000 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_mergeB","Merge product->B local matrices","MatMatMult",mmdata->abmerge,&mmdata->abmerge,NULL)); 7001 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_pothbind","Bind P_oth to CPU","MatBindToCPU",mmdata->P_oth_bind,&mmdata->P_oth_bind,NULL)); 7002 PetscOptionsEnd(); 7003 } 7004 } else if (ptype == MATPRODUCT_PtAP) { 7005 if (product->api_user) { 7006 PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatPtAP","Mat"); 7007 PetscCall(PetscOptionsBool("-matptap_backend_pothbind","Bind P_oth to CPU","MatBindToCPU",mmdata->P_oth_bind,&mmdata->P_oth_bind,NULL)); 7008 PetscOptionsEnd(); 7009 } else { 7010 PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_PtAP","Mat"); 7011 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_pothbind","Bind P_oth to CPU","MatBindToCPU",mmdata->P_oth_bind,&mmdata->P_oth_bind,NULL)); 7012 PetscOptionsEnd(); 7013 } 7014 } 7015 a = (Mat_MPIAIJ*)A->data; 7016 p = (Mat_MPIAIJ*)P->data; 7017 PetscCall(MatSetSizes(C,m,n,M,N)); 7018 PetscCall(PetscLayoutSetUp(C->rmap)); 7019 PetscCall(PetscLayoutSetUp(C->cmap)); 7020 PetscCall(MatSetType(C,((PetscObject)A)->type_name)); 7021 PetscCall(MatGetOptionsPrefix(C,&prefix)); 7022 7023 cp = 0; 7024 switch (ptype) { 7025 case MATPRODUCT_AB: 
/* A * P */ 7026 PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A,P,MAT_INITIAL_MATRIX,&mmdata->startsj_s,&mmdata->startsj_r,&mmdata->bufa,&mmdata->P_oth)); 7027 7028 /* A_diag * P_local (merged or not) */ 7029 if (mmdata->abmerge) { /* P's diagonal and off-diag blocks are merged to one matrix, then multiplied by A_diag */ 7030 /* P is product->B */ 7031 PetscCall(MatMPIAIJGetLocalMatMerge(P,MAT_INITIAL_MATRIX,&glob,&mmdata->Bloc)); 7032 PetscCall(MatProductCreate(a->A,mmdata->Bloc,NULL,&mp[cp])); 7033 PetscCall(MatProductSetType(mp[cp],MATPRODUCT_AB)); 7034 PetscCall(MatProductSetFill(mp[cp],product->fill)); 7035 PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp)); 7036 PetscCall(MatSetOptionsPrefix(mp[cp],prefix)); 7037 PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix)); 7038 mp[cp]->product->api_user = product->api_user; 7039 PetscCall(MatProductSetFromOptions(mp[cp])); 7040 PetscCheck(mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]); 7041 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7042 PetscCall(ISGetIndices(glob,&globidx)); 7043 rmapt[cp] = 1; 7044 cmapt[cp] = 2; 7045 cmapa[cp] = globidx; 7046 mptmp[cp] = PETSC_FALSE; 7047 cp++; 7048 } else { /* A_diag * P_diag and A_diag * P_off */ 7049 PetscCall(MatProductCreate(a->A,p->A,NULL,&mp[cp])); 7050 PetscCall(MatProductSetType(mp[cp],MATPRODUCT_AB)); 7051 PetscCall(MatProductSetFill(mp[cp],product->fill)); 7052 PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp)); 7053 PetscCall(MatSetOptionsPrefix(mp[cp],prefix)); 7054 PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix)); 7055 mp[cp]->product->api_user = product->api_user; 7056 PetscCall(MatProductSetFromOptions(mp[cp])); 7057 PetscCheck(mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]); 7058 
PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7059 rmapt[cp] = 1; 7060 cmapt[cp] = 1; 7061 mptmp[cp] = PETSC_FALSE; 7062 cp++; 7063 PetscCall(MatProductCreate(a->A,p->B,NULL,&mp[cp])); 7064 PetscCall(MatProductSetType(mp[cp],MATPRODUCT_AB)); 7065 PetscCall(MatProductSetFill(mp[cp],product->fill)); 7066 PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp)); 7067 PetscCall(MatSetOptionsPrefix(mp[cp],prefix)); 7068 PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix)); 7069 mp[cp]->product->api_user = product->api_user; 7070 PetscCall(MatProductSetFromOptions(mp[cp])); 7071 PetscCheck(mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]); 7072 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7073 rmapt[cp] = 1; 7074 cmapt[cp] = 2; 7075 cmapa[cp] = p->garray; 7076 mptmp[cp] = PETSC_FALSE; 7077 cp++; 7078 } 7079 7080 /* A_off * P_other */ 7081 if (mmdata->P_oth) { 7082 PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth,&P_oth_l2g)); /* make P_oth use local col ids */ 7083 PetscCall(ISLocalToGlobalMappingGetIndices(P_oth_l2g,&P_oth_idx)); 7084 PetscCall(MatSetType(mmdata->P_oth,((PetscObject)(a->B))->type_name)); 7085 PetscCall(MatBindToCPU(mmdata->P_oth,mmdata->P_oth_bind)); 7086 PetscCall(MatProductCreate(a->B,mmdata->P_oth,NULL,&mp[cp])); 7087 PetscCall(MatProductSetType(mp[cp],MATPRODUCT_AB)); 7088 PetscCall(MatProductSetFill(mp[cp],product->fill)); 7089 PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp)); 7090 PetscCall(MatSetOptionsPrefix(mp[cp],prefix)); 7091 PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix)); 7092 mp[cp]->product->api_user = product->api_user; 7093 PetscCall(MatProductSetFromOptions(mp[cp])); 7094 PetscCheck(mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]); 7095 
PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7096 rmapt[cp] = 1; 7097 cmapt[cp] = 2; 7098 cmapa[cp] = P_oth_idx; 7099 mptmp[cp] = PETSC_FALSE; 7100 cp++; 7101 } 7102 break; 7103 7104 case MATPRODUCT_AtB: /* (P^t * A): P_diag * A_loc + P_off * A_loc */ 7105 /* A is product->B */ 7106 PetscCall(MatMPIAIJGetLocalMatMerge(A,MAT_INITIAL_MATRIX,&glob,&mmdata->Bloc)); 7107 if (A == P) { /* when A==P, we can take advantage of the already merged mmdata->Bloc */ 7108 PetscCall(MatProductCreate(mmdata->Bloc,mmdata->Bloc,NULL,&mp[cp])); 7109 PetscCall(MatProductSetType(mp[cp],MATPRODUCT_AtB)); 7110 PetscCall(MatProductSetFill(mp[cp],product->fill)); 7111 PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp)); 7112 PetscCall(MatSetOptionsPrefix(mp[cp],prefix)); 7113 PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix)); 7114 mp[cp]->product->api_user = product->api_user; 7115 PetscCall(MatProductSetFromOptions(mp[cp])); 7116 PetscCheck(mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]); 7117 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7118 PetscCall(ISGetIndices(glob,&globidx)); 7119 rmapt[cp] = 2; 7120 rmapa[cp] = globidx; 7121 cmapt[cp] = 2; 7122 cmapa[cp] = globidx; 7123 mptmp[cp] = PETSC_FALSE; 7124 cp++; 7125 } else { 7126 PetscCall(MatProductCreate(p->A,mmdata->Bloc,NULL,&mp[cp])); 7127 PetscCall(MatProductSetType(mp[cp],MATPRODUCT_AtB)); 7128 PetscCall(MatProductSetFill(mp[cp],product->fill)); 7129 PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp)); 7130 PetscCall(MatSetOptionsPrefix(mp[cp],prefix)); 7131 PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix)); 7132 mp[cp]->product->api_user = product->api_user; 7133 PetscCall(MatProductSetFromOptions(mp[cp])); 7134 PetscCheck(mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for 
%s",MatProductTypes[mp[cp]->product->type]); 7135 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7136 PetscCall(ISGetIndices(glob,&globidx)); 7137 rmapt[cp] = 1; 7138 cmapt[cp] = 2; 7139 cmapa[cp] = globidx; 7140 mptmp[cp] = PETSC_FALSE; 7141 cp++; 7142 PetscCall(MatProductCreate(p->B,mmdata->Bloc,NULL,&mp[cp])); 7143 PetscCall(MatProductSetType(mp[cp],MATPRODUCT_AtB)); 7144 PetscCall(MatProductSetFill(mp[cp],product->fill)); 7145 PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp)); 7146 PetscCall(MatSetOptionsPrefix(mp[cp],prefix)); 7147 PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix)); 7148 mp[cp]->product->api_user = product->api_user; 7149 PetscCall(MatProductSetFromOptions(mp[cp])); 7150 PetscCheck(mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]); 7151 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7152 rmapt[cp] = 2; 7153 rmapa[cp] = p->garray; 7154 cmapt[cp] = 2; 7155 cmapa[cp] = globidx; 7156 mptmp[cp] = PETSC_FALSE; 7157 cp++; 7158 } 7159 break; 7160 case MATPRODUCT_PtAP: 7161 PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A,P,MAT_INITIAL_MATRIX,&mmdata->startsj_s,&mmdata->startsj_r,&mmdata->bufa,&mmdata->P_oth)); 7162 /* P is product->B */ 7163 PetscCall(MatMPIAIJGetLocalMatMerge(P,MAT_INITIAL_MATRIX,&glob,&mmdata->Bloc)); 7164 PetscCall(MatProductCreate(a->A,mmdata->Bloc,NULL,&mp[cp])); 7165 PetscCall(MatProductSetType(mp[cp],MATPRODUCT_PtAP)); 7166 PetscCall(MatProductSetFill(mp[cp],product->fill)); 7167 PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp)); 7168 PetscCall(MatSetOptionsPrefix(mp[cp],prefix)); 7169 PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix)); 7170 mp[cp]->product->api_user = product->api_user; 7171 PetscCall(MatProductSetFromOptions(mp[cp])); 7172 PetscCheck(mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for 
%s",MatProductTypes[mp[cp]->product->type]); 7173 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7174 PetscCall(ISGetIndices(glob,&globidx)); 7175 rmapt[cp] = 2; 7176 rmapa[cp] = globidx; 7177 cmapt[cp] = 2; 7178 cmapa[cp] = globidx; 7179 mptmp[cp] = PETSC_FALSE; 7180 cp++; 7181 if (mmdata->P_oth) { 7182 PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth,&P_oth_l2g)); 7183 PetscCall(ISLocalToGlobalMappingGetIndices(P_oth_l2g,&P_oth_idx)); 7184 PetscCall(MatSetType(mmdata->P_oth,((PetscObject)(a->B))->type_name)); 7185 PetscCall(MatBindToCPU(mmdata->P_oth,mmdata->P_oth_bind)); 7186 PetscCall(MatProductCreate(a->B,mmdata->P_oth,NULL,&mp[cp])); 7187 PetscCall(MatProductSetType(mp[cp],MATPRODUCT_AB)); 7188 PetscCall(MatProductSetFill(mp[cp],product->fill)); 7189 PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp)); 7190 PetscCall(MatSetOptionsPrefix(mp[cp],prefix)); 7191 PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix)); 7192 mp[cp]->product->api_user = product->api_user; 7193 PetscCall(MatProductSetFromOptions(mp[cp])); 7194 PetscCheck(mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]); 7195 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7196 mptmp[cp] = PETSC_TRUE; 7197 cp++; 7198 PetscCall(MatProductCreate(mmdata->Bloc,mp[1],NULL,&mp[cp])); 7199 PetscCall(MatProductSetType(mp[cp],MATPRODUCT_AtB)); 7200 PetscCall(MatProductSetFill(mp[cp],product->fill)); 7201 PetscCall(PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%" PetscInt_FMT "_",cp)); 7202 PetscCall(MatSetOptionsPrefix(mp[cp],prefix)); 7203 PetscCall(MatAppendOptionsPrefix(mp[cp],pprefix)); 7204 mp[cp]->product->api_user = product->api_user; 7205 PetscCall(MatProductSetFromOptions(mp[cp])); 7206 PetscCheck(mp[cp]->ops->productsymbolic,PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]); 7207 
PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7208 rmapt[cp] = 2; 7209 rmapa[cp] = globidx; 7210 cmapt[cp] = 2; 7211 cmapa[cp] = P_oth_idx; 7212 mptmp[cp] = PETSC_FALSE; 7213 cp++; 7214 } 7215 break; 7216 default: 7217 SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Not for product type %s",MatProductTypes[ptype]); 7218 } 7219 /* sanity check */ 7220 if (size > 1) for (i = 0; i < cp; i++) PetscCheck(rmapt[i] != 2 || hasoffproc,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected offproc map type for product %" PetscInt_FMT,i); 7221 7222 PetscCall(PetscMalloc2(cp,&mmdata->mp,cp,&mmdata->mptmp)); 7223 for (i = 0; i < cp; i++) { 7224 mmdata->mp[i] = mp[i]; 7225 mmdata->mptmp[i] = mptmp[i]; 7226 } 7227 mmdata->cp = cp; 7228 C->product->data = mmdata; 7229 C->product->destroy = MatDestroy_MatMatMPIAIJBACKEND; 7230 C->ops->productnumeric = MatProductNumeric_MPIAIJBACKEND; 7231 7232 /* memory type */ 7233 mmdata->mtype = PETSC_MEMTYPE_HOST; 7234 PetscCall(PetscObjectTypeCompareAny((PetscObject)C,&iscuda,MATSEQAIJCUSPARSE,MATMPIAIJCUSPARSE,"")); 7235 PetscCall(PetscObjectTypeCompareAny((PetscObject)C,&iskokk,MATSEQAIJKOKKOS,MATMPIAIJKOKKOS,"")); 7236 if (iscuda) mmdata->mtype = PETSC_MEMTYPE_CUDA; 7237 else if (iskokk) mmdata->mtype = PETSC_MEMTYPE_KOKKOS; 7238 7239 /* prepare coo coordinates for values insertion */ 7240 7241 /* count total nonzeros of those intermediate seqaij Mats 7242 ncoo_d: # of nonzeros of matrices that do not have offproc entries 7243 ncoo_o: # of nonzeros (of matrices that might have offproc entries) that will be inserted to remote procs 7244 ncoo_oown: # of nonzeros (of matrices that might have offproc entries) that will be inserted locally 7245 */ 7246 for (cp = 0, ncoo_d = 0, ncoo_o = 0, ncoo_oown = 0; cp < mmdata->cp; cp++) { 7247 Mat_SeqAIJ *mm = (Mat_SeqAIJ*)mp[cp]->data; 7248 if (mptmp[cp]) continue; 7249 if (rmapt[cp] == 2 && hasoffproc) { /* the rows need to be scatter to all processes (might include self) */ 7250 const PetscInt *rmap 
= rmapa[cp]; 7251 const PetscInt mr = mp[cp]->rmap->n; 7252 const PetscInt rs = C->rmap->rstart; 7253 const PetscInt re = C->rmap->rend; 7254 const PetscInt *ii = mm->i; 7255 for (i = 0; i < mr; i++) { 7256 const PetscInt gr = rmap[i]; 7257 const PetscInt nz = ii[i+1] - ii[i]; 7258 if (gr < rs || gr >= re) ncoo_o += nz; /* this row is offproc */ 7259 else ncoo_oown += nz; /* this row is local */ 7260 } 7261 } else ncoo_d += mm->nz; 7262 } 7263 7264 /* 7265 ncoo: total number of nonzeros (including those inserted by remote procs) belonging to this proc 7266 7267 ncoo = ncoo_d + ncoo_oown + ncoo2, which ncoo2 is number of nonzeros inserted to me by other procs. 7268 7269 off[0] points to a big index array, which is shared by off[1,2,...]. Similarily, for own[0]. 7270 7271 off[p]: points to the segment for matrix mp[p], storing location of nonzeros that mp[p] will insert to others 7272 own[p]: points to the segment for matrix mp[p], storing location of nonzeros that mp[p] will insert locally 7273 so, off[p+1]-off[p] is the number of nonzeros that mp[p] will send to others. 7274 7275 coo_i/j/v[]: [ncoo] row/col/val of nonzeros belonging to this proc. 7276 Ex. coo_i[]: the beginning part (of size ncoo_d + ncoo_oown) stores i of local nonzeros, and the remaing part stores i of nonzeros I will receive. 
7277 */ 7278 PetscCall(PetscCalloc1(mmdata->cp+1,&mmdata->off)); /* +1 to make a csr-like data structure */ 7279 PetscCall(PetscCalloc1(mmdata->cp+1,&mmdata->own)); 7280 7281 /* gather (i,j) of nonzeros inserted by remote procs */ 7282 if (hasoffproc) { 7283 PetscSF msf; 7284 PetscInt ncoo2,*coo_i2,*coo_j2; 7285 7286 PetscCall(PetscMalloc1(ncoo_o,&mmdata->off[0])); 7287 PetscCall(PetscMalloc1(ncoo_oown,&mmdata->own[0])); 7288 PetscCall(PetscMalloc2(ncoo_o,&coo_i,ncoo_o,&coo_j)); /* to collect (i,j) of entries to be sent to others */ 7289 7290 for (cp = 0, ncoo_o = 0; cp < mmdata->cp; cp++) { 7291 Mat_SeqAIJ *mm = (Mat_SeqAIJ*)mp[cp]->data; 7292 PetscInt *idxoff = mmdata->off[cp]; 7293 PetscInt *idxown = mmdata->own[cp]; 7294 if (!mptmp[cp] && rmapt[cp] == 2) { /* row map is sparse */ 7295 const PetscInt *rmap = rmapa[cp]; 7296 const PetscInt *cmap = cmapa[cp]; 7297 const PetscInt *ii = mm->i; 7298 PetscInt *coi = coo_i + ncoo_o; 7299 PetscInt *coj = coo_j + ncoo_o; 7300 const PetscInt mr = mp[cp]->rmap->n; 7301 const PetscInt rs = C->rmap->rstart; 7302 const PetscInt re = C->rmap->rend; 7303 const PetscInt cs = C->cmap->rstart; 7304 for (i = 0; i < mr; i++) { 7305 const PetscInt *jj = mm->j + ii[i]; 7306 const PetscInt gr = rmap[i]; 7307 const PetscInt nz = ii[i+1] - ii[i]; 7308 if (gr < rs || gr >= re) { /* this is an offproc row */ 7309 for (j = ii[i]; j < ii[i+1]; j++) { 7310 *coi++ = gr; 7311 *idxoff++ = j; 7312 } 7313 if (!cmapt[cp]) { /* already global */ 7314 for (j = 0; j < nz; j++) *coj++ = jj[j]; 7315 } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */ 7316 for (j = 0; j < nz; j++) *coj++ = jj[j] + cs; 7317 } else { /* offdiag */ 7318 for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]]; 7319 } 7320 ncoo_o += nz; 7321 } else { /* this is a local row */ 7322 for (j = ii[i]; j < ii[i+1]; j++) *idxown++ = j; 7323 } 7324 } 7325 } 7326 mmdata->off[cp + 1] = idxoff; 7327 mmdata->own[cp + 1] = idxown; 7328 } 7329 7330 
PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)C),&mmdata->sf)); 7331 PetscCall(PetscSFSetGraphLayout(mmdata->sf,C->rmap,ncoo_o/*nleaves*/,NULL/*ilocal*/,PETSC_OWN_POINTER,coo_i)); 7332 PetscCall(PetscSFGetMultiSF(mmdata->sf,&msf)); 7333 PetscCall(PetscSFGetGraph(msf,&ncoo2/*nroots*/,NULL,NULL,NULL)); 7334 ncoo = ncoo_d + ncoo_oown + ncoo2; 7335 PetscCall(PetscMalloc2(ncoo,&coo_i2,ncoo,&coo_j2)); 7336 PetscCall(PetscSFGatherBegin(mmdata->sf,MPIU_INT,coo_i,coo_i2 + ncoo_d + ncoo_oown)); /* put (i,j) of remote nonzeros at back */ 7337 PetscCall(PetscSFGatherEnd(mmdata->sf,MPIU_INT,coo_i,coo_i2 + ncoo_d + ncoo_oown)); 7338 PetscCall(PetscSFGatherBegin(mmdata->sf,MPIU_INT,coo_j,coo_j2 + ncoo_d + ncoo_oown)); 7339 PetscCall(PetscSFGatherEnd(mmdata->sf,MPIU_INT,coo_j,coo_j2 + ncoo_d + ncoo_oown)); 7340 PetscCall(PetscFree2(coo_i,coo_j)); 7341 /* allocate MPI send buffer to collect nonzero values to be sent to remote procs */ 7342 PetscCall(PetscSFMalloc(mmdata->sf,mmdata->mtype,ncoo_o*sizeof(PetscScalar),(void**)&mmdata->coo_w)); 7343 coo_i = coo_i2; 7344 coo_j = coo_j2; 7345 } else { /* no offproc values insertion */ 7346 ncoo = ncoo_d; 7347 PetscCall(PetscMalloc2(ncoo,&coo_i,ncoo,&coo_j)); 7348 7349 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)C),&mmdata->sf)); 7350 PetscCall(PetscSFSetGraph(mmdata->sf,0,0,NULL,PETSC_OWN_POINTER,NULL,PETSC_OWN_POINTER)); 7351 PetscCall(PetscSFSetUp(mmdata->sf)); 7352 } 7353 mmdata->hasoffproc = hasoffproc; 7354 7355 /* gather (i,j) of nonzeros inserted locally */ 7356 for (cp = 0, ncoo_d = 0; cp < mmdata->cp; cp++) { 7357 Mat_SeqAIJ *mm = (Mat_SeqAIJ*)mp[cp]->data; 7358 PetscInt *coi = coo_i + ncoo_d; 7359 PetscInt *coj = coo_j + ncoo_d; 7360 const PetscInt *jj = mm->j; 7361 const PetscInt *ii = mm->i; 7362 const PetscInt *cmap = cmapa[cp]; 7363 const PetscInt *rmap = rmapa[cp]; 7364 const PetscInt mr = mp[cp]->rmap->n; 7365 const PetscInt rs = C->rmap->rstart; 7366 const PetscInt re = C->rmap->rend; 7367 const PetscInt 
cs = C->cmap->rstart; 7368 7369 if (mptmp[cp]) continue; 7370 if (rmapt[cp] == 1) { /* consecutive rows */ 7371 /* fill coo_i */ 7372 for (i = 0; i < mr; i++) { 7373 const PetscInt gr = i + rs; 7374 for (j = ii[i]; j < ii[i+1]; j++) coi[j] = gr; 7375 } 7376 /* fill coo_j */ 7377 if (!cmapt[cp]) { /* type-0, already global */ 7378 PetscCall(PetscArraycpy(coj,jj,mm->nz)); 7379 } else if (cmapt[cp] == 1) { /* type-1, local to global for consecutive columns of C */ 7380 for (j = 0; j < mm->nz; j++) coj[j] = jj[j] + cs; /* lid + col start */ 7381 } else { /* type-2, local to global for sparse columns */ 7382 for (j = 0; j < mm->nz; j++) coj[j] = cmap[jj[j]]; 7383 } 7384 ncoo_d += mm->nz; 7385 } else if (rmapt[cp] == 2) { /* sparse rows */ 7386 for (i = 0; i < mr; i++) { 7387 const PetscInt *jj = mm->j + ii[i]; 7388 const PetscInt gr = rmap[i]; 7389 const PetscInt nz = ii[i+1] - ii[i]; 7390 if (gr >= rs && gr < re) { /* local rows */ 7391 for (j = ii[i]; j < ii[i+1]; j++) *coi++ = gr; 7392 if (!cmapt[cp]) { /* type-0, already global */ 7393 for (j = 0; j < nz; j++) *coj++ = jj[j]; 7394 } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */ 7395 for (j = 0; j < nz; j++) *coj++ = jj[j] + cs; 7396 } else { /* type-2, local to global for sparse columns */ 7397 for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]]; 7398 } 7399 ncoo_d += nz; 7400 } 7401 } 7402 } 7403 } 7404 if (glob) { 7405 PetscCall(ISRestoreIndices(glob,&globidx)); 7406 } 7407 PetscCall(ISDestroy(&glob)); 7408 if (P_oth_l2g) { 7409 PetscCall(ISLocalToGlobalMappingRestoreIndices(P_oth_l2g,&P_oth_idx)); 7410 } 7411 PetscCall(ISLocalToGlobalMappingDestroy(&P_oth_l2g)); 7412 /* allocate an array to store all nonzeros (inserted locally or remotely) belonging to this proc */ 7413 PetscCall(PetscSFMalloc(mmdata->sf,mmdata->mtype,ncoo*sizeof(PetscScalar),(void**)&mmdata->coo_v)); 7414 7415 /* preallocate with COO data */ 7416 PetscCall(MatSetPreallocationCOO(C,ncoo,coo_i,coo_j)); 7417 
PetscCall(PetscFree2(coo_i,coo_j)); 7418 PetscFunctionReturn(0); 7419 } 7420 7421 PetscErrorCode MatProductSetFromOptions_MPIAIJBACKEND(Mat mat) 7422 { 7423 Mat_Product *product = mat->product; 7424 #if defined(PETSC_HAVE_DEVICE) 7425 PetscBool match = PETSC_FALSE; 7426 PetscBool usecpu = PETSC_FALSE; 7427 #else 7428 PetscBool match = PETSC_TRUE; 7429 #endif 7430 7431 PetscFunctionBegin; 7432 MatCheckProduct(mat,1); 7433 #if defined(PETSC_HAVE_DEVICE) 7434 if (!product->A->boundtocpu && !product->B->boundtocpu) { 7435 PetscCall(PetscObjectTypeCompare((PetscObject)product->B,((PetscObject)product->A)->type_name,&match)); 7436 } 7437 if (match) { /* we can always fallback to the CPU if requested */ 7438 switch (product->type) { 7439 case MATPRODUCT_AB: 7440 if (product->api_user) { 7441 PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatMatMult","Mat"); 7442 PetscCall(PetscOptionsBool("-matmatmult_backend_cpu","Use CPU code","MatMatMult",usecpu,&usecpu,NULL)); 7443 PetscOptionsEnd(); 7444 } else { 7445 PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatProduct_AB","Mat"); 7446 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu","Use CPU code","MatMatMult",usecpu,&usecpu,NULL)); 7447 PetscOptionsEnd(); 7448 } 7449 break; 7450 case MATPRODUCT_AtB: 7451 if (product->api_user) { 7452 PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatTransposeMatMult","Mat"); 7453 PetscCall(PetscOptionsBool("-mattransposematmult_backend_cpu","Use CPU code","MatTransposeMatMult",usecpu,&usecpu,NULL)); 7454 PetscOptionsEnd(); 7455 } else { 7456 PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatProduct_AtB","Mat"); 7457 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu","Use CPU code","MatTransposeMatMult",usecpu,&usecpu,NULL)); 7458 PetscOptionsEnd(); 7459 } 7460 break; 7461 case MATPRODUCT_PtAP: 7462 if (product->api_user) { 7463 
PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatPtAP","Mat"); 7464 PetscCall(PetscOptionsBool("-matptap_backend_cpu","Use CPU code","MatPtAP",usecpu,&usecpu,NULL)); 7465 PetscOptionsEnd(); 7466 } else { 7467 PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatProduct_PtAP","Mat"); 7468 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu","Use CPU code","MatPtAP",usecpu,&usecpu,NULL)); 7469 PetscOptionsEnd(); 7470 } 7471 break; 7472 default: 7473 break; 7474 } 7475 match = (PetscBool)!usecpu; 7476 } 7477 #endif 7478 if (match) { 7479 switch (product->type) { 7480 case MATPRODUCT_AB: 7481 case MATPRODUCT_AtB: 7482 case MATPRODUCT_PtAP: 7483 mat->ops->productsymbolic = MatProductSymbolic_MPIAIJBACKEND; 7484 break; 7485 default: 7486 break; 7487 } 7488 } 7489 /* fallback to MPIAIJ ops */ 7490 if (!mat->ops->productsymbolic) PetscCall(MatProductSetFromOptions_MPIAIJ(mat)); 7491 PetscFunctionReturn(0); 7492 } 7493 7494 /* 7495 Special version for direct calls from Fortran 7496 */ 7497 #include <petsc/private/fortranimpl.h> 7498 7499 /* Change these macros so can be used in void function */ 7500 /* Identical to PetscCallVoid, except it assigns to *_ierr */ 7501 #undef PetscCall 7502 #define PetscCall(...) do { \ 7503 PetscErrorCode ierr_msv_mpiaij = __VA_ARGS__; \ 7504 if (PetscUnlikely(ierr_msv_mpiaij)) { \ 7505 *_ierr = PetscError(PETSC_COMM_SELF,__LINE__,PETSC_FUNCTION_NAME,__FILE__,ierr_msv_mpiaij,PETSC_ERROR_REPEAT," "); \ 7506 return; \ 7507 } \ 7508 } while (0) 7509 7510 #undef SETERRQ 7511 #define SETERRQ(comm,ierr,...) 
/*
   matsetvaluesmpiaij_ - Fortran direct-call entry point for MatSetValues() on a MATMPIAIJ
   matrix: inserts/adds the m x n dense block v into rows im[] and columns in[] (global
   indices), routing each entry to the diagonal block (aij->A), the off-diagonal block
   (aij->B), or the stash for off-process rows.

   All arguments arrive by reference (Fortran convention); errors are reported through
   *_ierr via the PetscCall/SETERRQ macros redefined above for void functions.

   NOTE(review): the local variable names below (rp1/ap1/rmax1/..., rp2/ap2/rmax2/...,
   inserted, nonew, N, t, _i, lastcol1, lastcol2, bm, am) are REQUIRED verbatim by the
   MatSetValues_SeqAIJ_A_Private()/MatSetValues_SeqAIJ_B_Private() macros - do not rename.
*/
PETSC_EXTERN void matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
{
  Mat mat = *mmat;
  PetscInt m = *mm, n = *mn;
  InsertMode addv = *maddv;
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar value;

  MatCheckPreallocated(mat,1);
  /* first call fixes the insert mode; afterwards ADD_VALUES and INSERT_VALUES may not be mixed */
  if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
  else PetscCheck(mat->insertmode == addv,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
  {
    /* owned row range [rstart,rend) and owned column range [cstart,cend) of this rank */
    PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
    PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
    PetscBool roworiented = aij->roworiented;

    /* Some Variables required in the macro */
    Mat A = aij->A;                              /* diagonal block */
    Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
    PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
    MatScalar *aa;
    PetscBool ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
    Mat B = aij->B;                              /* off-diagonal block */
    Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data;
    PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
    MatScalar *ba;
    /* This variable below is only for the PETSC_HAVE_VIENNACL or PETSC_HAVE_CUDA cases, but we define it in all cases because we
     * cannot use "#if defined" inside a macro. */
    PETSC_UNUSED PetscBool inserted = PETSC_FALSE;

    /* scratch state consumed by the _A_Private/_B_Private insertion macros (binary-search bounds, row caches) */
    PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
    PetscInt nonew = a->nonew;
    MatScalar *ap1,*ap2;

    PetscFunctionBegin;
    PetscCall(MatSeqAIJGetArray(A,&aa));
    PetscCall(MatSeqAIJGetArray(B,&ba));
    for (i=0; i<m; i++) {
      if (im[i] < 0) continue;                   /* negative row indices are silently skipped */
      PetscCheck(im[i] < mat->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT,im[i],mat->rmap->N-1);
      if (im[i] >= rstart && im[i] < rend) {     /* row owned by this rank */
        row = im[i] - rstart;
        /* cache row 'row' of the diagonal block for the A insertion macro */
        lastcol1 = -1;
        rp1 = aj + ai[row];
        ap1 = aa + ai[row];
        rmax1 = aimax[row];
        nrow1 = ailen[row];
        low1 = 0;
        high1 = nrow1;
        /* cache row 'row' of the off-diagonal block for the B insertion macro */
        lastcol2 = -1;
        rp2 = bj + bi[row];
        ap2 = ba + bi[row];
        rmax2 = bimax[row];
        nrow2 = bilen[row];
        low2 = 0;
        high2 = nrow2;

        for (j=0; j<n; j++) {
          /* v is stored row-major or column-major depending on MatSetOption(...,MAT_ROW_ORIENTED,...) */
          if (roworiented) value = v[i*n+j];
          else value = v[i+j*m];
          if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
          if (in[j] >= cstart && in[j] < cend) { /* column in the diagonal block */
            col = in[j] - cstart;
            MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
          } else if (in[j] < 0) continue;        /* negative column indices are silently skipped */
          else if (PetscUnlikelyDebug(in[j] >= mat->cmap->N)) {
            /* extra brace on SETERRQ() is required for --with-errorchecking=0 - due to the next 'else' clause */
            SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT,in[j],mat->cmap->N-1);
          } else {                               /* column lands in the off-diagonal block */
            if (mat->was_assembled) {
              /* map the global column to the compressed local column numbering of B */
              if (!aij->colmap) {
                PetscCall(MatCreateColmap_MPIAIJ_Private(mat));
              }
#if defined(PETSC_USE_CTABLE)
              PetscCall(PetscTableFind(aij->colmap,in[j]+1,&col));
              col--;                             /* colmap stores col+1 so that 0 means "absent" */
#else
              col = aij->colmap[in[j]] - 1;
#endif
              /* new off-diagonal column and new nonzeros allowed: disassemble back to global numbering */
              if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
                PetscCall(MatDisAssemble_MPIAIJ(mat));
                col = in[j];
                /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
                B = aij->B;
                b = (Mat_SeqAIJ*)B->data;
                bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
                rp2 = bj + bi[row];
                ap2 = ba + bi[row];
                rmax2 = bimax[row];
                nrow2 = bilen[row];
                low2 = 0;
                high2 = nrow2;
                bm = aij->B->rmap->n;
                ba = b->a;                       /* NOTE(review): ba refreshed AFTER ap2 was computed from the old ba - presumably intentional here; verify against MatDisAssemble_MPIAIJ semantics */
                inserted = PETSC_FALSE;
              }
            } else col = in[j];                  /* before first assembly B uses global column indices directly */
            MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
          }
        }
      } else if (!aij->donotstash) {             /* off-process row: stash for communication at assembly time */
        if (roworiented) {
          PetscCall(MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
        } else {
          PetscCall(MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
        }
      }
    }
    PetscCall(MatSeqAIJRestoreArray(A,&aa));
    PetscCall(MatSeqAIJRestoreArray(B,&ba));
  }
  PetscFunctionReturnVoid();
}