#include <petsc/private/dmdaimpl.h> /*I "petscdmda.h" I*/
#include <petscmat.h>
#include <petscbt.h>

extern PetscErrorCode DMCreateColoring_DA_1d_MPIAIJ(DM,ISColoringType,ISColoring*);
extern PetscErrorCode DMCreateColoring_DA_2d_MPIAIJ(DM,ISColoringType,ISColoring*);
extern PetscErrorCode DMCreateColoring_DA_2d_5pt_MPIAIJ(DM,ISColoringType,ISColoring*);
extern PetscErrorCode DMCreateColoring_DA_3d_MPIAIJ(DM,ISColoringType,ISColoring*);

/*
   For a ghost index i that may be negative or greater than the upper bound m this
   maps it into the 0:m-1 range using periodicity.
   NOTE: function-like macro; arguments may be evaluated more than once, so do not
   pass expressions with side effects.
*/
#define SetInRange(i,m) ((i < 0) ? m+i : ((i >= m) ? i-m : i))

/*
   Converts the dense w-by-w 0/1 fill pattern dfill into the modified CSR
   representation documented in DMDASetBlockFillsSparse() and returns it in
   freshly allocated storage in *rfill (ownership passes to the caller).
   A NULL dfill means "use a dense block": *rfill is left untouched.
*/
static PetscErrorCode DMDASetBlockFills_Private(const PetscInt *dfill,PetscInt w,PetscInt **rfill)
{
  PetscInt i,j,nz,*fill;

  PetscFunctionBegin;
  if (!dfill) PetscFunctionReturn(0);

  /* count number of nonzeros */
  nz = 0;
  for (i=0; i<w; i++) {
    for (j=0; j<w; j++) {
      if (dfill[w*i+j]) nz++;
    }
  }
  PetscCall(PetscMalloc1(nz + w + 1,&fill));
  /* construct modified CSR storage of nonzero structure */
  /* fill[0 -- w] marks starts of each row of column indices (and end of last row)
     so fill[1] - fill[0] gives number of nonzeros in first row etc */
  nz = w + 1;
  for (i=0; i<w; i++) {
    fill[i] = nz;
    for (j=0; j<w; j++) {
      if (dfill[w*i+j]) {
        fill[nz] = j;
        nz++;
      }
    }
  }
  /* sentinel: fill[w] holds the total length nz + w + 1 of the array */
  fill[w] = nz;

  *rfill = fill;
  PetscFunctionReturn(0);
}

/*
   Copies an already-sparse fill pattern (same modified CSR format as produced by
   DMDASetBlockFills_Private()) into freshly allocated storage in *rfill.
   A NULL dfillsparse leaves *rfill untouched.
*/
static PetscErrorCode DMDASetBlockFillsSparse_Private(const PetscInt *dfillsparse,PetscInt w,PetscInt **rfill)
{
  PetscInt nz;

  PetscFunctionBegin;
  if (!dfillsparse) PetscFunctionReturn(0);

  /* Determine number of non-zeros: entry w stores nz + w + 1, the total array length */
  nz = (dfillsparse[w] - w - 1);

  /* Allocate space for our copy of the given sparse matrix representation */
  PetscCall(PetscMalloc1(nz + w + 1,rfill));
  PetscCall(PetscArraycpy(*rfill,dfillsparse,nz+w+1));
  PetscFunctionReturn(0);
}

/*
   Builds dd->ofillcols from dd->ofill.
   NOTE(review): this dereferences dd->ofill unconditionally, so it assumes a
   non-NULL ofill was supplied to DMDASetBlockFills()/DMDASetBlockFillsSparse();
   confirm callers never pass a NULL off-diagonal pattern.
*/
static PetscErrorCode DMDASetBlockFills_Private2(DM_DA *dd)
{
  PetscInt i,k,cnt = 1;

  PetscFunctionBegin;

  /* ofillcols tracks the columns of ofill that have any nonzero in them; the value in each location is the number of
     columns to the left with any nonzeros in them plus 1 */
  PetscCall(PetscCalloc1(dd->w,&dd->ofillcols));
  for (i=0; i<dd->w; i++) {
    for (k=dd->ofill[i]; k<dd->ofill[i+1]; k++) dd->ofillcols[dd->ofill[k]] = 1;
  }
  for (i=0; i<dd->w; i++) {
    if (dd->ofillcols[i]) {
      dd->ofillcols[i] = cnt++;
    }
  }
  PetscFunctionReturn(0);
}

/*@
    DMDASetBlockFills - Sets the fill pattern in each block for a multi-component problem
    of the matrix returned by DMCreateMatrix().

    Logically Collective on da

    Input Parameters:
+   da - the distributed array
.   dfill - the fill pattern in the diagonal block (may be NULL, means use dense block)
-   ofill - the fill pattern in the off-diagonal blocks

    Level: developer

    Notes:
    This only makes sense when you are doing multicomponent problems but using the
    MPIAIJ matrix format

    The format for dfill and ofill is a 2 dimensional dof by dof matrix with 1 entries
    representing coupling and 0 entries for missing coupling. For example
$             dfill[9] = {1, 0, 0,
$                         1, 1, 0,
$                         0, 1, 1}
    means that row 0 is coupled with only itself in the diagonal block, row 1 is coupled with
    itself and row 0 (in the diagonal block) and row 2 is coupled with itself and row 1 (in the
    diagonal block).

    DMDASetGetMatrix() allows you to provide general code for those more complicated nonzero patterns than
    can be represented in the dfill, ofill format

    Contributed by Glenn Hammond

.seealso DMCreateMatrix(), DMDASetGetMatrix(), DMSetMatrixPreallocateOnly()

@*/
PetscErrorCode DMDASetBlockFills(DM da,const PetscInt *dfill,const PetscInt *ofill)
{
  DM_DA *dd = (DM_DA*)da->data;

  PetscFunctionBegin;
  /* save the given dfill and ofill information */
  PetscCall(DMDASetBlockFills_Private(dfill,dd->w,&dd->dfill));
  PetscCall(DMDASetBlockFills_Private(ofill,dd->w,&dd->ofill));

  /* count nonzeros in ofill columns */
  PetscCall(DMDASetBlockFills_Private2(dd));

  PetscFunctionReturn(0);
}

/*@
    DMDASetBlockFillsSparse - Sets the fill pattern in each block for a multi-component problem
    of the matrix returned by DMCreateMatrix(), using sparse representations
    of fill patterns.

    Logically Collective on da

    Input Parameters:
+   da - the distributed array
.   dfill - the sparse fill pattern in the diagonal block (may be NULL, means use dense block)
-   ofill - the sparse fill pattern in the off-diagonal blocks

    Level: developer

    Notes: This only makes sense when you are doing multicomponent problems but using the
    MPIAIJ matrix format

    The format for dfill and ofill is a sparse representation of a
    dof-by-dof matrix with 1 entries representing coupling and 0 entries
    for missing coupling. The sparse representation is a 1 dimensional
    array of length nz + dof + 1, where nz is the number of non-zeros in
    the matrix. The first dof entries in the array give the
    starting array indices of each row's items in the rest of the array,
    the dof+1st item contains the value nz + dof + 1 (i.e. the entire length of the array)
    and the remaining nz items give the column indices of each of
    the 1s within the logical 2D matrix.
    Each row's items within
    the array are the column indices of the 1s within that row
    of the 2D matrix. PETSc developers may recognize that this is the
    same format as that computed by the DMDASetBlockFills_Private()
    function from a dense 2D matrix representation.

    DMDASetGetMatrix() allows you to provide general code for those more complicated nonzero patterns than
    can be represented in the dfill, ofill format

    Contributed by Philip C. Roth

.seealso DMDASetBlockFills(), DMCreateMatrix(), DMDASetGetMatrix(), DMSetMatrixPreallocateOnly()

@*/
PetscErrorCode DMDASetBlockFillsSparse(DM da,const PetscInt *dfillsparse,const PetscInt *ofillsparse)
{
  DM_DA *dd = (DM_DA*)da->data;

  PetscFunctionBegin;
  /* save the given dfill and ofill information */
  PetscCall(DMDASetBlockFillsSparse_Private(dfillsparse,dd->w,&dd->dfill));
  PetscCall(DMDASetBlockFillsSparse_Private(ofillsparse,dd->w,&dd->ofill));

  /* count nonzeros in ofill columns */
  PetscCall(DMDASetBlockFills_Private2(dd));

  PetscFunctionReturn(0);
}

/*
   Dispatches to the dimension-specific coloring routine; temporarily pretends the
   DMDA has one degree of freedom per grid point when the matrix type is BAIJ so
   the coloring is per block rather than per matrix element.
*/
PetscErrorCode DMCreateColoring_DA(DM da,ISColoringType ctype,ISColoring *coloring)
{
  PetscInt       dim,m,n,p,nc;
  DMBoundaryType bx,by,bz;
  MPI_Comm       comm;
  PetscMPIInt    size;
  PetscBool      isBAIJ;
  DM_DA          *dd = (DM_DA*)da->data;

  PetscFunctionBegin;
  /*
                     m
          ------------------------------------------------------
         |                                                      |
         |                                                      |
         |               ----------------------                 |
         |               |                    |                 |
      n  |           yn  |                    |                 |
         |               |                    |                 |
         |               .---------------------                 |
         |             (xs,ys)     xn                           |
         |            .
         |         (gxs,gys)                                    |
         |                                                      |
          -----------------------------------------------------
  */

  /*
         nc - number of components per grid point
         col - number of colors needed in one direction for single component problem
  */
  PetscCall(DMDAGetInfo(da,&dim,NULL,NULL,NULL,&m,&n,&p,&nc,NULL,&bx,&by,&bz,NULL));

  PetscCall(PetscObjectGetComm((PetscObject)da,&comm));
  PetscCallMPI(MPI_Comm_size(comm,&size));
  if (ctype == IS_COLORING_LOCAL) {
    if (size == 1) {
      /* sequentially the ghosted and global colorings coincide */
      ctype = IS_COLORING_GLOBAL;
    } else if (dim > 1) {
      /* a periodic direction owned entirely by one rank would alias ghost and owned points */
      if ((m==1 && bx == DM_BOUNDARY_PERIODIC) || (n==1 && by == DM_BOUNDARY_PERIODIC) || (p==1 && bz == DM_BOUNDARY_PERIODIC)) {
        SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"IS_COLORING_LOCAL cannot be used for periodic boundary condition having both ends of the domain on the same process");
      }
    }
  }

  /* Tell the DMDA it has 1 degree of freedom per grid point so that the coloring for BAIJ
     matrices is for the blocks, not the individual matrix elements  */
  PetscCall(PetscStrbeginswith(da->mattype,MATBAIJ,&isBAIJ));
  if (!isBAIJ) PetscCall(PetscStrbeginswith(da->mattype,MATMPIBAIJ,&isBAIJ));
  if (!isBAIJ) PetscCall(PetscStrbeginswith(da->mattype,MATSEQBAIJ,&isBAIJ));
  if (isBAIJ) {
    /* scaled back to the true values after the dispatch below */
    dd->w  = 1;
    dd->xs = dd->xs/nc;
    dd->xe = dd->xe/nc;
    dd->Xs = dd->Xs/nc;
    dd->Xe = dd->Xe/nc;
  }

  /*
     We do not provide a getcoloring function in the DMDA operations because
     the basic DMDA does not know about matrices. We think of DMDA as being
     more low-level than matrices.
  */
  if (dim == 1) {
    PetscCall(DMCreateColoring_DA_1d_MPIAIJ(da,ctype,coloring));
  } else if (dim == 2) {
    PetscCall(DMCreateColoring_DA_2d_MPIAIJ(da,ctype,coloring));
  } else if (dim == 3) {
    PetscCall(DMCreateColoring_DA_3d_MPIAIJ(da,ctype,coloring));
  } else SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"Not done for %D dimension, send us mail petsc-maint@mcs.anl.gov for code",dim);
  if (isBAIJ) {
    /* restore the true dof and ranges scaled down above for the BAIJ case */
    dd->w  = nc;
    dd->xs = dd->xs*nc;
    dd->xe = dd->xe*nc;
    dd->Xs = dd->Xs*nc;
    dd->Xe = dd->Xe*nc;
  }
  PetscFunctionReturn(0);
}

/* ---------------------------------------------------------------------------------*/

/*
   2d coloring: each grid point gets a color determined by its position modulo the
   stencil width in each direction, times the number of components.  The coloring is
   computed once and cached on the DM (dd->localcoloring / dd->ghostedcoloring).
*/
PetscErrorCode DMCreateColoring_DA_2d_MPIAIJ(DM da,ISColoringType ctype,ISColoring *coloring)
{
  PetscInt         xs,ys,nx,ny,i,j,ii,gxs,gys,gnx,gny,m,n,M,N,dim,s,k,nc,col;
  PetscInt         ncolors;
  MPI_Comm         comm;
  DMBoundaryType   bx,by;
  DMDAStencilType  st;
  ISColoringValue  *colors;
  DM_DA            *dd = (DM_DA*)da->data;

  PetscFunctionBegin;
  /*
         nc - number of components per grid point
         col - number of colors needed in one direction for single component problem
  */
  PetscCall(DMDAGetInfo(da,&dim,&m,&n,NULL,&M,&N,NULL,&nc,&s,&bx,&by,NULL,&st));
  col = 2*s + 1;
  PetscCall(DMDAGetCorners(da,&xs,&ys,NULL,&nx,&ny,NULL));
  PetscCall(DMDAGetGhostCorners(da,&gxs,&gys,NULL,&gnx,&gny,NULL));
  PetscCall(PetscObjectGetComm((PetscObject)da,&comm));

  /* special case as taught to us by Paul Hovland */
  if (st == DMDA_STENCIL_STAR && s == 1) {
    PetscCall(DMCreateColoring_DA_2d_5pt_MPIAIJ(da,ctype,coloring));
  } else {
    if (ctype == IS_COLORING_GLOBAL) {
      if (!dd->localcoloring) {
        PetscCall(PetscMalloc1(nc*nx*ny,&colors));
        ii = 0;
        for (j=ys; j<ys+ny; j++) {
          for (i=xs; i<xs+nx; i++) {
            for (k=0; k<nc; k++) {
              colors[ii++] = k + nc*((i % col) + col*(j % col));
            }
          }
        }
        ncolors = nc + nc*(col-1 + col*(col-1));
        /* colors ownership passes to the ISColoring (PETSC_OWN_POINTER) */
        PetscCall(ISColoringCreate(comm,ncolors,nc*nx*ny,colors,PETSC_OWN_POINTER,&dd->localcoloring));
      }
      *coloring = dd->localcoloring;
    } else if (ctype == IS_COLORING_LOCAL) {
      if (!dd->ghostedcoloring) {
        PetscCall(PetscMalloc1(nc*gnx*gny,&colors));
        ii = 0;
        for (j=gys; j<gys+gny; j++) {
          for (i=gxs; i<gxs+gnx; i++) {
            for (k=0; k<nc; k++) {
              /* the complicated stuff is to handle periodic boundaries */
              colors[ii++] = k + nc*((SetInRange(i,m) % col) + col*(SetInRange(j,n) % col));
            }
          }
        }
        ncolors = nc + nc*(col - 1 + col*(col-1));
        PetscCall(ISColoringCreate(comm,ncolors,nc*gnx*gny,colors,PETSC_OWN_POINTER,&dd->ghostedcoloring));
        /* PetscIntView(ncolors,(PetscInt*)colors,0); */

        PetscCall(ISColoringSetType(dd->ghostedcoloring,IS_COLORING_LOCAL));
      }
      *coloring = dd->ghostedcoloring;
    } else SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_ARG_WRONG,"Unknown ISColoringType %d",(int)ctype);
  }
  /* the caller receives its own reference to the cached coloring */
  PetscCall(ISColoringReference(*coloring));
  PetscFunctionReturn(0);
}

/* ---------------------------------------------------------------------------------*/

/*
   3d analogue of DMCreateColoring_DA_2d_MPIAIJ(); same caching and periodic-boundary
   handling, with the third grid direction folded in via col*col.
*/
PetscErrorCode DMCreateColoring_DA_3d_MPIAIJ(DM da,ISColoringType ctype,ISColoring *coloring)
{
  PetscInt         xs,ys,nx,ny,i,j,gxs,gys,gnx,gny,m,n,p,dim,s,k,nc,col,zs,gzs,ii,l,nz,gnz,M,N,P;
  PetscInt         ncolors;
  MPI_Comm         comm;
  DMBoundaryType   bx,by,bz;
  DMDAStencilType  st;
  ISColoringValue  *colors;
  DM_DA            *dd = (DM_DA*)da->data;

  PetscFunctionBegin;
  /*
         nc - number of components per grid point
         col - number of colors needed in one direction for single component problem
  */
  PetscCall(DMDAGetInfo(da,&dim,&m,&n,&p,&M,&N,&P,&nc,&s,&bx,&by,&bz,&st));
  col = 2*s + 1;
  PetscCall(DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz));
  PetscCall(DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz));
  PetscCall(PetscObjectGetComm((PetscObject)da,&comm));

  /* create the coloring */
  if (ctype == IS_COLORING_GLOBAL) {
    if (!dd->localcoloring) {
      PetscCall(PetscMalloc1(nc*nx*ny*nz,&colors));
      ii = 0;
      for (k=zs; k<zs+nz; k++) {
        for (j=ys; j<ys+ny; j++) {
          for (i=xs; i<xs+nx; i++) {
            for (l=0; l<nc; l++) {
              colors[ii++] = l + nc*((i % col) + col*(j % col) + col*col*(k % col));
            }
          }
        }
      }
      ncolors = nc + nc*(col-1 + col*(col-1)+ col*col*(col-1));
      PetscCall(ISColoringCreate(comm,ncolors,nc*nx*ny*nz,colors,PETSC_OWN_POINTER,&dd->localcoloring));
    }
    *coloring = dd->localcoloring;
  } else if (ctype == IS_COLORING_LOCAL) {
    if (!dd->ghostedcoloring) {
      PetscCall(PetscMalloc1(nc*gnx*gny*gnz,&colors));
      ii = 0;
      for (k=gzs; k<gzs+gnz; k++) {
        for (j=gys; j<gys+gny; j++) {
          for (i=gxs; i<gxs+gnx; i++) {
            for (l=0; l<nc; l++) {
              /* the complicated stuff is to handle periodic boundaries */
              colors[ii++] = l + nc*((SetInRange(i,m) % col) + col*(SetInRange(j,n) % col) + col*col*(SetInRange(k,p) % col));
            }
          }
        }
      }
      ncolors = nc + nc*(col-1 + col*(col-1)+ col*col*(col-1));
      PetscCall(ISColoringCreate(comm,ncolors,nc*gnx*gny*gnz,colors,PETSC_OWN_POINTER,&dd->ghostedcoloring));
      PetscCall(ISColoringSetType(dd->ghostedcoloring,IS_COLORING_LOCAL));
    }
    *coloring = dd->ghostedcoloring;
  } else SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_ARG_WRONG,"Unknown ISColoringType %d",(int)ctype);
  PetscCall(ISColoringReference(*coloring));
  PetscFunctionReturn(0);
}

/* ---------------------------------------------------------------------------------*/

/*
   1d coloring.  When DMDASetBlockFills() supplied an off-diagonal fill pattern
   (dd->ofillcols), only the components that actually couple to neighboring grid
   points consume extra colors; otherwise the full nc*col coloring is used.
*/
PetscErrorCode DMCreateColoring_DA_1d_MPIAIJ(DM da,ISColoringType ctype,ISColoring *coloring)
{
  PetscInt         xs,nx,i,i1,gxs,gnx,l,m,M,dim,s,nc,col;
  PetscInt         ncolors;
  MPI_Comm         comm;
  DMBoundaryType   bx;
  ISColoringValue  *colors;
  DM_DA            *dd = (DM_DA*)da->data;

  PetscFunctionBegin;
  /*
         nc - number of components per grid point
         col - number of colors needed in one direction for single component problem
  */
  PetscCall(DMDAGetInfo(da,&dim,&m,NULL,NULL,&M,NULL,NULL,&nc,&s,&bx,NULL,NULL,NULL));
  col = 2*s + 1;
  PetscCall(DMDAGetCorners(da,&xs,NULL,NULL,&nx,NULL,NULL));
  PetscCall(DMDAGetGhostCorners(da,&gxs,NULL,NULL,&gnx,NULL,NULL));
  PetscCall(PetscObjectGetComm((PetscObject)da,&comm));

  /* create the coloring */
  if (ctype == IS_COLORING_GLOBAL) {
    if (!dd->localcoloring) {
      PetscCall(PetscMalloc1(nc*nx,&colors));
      if (dd->ofillcols) {
        PetscInt tc = 0;
        /* tc = number of components with any off-diagonal (neighbor) coupling */
        for (i=0; i<nc; i++) tc += (PetscInt) (dd->ofillcols[i] > 0);
        i1 = 0;
        for (i=xs; i<xs+nx; i++) {
          for (l=0; l<nc; l++) {
            if (dd->ofillcols[l] && (i % col)) {
              colors[i1++] = nc - 1 + tc*((i % col) - 1) + dd->ofillcols[l];
            } else {
              colors[i1++] = l;
            }
          }
        }
        ncolors = nc + 2*s*tc;
      } else {
        i1 = 0;
        for (i=xs; i<xs+nx; i++) {
          for (l=0; l<nc; l++) {
            colors[i1++] = l + nc*(i % col);
          }
        }
        ncolors = nc + nc*(col-1);
      }
      PetscCall(ISColoringCreate(comm,ncolors,nc*nx,colors,PETSC_OWN_POINTER,&dd->localcoloring));
    }
    *coloring = dd->localcoloring;
  } else if (ctype == IS_COLORING_LOCAL) {
    if (!dd->ghostedcoloring) {
      PetscCall(PetscMalloc1(nc*gnx,&colors));
      i1 = 0;
      for (i=gxs; i<gxs+gnx; i++) {
        for (l=0; l<nc; l++) {
          /* the complicated stuff is to handle periodic boundaries */
          colors[i1++] = l + nc*(SetInRange(i,m) % col);
        }
      }
      ncolors = nc + nc*(col-1);
      PetscCall(ISColoringCreate(comm,ncolors,nc*gnx,colors,PETSC_OWN_POINTER,&dd->ghostedcoloring));
      PetscCall(ISColoringSetType(dd->ghostedcoloring,IS_COLORING_LOCAL));
    }
    *coloring = dd->ghostedcoloring;
  } else SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_ARG_WRONG,"Unknown ISColoringType %d",(int)ctype);
  PetscCall(ISColoringReference(*coloring));
  PetscFunctionReturn(0);
}

/*
   5-point (star stencil, width 1) special case: five colors per component suffice,
   assigned by (3*j+i) mod 5.  Called only from DMCreateColoring_DA_2d_MPIAIJ(),
   which takes the ISColoringReference() on behalf of the caller — hence none here.
*/
PetscErrorCode DMCreateColoring_DA_2d_5pt_MPIAIJ(DM da,ISColoringType ctype,ISColoring *coloring)
{
  PetscInt         xs,ys,nx,ny,i,j,ii,gxs,gys,gnx,gny,m,n,dim,s,k,nc;
  PetscInt         ncolors;
  MPI_Comm         comm;
  DMBoundaryType   bx,by;
  ISColoringValue  *colors;
  DM_DA            *dd = (DM_DA*)da->data;

  PetscFunctionBegin;
  /*
         nc - number of components per grid point
         col - number of colors needed in one direction for single component problem
  */
  PetscCall(DMDAGetInfo(da,&dim,&m,&n,NULL,NULL,NULL,NULL,&nc,&s,&bx,&by,NULL,NULL));
  PetscCall(DMDAGetCorners(da,&xs,&ys,NULL,&nx,&ny,NULL));
  PetscCall(DMDAGetGhostCorners(da,&gxs,&gys,NULL,&gnx,&gny,NULL));
  PetscCall(PetscObjectGetComm((PetscObject)da,&comm));
  /* create the coloring */
  if (ctype == IS_COLORING_GLOBAL) {
    if (!dd->localcoloring) {
      PetscCall(PetscMalloc1(nc*nx*ny,&colors));
      ii = 0;
      for (j=ys; j<ys+ny; j++) {
        for (i=xs; i<xs+nx; i++) {
          for (k=0; k<nc; k++) {
            colors[ii++] = k + nc*((3*j+i) % 5);
          }
        }
      }
      ncolors = 5*nc;
      PetscCall(ISColoringCreate(comm,ncolors,nc*nx*ny,colors,PETSC_OWN_POINTER,&dd->localcoloring));
    }
    *coloring = dd->localcoloring;
  } else if (ctype == IS_COLORING_LOCAL) {
    if (!dd->ghostedcoloring) {
      PetscCall(PetscMalloc1(nc*gnx*gny,&colors));
      ii = 0;
      for (j=gys; j<gys+gny; j++) {
        for (i=gxs; i<gxs+gnx; i++) {
          for (k=0; k<nc; k++) {
            /* map ghost indices into the periodic range before taking mod 5 */
            colors[ii++] = k + nc*((3*SetInRange(j,n) + SetInRange(i,m)) % 5);
          }
        }
      }
      ncolors = 5*nc;
      PetscCall(ISColoringCreate(comm,ncolors,nc*gnx*gny,colors,PETSC_OWN_POINTER,&dd->ghostedcoloring));
      PetscCall(ISColoringSetType(dd->ghostedcoloring,IS_COLORING_LOCAL));
    }
    *coloring = dd->ghostedcoloring;
  } else SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_ARG_WRONG,"Unknown ISColoringType %d",(int)ctype);
  PetscFunctionReturn(0);
}

/*
   =========================================================================== */
extern PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ(DM,Mat);
extern PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ_Fill(DM,Mat);
extern PetscErrorCode DMCreateMatrix_DA_1d_SeqAIJ_NoPreallocation(DM,Mat);
extern PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ(DM,Mat);
extern PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ_Fill(DM,Mat);
extern PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ(DM,Mat);
extern PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ_Fill(DM,Mat);
extern PetscErrorCode DMCreateMatrix_DA_2d_MPIBAIJ(DM,Mat);
extern PetscErrorCode DMCreateMatrix_DA_3d_MPIBAIJ(DM,Mat);
extern PetscErrorCode DMCreateMatrix_DA_2d_MPISBAIJ(DM,Mat);
extern PetscErrorCode DMCreateMatrix_DA_3d_MPISBAIJ(DM,Mat);
extern PetscErrorCode DMCreateMatrix_DA_2d_MPISELL(DM,Mat);
extern PetscErrorCode DMCreateMatrix_DA_3d_MPISELL(DM,Mat);
extern PetscErrorCode DMCreateMatrix_DA_IS(DM,Mat);

/*@C
   MatSetupDM - Sets the DMDA that is to be used by the HYPRE_StructMatrix PETSc matrix

   Logically Collective on mat

   Input Parameters:
+  mat - the matrix
-  da - the da

   Level: intermediate

@*/
PetscErrorCode MatSetupDM(Mat mat,DM da)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
  PetscValidHeaderSpecificType(da,DM_CLASSID,2,DMDA);
  /* no-op if the matrix type does not implement MatSetupDM_C */
  PetscTryMethod(mat,"MatSetupDM_C",(Mat,DM),(mat,da));
  PetscFunctionReturn(0);
}

/*
   Views a DMDA-generated parallel matrix in the DMDA's natural ordering by
   permuting into a temporary matrix and viewing that instead.
*/
PetscErrorCode MatView_MPI_DA(Mat A,PetscViewer viewer)
{
  DM                da;
  const char        *prefix;
  Mat               Anatural;
  AO                ao;
  PetscInt          rstart,rend,*petsc,i;
  IS                is;
  MPI_Comm          comm;
  PetscViewerFormat format;

  PetscFunctionBegin;
  /* Check whether we are just printing info, in which case MatView() already viewed everything we wanted to view */
  PetscCall(PetscViewerGetFormat(viewer,&format));
  if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) PetscFunctionReturn(0);

  PetscCall(PetscObjectGetComm((PetscObject)A,&comm));
  PetscCall(MatGetDM(A, &da));
  PetscCheck(da,PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Matrix not generated from a DMDA");

  /* build the local piece of the natural-ordering index set; the AO maps application <-> PETSc orderings */
  PetscCall(DMDAGetAO(da,&ao));
  PetscCall(MatGetOwnershipRange(A,&rstart,&rend));
  PetscCall(PetscMalloc1(rend-rstart,&petsc));
  for (i=rstart; i<rend; i++) petsc[i-rstart] = i;
  PetscCall(AOApplicationToPetsc(ao,rend-rstart,petsc));
  PetscCall(ISCreateGeneral(comm,rend-rstart,petsc,PETSC_OWN_POINTER,&is));

  /* call viewer on natural ordering */
  PetscCall(MatCreateSubMatrix(A,is,is,MAT_INITIAL_MATRIX,&Anatural));
  PetscCall(ISDestroy(&is));
  PetscCall(PetscObjectGetOptionsPrefix((PetscObject)A,&prefix));
  PetscCall(PetscObjectSetOptionsPrefix((PetscObject)Anatural,prefix));
  PetscCall(PetscObjectSetName((PetscObject)Anatural,((PetscObject)A)->name));
  /* suppress the temporary matrix's own header so the output looks like A's */
  ((PetscObject)Anatural)->donotPetscObjectPrintClassNamePrefixType = PETSC_TRUE;
  PetscCall(MatView(Anatural,viewer));
  ((PetscObject)Anatural)->donotPetscObjectPrintClassNamePrefixType = PETSC_FALSE;
  PetscCall(MatDestroy(&Anatural));
  PetscFunctionReturn(0);
}

/*
   Loads a matrix stored in natural ordering and permutes it into the DMDA's
   application ordering, replacing A's contents in place via MatHeaderReplace().
*/
PetscErrorCode MatLoad_MPI_DA(Mat A,PetscViewer viewer)
{
  DM       da;
  Mat      Anatural,Aapp;
  AO       ao;
  PetscInt rstart,rend,*app,i,m,n,M,N;
  IS       is;
  MPI_Comm comm;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)A,&comm));
  PetscCall(MatGetDM(A, &da));
  PetscCheck(da,PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Matrix not generated from a DMDA");

  /* Load the matrix in natural ordering */
  PetscCall(MatCreate(PetscObjectComm((PetscObject)A),&Anatural));
  PetscCall(MatSetType(Anatural,((PetscObject)A)->type_name));
  PetscCall(MatGetSize(A,&M,&N));
  PetscCall(MatGetLocalSize(A,&m,&n));
  PetscCall(MatSetSizes(Anatural,m,n,M,N));
  PetscCall(MatLoad(Anatural,viewer));

  /* Map natural ordering to application ordering and create IS */
  PetscCall(DMDAGetAO(da,&ao));
  PetscCall(MatGetOwnershipRange(Anatural,&rstart,&rend));
  PetscCall(PetscMalloc1(rend-rstart,&app));
  for (i=rstart; i<rend; i++) app[i-rstart] = i;
  PetscCall(AOPetscToApplication(ao,rend-rstart,app));
  PetscCall(ISCreateGeneral(comm,rend-rstart,app,PETSC_OWN_POINTER,&is));

  /* Do permutation and replace header */
  PetscCall(MatCreateSubMatrix(Anatural,is,is,MAT_INITIAL_MATRIX,&Aapp));
  PetscCall(MatHeaderReplace(A,&Aapp));
  PetscCall(ISDestroy(&is));
  PetscCall(MatDestroy(&Anatural));
  PetscFunctionReturn(0);
}

/*
   Creates and preallocates the matrix associated with the DMDA, dispatching on
   which preallocation method the chosen matrix type supports (AIJ, BAIJ, SBAIJ,
   SELL, IS, or none).  The returned matrix carries the DM and a stencil so
   MatSetValuesStencil() works.
*/
PetscErrorCode DMCreateMatrix_DA(DM da, Mat *J)
{
  PetscInt    dim,dof,nx,ny,nz,dims[3],starts[3],M,N,P;
  Mat         A;
  MPI_Comm    comm;
  MatType     Atype;
  void        (*aij)(void)=NULL,(*baij)(void)=NULL,(*sbaij)(void)=NULL,(*sell)(void)=NULL,(*is)(void)=NULL;
  MatType     mtype;
  PetscMPIInt size;
  DM_DA       *dd = (DM_DA*)da->data;

  PetscFunctionBegin;
  PetscCall(MatInitializePackage());
  mtype = da->mattype;

  /*
                                                     m
          ------------------------------------------------------
         |                                                      |
         |                                                      |
         |               ----------------------                 |
         |               |                    |                 |
      n  |           ny  |                    |                 |
         |               |                    |                 |
         |               .---------------------                 |
         |             (xs,ys)     nx                           |
         |            .
         |         (gxs,gys)                                    |
         |                                                      |
          -----------------------------------------------------
  */

  /*
         nc - number of components per grid point
         col - number of colors needed in one direction for single component problem
  */
  M = dd->M;
  N = dd->N;
  P = dd->P;
  dim = da->dim;
  dof = dd->w;
  /* PetscCall(DMDAGetInfo(da,&dim,&M,&N,&P,NULL,NULL,NULL,&dof,NULL,NULL,NULL,NULL,NULL)); */
  PetscCall(DMDAGetCorners(da,NULL,NULL,NULL,&nx,&ny,&nz));
  PetscCall(PetscObjectGetComm((PetscObject)da,&comm));
  PetscCall(MatCreate(comm,&A));
  PetscCall(MatSetSizes(A,dof*nx*ny*nz,dof*nx*ny*nz,dof*M*N*P,dof*M*N*P));
  PetscCall(MatSetType(A,mtype));
  PetscCall(MatSetFromOptions(A));
  if (dof*nx*ny*nz < da->bind_below) {
    /* small local problems stay on the CPU */
    PetscCall(MatSetBindingPropagates(A,PETSC_TRUE));
    PetscCall(MatBindToCPU(A,PETSC_TRUE));
  }
  PetscCall(MatSetDM(A,da));
  if (da->structure_only) {
    PetscCall(MatSetOption(A,MAT_STRUCTURE_ONLY,PETSC_TRUE));
  }
  PetscCall(MatGetType(A,&Atype));
  /*
     We do not provide a getmatrix function in the DMDA operations because
     the basic DMDA does not know about matrices. We think of DMDA as being
     more low-level than matrices. This is kind of cheating because sometimes
     we think of DMDA as higher level than matrices.

     We could switch based on Atype (or mtype), but we do not since the
     specialized setting routines depend only on the particular preallocation
     details of the matrix, not the type itself.
  */
  PetscCall(PetscObjectQueryFunction((PetscObject)A,"MatMPIAIJSetPreallocation_C",&aij));
  if (!aij) {
    PetscCall(PetscObjectQueryFunction((PetscObject)A,"MatSeqAIJSetPreallocation_C",&aij));
  }
  if (!aij) {
    PetscCall(PetscObjectQueryFunction((PetscObject)A,"MatMPIBAIJSetPreallocation_C",&baij));
    if (!baij) {
      PetscCall(PetscObjectQueryFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C",&baij));
    }
    if (!baij) {
      PetscCall(PetscObjectQueryFunction((PetscObject)A,"MatMPISBAIJSetPreallocation_C",&sbaij));
      if (!sbaij) {
        PetscCall(PetscObjectQueryFunction((PetscObject)A,"MatSeqSBAIJSetPreallocation_C",&sbaij));
      }
      if (!sbaij) {
        PetscCall(PetscObjectQueryFunction((PetscObject)A,"MatMPISELLSetPreallocation_C",&sell));
        if (!sell) {
          PetscCall(PetscObjectQueryFunction((PetscObject)A,"MatSeqSELLSetPreallocation_C",&sell));
        }
      }
      if (!sell) {
        PetscCall(PetscObjectQueryFunction((PetscObject)A,"MatISSetPreallocation_C",&is));
      }
    }
  }
  if (aij) {
    if (dim == 1) {
      if (dd->ofill) {
        PetscCall(DMCreateMatrix_DA_1d_MPIAIJ_Fill(da,A));
      } else {
        DMBoundaryType bx;
        PetscMPIInt    size;  /* NOTE(review): shadows the outer 'size'; outer one is set later, so benign */
        PetscCall(DMDAGetInfo(da,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,&bx,NULL,NULL,NULL));
        PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)da),&size));
        if (size == 1 && bx == DM_BOUNDARY_NONE) {
          PetscCall(DMCreateMatrix_DA_1d_SeqAIJ_NoPreallocation(da,A));
        } else {
          PetscCall(DMCreateMatrix_DA_1d_MPIAIJ(da,A));
        }
      }
    } else if (dim == 2) {
      if (dd->ofill) {
        PetscCall(DMCreateMatrix_DA_2d_MPIAIJ_Fill(da,A));
      } else {
        PetscCall(DMCreateMatrix_DA_2d_MPIAIJ(da,A));
      }
    } else if (dim == 3) {
      if (dd->ofill) {
        PetscCall(DMCreateMatrix_DA_3d_MPIAIJ_Fill(da,A));
      } else {
        PetscCall(DMCreateMatrix_DA_3d_MPIAIJ(da,A));
      }
    }
  } else if (baij) {
    if (dim == 2) {
      PetscCall(DMCreateMatrix_DA_2d_MPIBAIJ(da,A));
    } else if (dim == 3) {
      PetscCall(DMCreateMatrix_DA_3d_MPIBAIJ(da,A));
    } else SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"Not implemented for %D dimension and Matrix Type: %s in %D dimension! Send mail to petsc-maint@mcs.anl.gov for code",dim,Atype,dim);
  } else if (sbaij) {
    if (dim == 2) {
      PetscCall(DMCreateMatrix_DA_2d_MPISBAIJ(da,A));
    } else if (dim == 3) {
      PetscCall(DMCreateMatrix_DA_3d_MPISBAIJ(da,A));
    } else SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"Not implemented for %D dimension and Matrix Type: %s in %D dimension! Send mail to petsc-maint@mcs.anl.gov for code",dim,Atype,dim);
  } else if (sell) {
    if (dim == 2) {
      PetscCall(DMCreateMatrix_DA_2d_MPISELL(da,A));
    } else if (dim == 3) {
      PetscCall(DMCreateMatrix_DA_3d_MPISELL(da,A));
    } else SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"Not implemented for %D dimension and Matrix Type: %s in %D dimension! Send mail to petsc-maint@mcs.anl.gov for code",dim,Atype,dim);
  } else if (is) {
    PetscCall(DMCreateMatrix_DA_IS(da,A));
  } else {
    /* matrix type supports no preallocation interface we know: just set up with the l2g mapping */
    ISLocalToGlobalMapping ltog;

    PetscCall(MatSetBlockSize(A,dof));
    PetscCall(MatSetUp(A));
    PetscCall(DMGetLocalToGlobalMapping(da,&ltog));
    PetscCall(MatSetLocalToGlobalMapping(A,ltog,ltog));
  }
  PetscCall(DMDAGetGhostCorners(da,&starts[0],&starts[1],&starts[2],&dims[0],&dims[1],&dims[2]));
  PetscCall(MatSetStencil(A,dim,dims,starts,dof));
  PetscCall(MatSetDM(A,da));
  PetscCallMPI(MPI_Comm_size(comm,&size));
  if (size > 1) {
    /* change viewer to display matrix in natural ordering */
    PetscCall(MatSetOperation(A, MATOP_VIEW, (void (*)(void))MatView_MPI_DA));
    PetscCall(MatSetOperation(A, MATOP_LOAD, (void (*)(void))MatLoad_MPI_DA));
  }
  *J = A;
  PetscFunctionReturn(0);
}

/* ---------------------------------------------------------------------------------*/
PETSC_EXTERN PetscErrorCode MatISSetPreallocation_IS(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]);

/*
   Preallocates a MATIS matrix for the DMDA using the DMDAGetElements()
   decomposition; global indices not touched by any local element are masked
   out (set to -1) so the local-to-global map matches the element coverage.
*/
PetscErrorCode DMCreateMatrix_DA_IS(DM dm,Mat J)
{
  DM_DA                  *da = (DM_DA*)dm->data;
  Mat                    lJ,P;
  ISLocalToGlobalMapping ltog;
  IS                     is;
  PetscBT                bt;
  const PetscInt         *e_loc,*idx;
  PetscInt               i,nel,nen,nv,dof,*gidx,n,N;

  /* The l2g map of DMDA has all ghosted nodes, and e_loc is a subset of all the local nodes (including the ghosted)
     We need to filter out the local indices that are not represented through the DMDAGetElements decomposition */
  PetscFunctionBegin;
  dof = da->w;
  PetscCall(MatSetBlockSize(J,dof));
  PetscCall(DMGetLocalToGlobalMapping(dm,&ltog));

  /* flag local elements indices in local DMDA numbering */
  PetscCall(ISLocalToGlobalMappingGetSize(ltog,&nv));
  PetscCall(PetscBTCreate(nv/dof,&bt));
  PetscCall(DMDAGetElements(dm,&nel,&nen,&e_loc)); /* this will throw an error if the stencil type is not DMDA_STENCIL_BOX */
  for (i=0;i<nel*nen;i++) PetscCall(PetscBTSet(bt,e_loc[i]));

  /* filter out (set to -1) the global indices not used by the local elements */
  PetscCall(PetscMalloc1(nv/dof,&gidx));
  PetscCall(ISLocalToGlobalMappingGetBlockIndices(ltog,&idx));
  PetscCall(PetscArraycpy(gidx,idx,nv/dof));
  PetscCall(ISLocalToGlobalMappingRestoreBlockIndices(ltog,&idx));
  for (i=0;i<nv/dof;i++) if (!PetscBTLookup(bt,i)) gidx[i] = -1;
  PetscCall(PetscBTDestroy(&bt));
  PetscCall(ISCreateBlock(PetscObjectComm((PetscObject)dm),dof,nv/dof,gidx,PETSC_OWN_POINTER,&is));
  PetscCall(ISLocalToGlobalMappingCreateIS(is,&ltog));
  PetscCall(MatSetLocalToGlobalMapping(J,ltog,ltog));
  PetscCall(ISLocalToGlobalMappingDestroy(&ltog));
  PetscCall(ISDestroy(&is));

  /* Preallocation */
  if (dm->prealloc_skip) {
    PetscCall(MatSetUp(J));
  } else {
    /* use a MATPREALLOCATOR matrix P to compute the exact local nonzero pattern */
    PetscCall(MatISGetLocalMat(J,&lJ));
    PetscCall(MatGetLocalToGlobalMapping(lJ,&ltog,NULL));
PetscCall(MatCreate(PetscObjectComm((PetscObject)lJ),&P)); 856 PetscCall(MatSetType(P,MATPREALLOCATOR)); 857 PetscCall(MatSetLocalToGlobalMapping(P,ltog,ltog)); 858 PetscCall(MatGetSize(lJ,&N,NULL)); 859 PetscCall(MatGetLocalSize(lJ,&n,NULL)); 860 PetscCall(MatSetSizes(P,n,n,N,N)); 861 PetscCall(MatSetBlockSize(P,dof)); 862 PetscCall(MatSetUp(P)); 863 for (i=0;i<nel;i++) { 864 PetscCall(MatSetValuesBlockedLocal(P,nen,e_loc+i*nen,nen,e_loc+i*nen,NULL,INSERT_VALUES)); 865 } 866 PetscCall(MatPreallocatorPreallocate(P,(PetscBool)!da->prealloc_only,lJ)); 867 PetscCall(MatISRestoreLocalMat(J,&lJ)); 868 PetscCall(DMDARestoreElements(dm,&nel,&nen,&e_loc)); 869 PetscCall(MatDestroy(&P)); 870 871 PetscCall(MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY)); 872 PetscCall(MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY)); 873 } 874 PetscFunctionReturn(0); 875 } 876 877 PetscErrorCode DMCreateMatrix_DA_2d_MPISELL(DM da,Mat J) 878 { 879 PetscErrorCode ierr; 880 PetscInt xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny,m,n,dim,s,*cols = NULL,k,nc,*rows = NULL,col,cnt,l,p; 881 PetscInt lstart,lend,pstart,pend,*dnz,*onz; 882 MPI_Comm comm; 883 PetscScalar *values; 884 DMBoundaryType bx,by; 885 ISLocalToGlobalMapping ltog; 886 DMDAStencilType st; 887 888 PetscFunctionBegin; 889 /* 890 nc - number of components per grid point 891 col - number of colors needed in one direction for single component problem 892 893 */ 894 PetscCall(DMDAGetInfo(da,&dim,&m,&n,NULL,NULL,NULL,NULL,&nc,&s,&bx,&by,NULL,&st)); 895 col = 2*s + 1; 896 PetscCall(DMDAGetCorners(da,&xs,&ys,NULL,&nx,&ny,NULL)); 897 PetscCall(DMDAGetGhostCorners(da,&gxs,&gys,NULL,&gnx,&gny,NULL)); 898 PetscCall(PetscObjectGetComm((PetscObject)da,&comm)); 899 900 PetscCall(PetscMalloc2(nc,&rows,col*col*nc*nc,&cols)); 901 PetscCall(DMGetLocalToGlobalMapping(da,<og)); 902 903 PetscCall(MatSetBlockSize(J,nc)); 904 /* determine the matrix preallocation information */ 905 ierr = MatPreallocateInitialize(comm,nc*nx*ny,nc*nx*ny,dnz,onz);PetscCall(ierr); 906 for (i=xs; 
i<xs+nx; i++) {

    /* pstart/pend: stencil extent in x, clipped at physical boundaries unless periodic */
    pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
    pend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));

    for (j=ys; j<ys+ny; j++) {
      slot = i - gxs + gnx*(j - gys); /* local (ghosted) index of node (i,j) */

      lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
      lend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));

      cnt = 0;
      for (k=0; k<nc; k++) {
        for (l=lstart; l<lend+1; l++) {
          for (p=pstart; p<pend+1; p++) {
            if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star have either l = 0 or p = 0 */
              cols[cnt++] = k + nc*(slot + gnx*l + p);
            }
          }
        }
        rows[k] = k + nc*(slot);
      }
      PetscCall(MatPreallocateSetLocal(ltog,nc,rows,ltog,cnt,cols,dnz,onz));
    }
  }
  PetscCall(MatSetBlockSize(J,nc));
  PetscCall(MatSeqSELLSetPreallocation(J,0,dnz));
  PetscCall(MatMPISELLSetPreallocation(J,0,dnz,0,onz));
  ierr = MatPreallocateFinalize(dnz,onz);PetscCall(ierr);

  PetscCall(MatSetLocalToGlobalMapping(J,ltog,ltog));

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    /* insert explicit zeros so the nonzero pattern is locked in before use */
    PetscCall(PetscCalloc1(col*col*nc*nc,&values));
    for (i=xs; i<xs+nx; i++) {

      pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
      pend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));

      for (j=ys; j<ys+ny; j++) {
        slot = i - gxs + gnx*(j - gys);

        lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
        lend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));

        cnt = 0;
        for (k=0; k<nc; k++) {
          for (l=lstart; l<lend+1; l++) {
            for (p=pstart; p<pend+1; p++) {
              if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star have either l = 0 or p = 0 */
                cols[cnt++] = k + nc*(slot + gnx*l + p);
              }
            }
          }
          rows[k] = k + nc*(slot);
        }
        PetscCall(MatSetValuesLocal(J,nc,rows,cnt,cols,values,INSERT_VALUES));
      }
    }
    PetscCall(PetscFree(values));
    /* do not copy values to GPU since they are all zero and not yet needed there */
    PetscCall(MatBindToCPU(J,PETSC_TRUE));
    PetscCall(MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY));
    PetscCall(MatBindToCPU(J,PETSC_FALSE));
    PetscCall(MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE));
  }
  PetscCall(PetscFree2(rows,cols));
  PetscFunctionReturn(0);
}

/*
   DMCreateMatrix_DA_3d_MPISELL - Creates and preallocates a MATMPISELL matrix
   for a 3d DMDA; 3d analogue of DMCreateMatrix_DA_2d_MPISELL.
*/
PetscErrorCode DMCreateMatrix_DA_3d_MPISELL(DM da,Mat J)
{
  PetscErrorCode         ierr;
  PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
  PetscInt               m,n,dim,s,*cols = NULL,k,nc,*rows = NULL,col,cnt,l,p,*dnz = NULL,*onz = NULL;
  PetscInt               istart,iend,jstart,jend,kstart,kend,zs,nz,gzs,gnz,ii,jj,kk,M,N,P;
  MPI_Comm               comm;
  PetscScalar            *values;
  DMBoundaryType         bx,by,bz;
  ISLocalToGlobalMapping ltog;
  DMDAStencilType        st;

  PetscFunctionBegin;
  /*
         nc - number of components per grid point
         col - number of colors needed in one direction for single component problem
  */
  PetscCall(DMDAGetInfo(da,&dim,&m,&n,&p,&M,&N,&P,&nc,&s,&bx,&by,&bz,&st));
  col = 2*s + 1;
  PetscCall(DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz));
  PetscCall(DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz));
  PetscCall(PetscObjectGetComm((PetscObject)da,&comm));

  PetscCall(PetscMalloc2(nc,&rows,col*col*col*nc*nc,&cols));
  PetscCall(DMGetLocalToGlobalMapping(da,&ltog));

  PetscCall(MatSetBlockSize(J,nc));
  /* determine the matrix
preallocation information */
  ierr = MatPreallocateInitialize(comm,nc*nx*ny*nz,nc*nx*ny*nz,dnz,onz);PetscCall(ierr);
  for (i=xs; i<xs+nx; i++) {
    /* clip the stencil extent at non-periodic physical boundaries in each direction */
    istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
    iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
    for (j=ys; j<ys+ny; j++) {
      jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
      jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
      for (k=zs; k<zs+nz; k++) {
        kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
        kend   = (bz == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));

        slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs); /* local (ghosted) index of node (i,j,k) */

        cnt = 0;
        for (l=0; l<nc; l++) {
          for (ii=istart; ii<iend+1; ii++) {
            for (jj=jstart; jj<jend+1; jj++) {
              for (kk=kstart; kk<kend+1; kk++) {
                if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
                  cols[cnt++] = l + nc*(slot + ii + gnx*jj + gnx*gny*kk);
                }
              }
            }
          }
          rows[l] = l + nc*(slot);
        }
        PetscCall(MatPreallocateSetLocal(ltog,nc,rows,ltog,cnt,cols,dnz,onz));
      }
    }
  }
  PetscCall(MatSetBlockSize(J,nc));
  PetscCall(MatSeqSELLSetPreallocation(J,0,dnz));
  PetscCall(MatMPISELLSetPreallocation(J,0,dnz,0,onz));
  ierr = MatPreallocateFinalize(dnz,onz);PetscCall(ierr);
  PetscCall(MatSetLocalToGlobalMapping(J,ltog,ltog));

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    PetscCall(PetscCalloc1(col*col*col*nc*nc*nc,&values));
    for (i=xs; i<xs+nx; i++) {
      istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
      iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
      for (j=ys; j<ys+ny; j++) {
        jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
        jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
        for (k=zs; k<zs+nz; k++) {
          kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
          kend   = (bz == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));

          slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);

          cnt = 0;
          for (l=0; l<nc; l++) {
            for (ii=istart; ii<iend+1; ii++) {
              for (jj=jstart; jj<jend+1; jj++) {
                for (kk=kstart; kk<kend+1; kk++) {
                  if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
                    cols[cnt++] = l + nc*(slot + ii + gnx*jj + gnx*gny*kk);
                  }
                }
              }
            }
            rows[l] = l + nc*(slot);
          }
          PetscCall(MatSetValuesLocal(J,nc,rows,cnt,cols,values,INSERT_VALUES));
        }
      }
    }
    PetscCall(PetscFree(values));
    /* do not copy values to GPU since they are all zero and not yet needed there */
    PetscCall(MatBindToCPU(J,PETSC_TRUE));
    PetscCall(MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY));
    PetscCall(MatBindToCPU(J,PETSC_FALSE));
    PetscCall(MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE));
  }
  PetscCall(PetscFree2(rows,cols));
  PetscFunctionReturn(0);
}

/*
   DMCreateMatrix_DA_2d_MPIAIJ - Creates and preallocates a MATMPIAIJ matrix
   for a 2d DMDA; fully coupled components (no dfill/ofill sparsity).
*/
PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ(DM da,Mat J)
{
  PetscErrorCode         ierr;
  PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny,m,n,dim,s,*cols = NULL,k,nc,*rows = NULL,col,cnt,l,p,M,N;
  PetscInt               lstart,lend,pstart,pend,*dnz,*onz;
  MPI_Comm               comm;
  DMBoundaryType         bx,by;
  ISLocalToGlobalMapping ltog,mltog;
  DMDAStencilType        st;
  PetscBool              removedups = PETSC_FALSE,alreadyboundtocpu = PETSC_TRUE;

  PetscFunctionBegin;
  /*
         nc - number of components per grid point
         col - number of colors needed in one direction for single component
problem
  */
  PetscCall(DMDAGetInfo(da,&dim,&m,&n,NULL,&M,&N,NULL,&nc,&s,&bx,&by,NULL,&st));
  if (bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE) {
    PetscCall(MatSetOption(J,MAT_SORTED_FULL,PETSC_TRUE));
  }
  col = 2*s + 1;
  /*
       With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times
       because of "wrapping" around the end of the domain hitting an entry already counted in the other direction.
  */
  if (M == 1 && 2*s >= m) removedups = PETSC_TRUE;
  if (N == 1 && 2*s >= n) removedups = PETSC_TRUE;
  PetscCall(DMDAGetCorners(da,&xs,&ys,NULL,&nx,&ny,NULL));
  PetscCall(DMDAGetGhostCorners(da,&gxs,&gys,NULL,&gnx,&gny,NULL));
  PetscCall(PetscObjectGetComm((PetscObject)da,&comm));

  PetscCall(PetscMalloc2(nc,&rows,col*col*nc*nc,&cols));
  PetscCall(DMGetLocalToGlobalMapping(da,&ltog));

  PetscCall(MatSetBlockSize(J,nc));
  /* determine the matrix preallocation information */
  ierr = MatPreallocateInitialize(comm,nc*nx*ny,nc*nx*ny,dnz,onz);PetscCall(ierr);
  for (i=xs; i<xs+nx; i++) {
    /* stencil extent in x, clipped at physical boundaries unless periodic */
    pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
    pend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));

    for (j=ys; j<ys+ny; j++) {
      slot = i - gxs + gnx*(j - gys); /* local (ghosted) index of node (i,j) */

      lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
      lend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));

      cnt = 0;
      for (k=0; k<nc; k++) {
        for (l=lstart; l<lend+1; l++) {
          for (p=pstart; p<pend+1; p++) {
            if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star have either l = 0 or p = 0 */
              cols[cnt++] = k + nc*(slot + gnx*l + p);
            }
          }
        }
        rows[k] = k + nc*(slot);
      }
      if (removedups) {
        PetscCall(MatPreallocateSetLocalRemoveDups(ltog,nc,rows,ltog,cnt,cols,dnz,onz));
      } else {
        PetscCall(MatPreallocateSetLocal(ltog,nc,rows,ltog,cnt,cols,dnz,onz));
      }
    }
  }
  PetscCall(MatSetBlockSize(J,nc));
  PetscCall(MatSeqAIJSetPreallocation(J,0,dnz));
  PetscCall(MatMPIAIJSetPreallocation(J,0,dnz,0,onz));
  ierr = MatPreallocateFinalize(dnz,onz);PetscCall(ierr);
  /* only install the DMDA map if the matrix does not already carry one */
  PetscCall(MatGetLocalToGlobalMapping(J,&mltog,NULL));
  if (!mltog) {
    PetscCall(MatSetLocalToGlobalMapping(J,ltog,ltog));
  }

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    for (i=xs; i<xs+nx; i++) {

      pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
      pend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));

      for (j=ys; j<ys+ny; j++) {
        slot = i - gxs + gnx*(j - gys);

        lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
        lend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));

        cnt = 0;
        for (l=lstart; l<lend+1; l++) {
          for (p=pstart; p<pend+1; p++) {
            if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star have either l = 0 or p = 0 */
              cols[cnt++] = nc*(slot + gnx*l + p);
              for (k=1; k<nc; k++) { /* remaining components of this node are consecutive */
                cols[cnt] = 1 + cols[cnt-1];cnt++;
              }
            }
          }
        }
        for (k=0; k<nc; k++) rows[k] = k + nc*(slot);
        PetscCall(MatSetValuesLocal(J,nc,rows,cnt,cols,NULL,INSERT_VALUES));
      }
    }
    /* do not copy values to GPU since they are all zero and not yet needed there */
    PetscCall(MatBoundToCPU(J,&alreadyboundtocpu));
    PetscCall(MatBindToCPU(J,PETSC_TRUE));
    PetscCall(MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY));
    if (!alreadyboundtocpu) PetscCall(MatBindToCPU(J,PETSC_FALSE));
    PetscCall(MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE));
    if (bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE) {
      PetscCall(MatSetOption(J,MAT_SORTED_FULL,PETSC_FALSE));
    }
  }
  PetscCall(PetscFree2(rows,cols));
  PetscFunctionReturn(0);
}

/*
   DMCreateMatrix_DA_2d_MPIAIJ_Fill - Like DMCreateMatrix_DA_2d_MPIAIJ but honors
   the per-block fill patterns (dd->dfill, dd->ofill) set via DMDASetBlockFills().
*/
PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ_Fill(DM da,Mat J)
{
  PetscErrorCode         ierr;
  PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
  PetscInt               m,n,dim,s,*cols,k,nc,row,col,cnt,maxcnt = 0,l,p,M,N;
  PetscInt               lstart,lend,pstart,pend,*dnz,*onz;
  DM_DA                  *dd = (DM_DA*)da->data;
  PetscInt               ifill_col,*ofill = dd->ofill, *dfill = dd->dfill;
  MPI_Comm               comm;
  DMBoundaryType         bx,by;
  ISLocalToGlobalMapping ltog;
  DMDAStencilType        st;
  PetscBool              removedups = PETSC_FALSE;

  PetscFunctionBegin;
  /*
         nc - number of components per grid point
         col - number of colors needed in one direction for single component problem
  */
  PetscCall(DMDAGetInfo(da,&dim,&m,&n,NULL,&M,&N,NULL,&nc,&s,&bx,&by,NULL,&st));
  col = 2*s + 1;
  /*
       With one processor in periodic domains in a
skinny dimension the code will label nonzero columns multiple times
       because of "wrapping" around the end of the domain hitting an entry already counted in the other direction.
  */
  if (M == 1 && 2*s >= m) removedups = PETSC_TRUE;
  if (N == 1 && 2*s >= n) removedups = PETSC_TRUE;
  PetscCall(DMDAGetCorners(da,&xs,&ys,NULL,&nx,&ny,NULL));
  PetscCall(DMDAGetGhostCorners(da,&gxs,&gys,NULL,&gnx,&gny,NULL));
  PetscCall(PetscObjectGetComm((PetscObject)da,&comm));

  PetscCall(PetscMalloc1(col*col*nc,&cols));
  PetscCall(DMGetLocalToGlobalMapping(da,&ltog));

  PetscCall(MatSetBlockSize(J,nc));
  /* determine the matrix preallocation information */
  ierr = MatPreallocateInitialize(comm,nc*nx*ny,nc*nx*ny,dnz,onz);PetscCall(ierr);
  for (i=xs; i<xs+nx; i++) {

    pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
    pend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));

    for (j=ys; j<ys+ny; j++) {
      slot = i - gxs + gnx*(j - gys); /* local (ghosted) index of node (i,j) */

      lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
      lend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));

      for (k=0; k<nc; k++) {
        cnt = 0;
        for (l=lstart; l<lend+1; l++) {
          for (p=pstart; p<pend+1; p++) {
            /* off-node couplings use ofill; the diagonal node (l==0 && p==0) uses dfill (dense if NULL) */
            if (l || p) {
              if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star */
                for (ifill_col=ofill[k]; ifill_col<ofill[k+1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc*(slot + gnx*l + p);
              }
            } else {
              if (dfill) {
                for (ifill_col=dfill[k]; ifill_col<dfill[k+1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc*(slot + gnx*l + p);
              } else {
                for (ifill_col=0; ifill_col<nc; ifill_col++) cols[cnt++] = ifill_col + nc*(slot + gnx*l + p);
              }
            }
          }
        }
        row    = k + nc*(slot);
        maxcnt = PetscMax(maxcnt,cnt);
        if (removedups) {
          PetscCall(MatPreallocateSetLocalRemoveDups(ltog,1,&row,ltog,cnt,cols,dnz,onz));
        } else {
          PetscCall(MatPreallocateSetLocal(ltog,1,&row,ltog,cnt,cols,dnz,onz));
        }
      }
    }
  }
  PetscCall(MatSeqAIJSetPreallocation(J,0,dnz));
  PetscCall(MatMPIAIJSetPreallocation(J,0,dnz,0,onz));
  ierr = MatPreallocateFinalize(dnz,onz);PetscCall(ierr);
  PetscCall(MatSetLocalToGlobalMapping(J,ltog,ltog));

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    for (i=xs; i<xs+nx; i++) {

      pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
      pend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));

      for (j=ys; j<ys+ny; j++) {
        slot = i - gxs + gnx*(j - gys);

        lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
        lend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));

        for (k=0; k<nc; k++) {
          cnt = 0;
          for (l=lstart; l<lend+1; l++) {
            for (p=pstart; p<pend+1; p++) {
              if (l || p) {
                if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star */
                  for (ifill_col=ofill[k]; ifill_col<ofill[k+1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc*(slot + gnx*l + p);
                }
              } else {
                if (dfill) {
                  for (ifill_col=dfill[k]; ifill_col<dfill[k+1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc*(slot + gnx*l + p);
                } else {
                  for (ifill_col=0; ifill_col<nc; ifill_col++) cols[cnt++] = ifill_col + nc*(slot + gnx*l + p);
                }
              }
            }
          }
          row = k + nc*(slot);
          PetscCall(MatSetValuesLocal(J,1,&row,cnt,cols,NULL,INSERT_VALUES));
        }
      }
    }
    /* do not copy values to GPU since they are all zero and not yet needed there */
    PetscCall(MatBindToCPU(J,PETSC_TRUE));
    PetscCall(MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY));
    PetscCall(MatBindToCPU(J,PETSC_FALSE));
    PetscCall(MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE));
  }
  PetscCall(PetscFree(cols));
  PetscFunctionReturn(0);
}

/* ---------------------------------------------------------------------------------*/

/*
   DMCreateMatrix_DA_3d_MPIAIJ - Creates and preallocates a MATMPIAIJ matrix
   for a 3d DMDA; fully coupled components (no dfill/ofill sparsity).
*/
PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ(DM da,Mat J)
{
  PetscErrorCode         ierr;
  PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
  PetscInt               m,n,dim,s,*cols = NULL,k,nc,*rows = NULL,col,cnt,l,p,*dnz = NULL,*onz = NULL;
  PetscInt               istart,iend,jstart,jend,kstart,kend,zs,nz,gzs,gnz,ii,jj,kk,M,N,P;
  MPI_Comm               comm;
  DMBoundaryType         bx,by,bz;
  ISLocalToGlobalMapping ltog,mltog;
  DMDAStencilType        st;
  PetscBool              removedups = PETSC_FALSE;

  PetscFunctionBegin;
  /*
         nc - number of components per grid point
         col - number of colors needed in one direction for single component problem
  */
PetscCall(DMDAGetInfo(da,&dim,&m,&n,&p,&M,&N,&P,&nc,&s,&bx,&by,&bz,&st));
  if (bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE && bz == DM_BOUNDARY_NONE) {
    PetscCall(MatSetOption(J,MAT_SORTED_FULL,PETSC_TRUE));
  }
  col = 2*s + 1;

  /*
       With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times
       because of "wrapping" around the end of the domain hitting an entry already counted in the other direction.
  */
  if (M == 1 && 2*s >= m) removedups = PETSC_TRUE;
  if (N == 1 && 2*s >= n) removedups = PETSC_TRUE;
  if (P == 1 && 2*s >= p) removedups = PETSC_TRUE;

  PetscCall(DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz));
  PetscCall(DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz));
  PetscCall(PetscObjectGetComm((PetscObject)da,&comm));

  PetscCall(PetscMalloc2(nc,&rows,col*col*col*nc*nc,&cols));
  PetscCall(DMGetLocalToGlobalMapping(da,&ltog));

  PetscCall(MatSetBlockSize(J,nc));
  /* determine the matrix preallocation information */
  ierr = MatPreallocateInitialize(comm,nc*nx*ny*nz,nc*nx*ny*nz,dnz,onz);PetscCall(ierr);
  for (i=xs; i<xs+nx; i++) {
    istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
    iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
    for (j=ys; j<ys+ny; j++) {
      jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
      jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
      for (k=zs; k<zs+nz; k++) {
        kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
        kend   = (bz == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));

        slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs); /* local (ghosted) index of node (i,j,k) */

        cnt = 0;
        for (l=0; l<nc; l++) {
          for (ii=istart; ii<iend+1; ii++) {
            for (jj=jstart; jj<jend+1; jj++) {
              for (kk=kstart; kk<kend+1; kk++) {
                if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
                  cols[cnt++] = l + nc*(slot + ii + gnx*jj + gnx*gny*kk);
                }
              }
            }
          }
          rows[l] = l + nc*(slot);
        }
        if (removedups) {
          PetscCall(MatPreallocateSetLocalRemoveDups(ltog,nc,rows,ltog,cnt,cols,dnz,onz));
        } else {
          PetscCall(MatPreallocateSetLocal(ltog,nc,rows,ltog,cnt,cols,dnz,onz));
        }
      }
    }
  }
  PetscCall(MatSetBlockSize(J,nc));
  PetscCall(MatSeqAIJSetPreallocation(J,0,dnz));
  PetscCall(MatMPIAIJSetPreallocation(J,0,dnz,0,onz));
  ierr = MatPreallocateFinalize(dnz,onz);PetscCall(ierr);
  /* only install the DMDA map if the matrix does not already carry one */
  PetscCall(MatGetLocalToGlobalMapping(J,&mltog,NULL));
  if (!mltog) {
    PetscCall(MatSetLocalToGlobalMapping(J,ltog,ltog));
  }

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    for (i=xs; i<xs+nx; i++) {
      istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
      iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
      for (j=ys; j<ys+ny; j++) {
        jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
        jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
        for (k=zs; k<zs+nz; k++) {
          kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
          kend   = (bz == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));

          slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);

          /* NOTE: loop order here (kk outermost) differs from the counting pass above;
             it generates the columns in sorted order for MAT_SORTED_FULL */
          cnt = 0;
          for (kk=kstart; kk<kend+1; kk++) {
            for (jj=jstart; jj<jend+1; jj++) {
              for (ii=istart; ii<iend+1; ii++) {
                if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
                  cols[cnt++] = nc*(slot + ii + gnx*jj + gnx*gny*kk);
                  for (l=1; l<nc; l++) { /* remaining components of this node are consecutive */
                    cols[cnt] = 1 + cols[cnt-1];cnt++;
                  }
                }
              }
            }
          }
          rows[0] = nc*(slot); for (l=1; l<nc; l++) rows[l] = 1 + rows[l-1];
          PetscCall(MatSetValuesLocal(J,nc,rows,cnt,cols,NULL,INSERT_VALUES));
        }
      }
    }
    /* do not copy values to GPU since they are all zero and not yet needed there */
    PetscCall(MatBindToCPU(J,PETSC_TRUE));
    PetscCall(MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY));
    if (bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE && bz == DM_BOUNDARY_NONE) {
      PetscCall(MatSetOption(J,MAT_SORTED_FULL,PETSC_FALSE));
    }
    PetscCall(MatBindToCPU(J,PETSC_FALSE));
    PetscCall(MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE));
  }
  PetscCall(PetscFree2(rows,cols));
  PetscFunctionReturn(0);
}

/* ---------------------------------------------------------------------------------*/

/*
   DMCreateMatrix_DA_1d_MPIAIJ_Fill - Creates and preallocates a MATMPIAIJ matrix
   for a 1d DMDA honoring the dfill/ofill block fill patterns; only supports
   stencil width s <= 1 (checked below).
*/
PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ_Fill(DM da,Mat J)
{
  DM_DA                  *dd = (DM_DA*)da->data;
  PetscInt               xs,nx,i,j,gxs,gnx,row,k,l;
  PetscInt               m,dim,s,*cols = NULL,nc,cnt,maxcnt = 0,*ocols;
  PetscInt               *ofill = dd->ofill,*dfill = dd->dfill;
  DMBoundaryType         bx;
  ISLocalToGlobalMapping ltog;
  PetscMPIInt            rank,size;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)da),&rank));
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)da),&size));

  /*
         nc - number of components per grid point
  */
PetscCall(DMDAGetInfo(da,&dim,&m,NULL,NULL,NULL,NULL,NULL,&nc,&s,&bx,NULL,NULL,NULL));
  PetscCheckFalse(s > 1,PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"Matrix creation for 1d not implemented correctly for stencil width larger than 1");
  PetscCall(DMDAGetCorners(da,&xs,NULL,NULL,&nx,NULL,NULL));
  PetscCall(DMDAGetGhostCorners(da,&gxs,NULL,NULL,&gnx,NULL,NULL));

  PetscCall(MatSetBlockSize(J,nc));
  /* cols/ocols temporarily hold per-row diagonal/off-diagonal nonzero counts */
  PetscCall(PetscCalloc2(nx*nc,&cols,nx*nc,&ocols));

  /*
        note should be smaller for first and last process with no periodic
        does not handle dfill
  */
  cnt = 0;
  /* coupling with process to the left */
  for (i=0; i<s; i++) {
    for (j=0; j<nc; j++) {
      ocols[cnt] = ((rank == 0) ? 0 : (s - i)*(ofill[j+1] - ofill[j]));
      cols[cnt]  = dfill[j+1] - dfill[j] + (s + i)*(ofill[j+1] - ofill[j]);
      if (rank == 0 && (dd->bx == DM_BOUNDARY_PERIODIC)) {
        /* periodic wrap: the left neighbor lives on the last process (or locally if only one) */
        if (size > 1) ocols[cnt] += (s - i)*(ofill[j+1] - ofill[j]);
        else cols[cnt] += (s - i)*(ofill[j+1] - ofill[j]);
      }
      maxcnt = PetscMax(maxcnt,ocols[cnt]+cols[cnt]);
      cnt++;
    }
  }
  /* interior rows: full stencil, all local */
  for (i=s; i<nx-s; i++) {
    for (j=0; j<nc; j++) {
      cols[cnt] = dfill[j+1] - dfill[j] + 2*s*(ofill[j+1] - ofill[j]);
      maxcnt = PetscMax(maxcnt,ocols[cnt]+cols[cnt]);
      cnt++;
    }
  }
  /* coupling with process to the right */
  for (i=nx-s; i<nx; i++) {
    for (j=0; j<nc; j++) {
      ocols[cnt] = ((rank == (size-1)) ? 0 : (i - nx + s + 1)*(ofill[j+1] - ofill[j]));
      cols[cnt]  = dfill[j+1] - dfill[j] + (s + nx - i - 1)*(ofill[j+1] - ofill[j]);
      if ((rank == size-1) && (dd->bx == DM_BOUNDARY_PERIODIC)) {
        if (size > 1) ocols[cnt] += (i - nx + s + 1)*(ofill[j+1] - ofill[j]);
        else cols[cnt] += (i - nx + s + 1)*(ofill[j+1] - ofill[j]);
      }
      maxcnt = PetscMax(maxcnt,ocols[cnt]+cols[cnt]);
      cnt++;
    }
  }

  PetscCall(MatSeqAIJSetPreallocation(J,0,cols));
  PetscCall(MatMPIAIJSetPreallocation(J,0,cols,0,ocols));
  PetscCall(PetscFree2(cols,ocols));

  PetscCall(DMGetLocalToGlobalMapping(da,&ltog));
  PetscCall(MatSetLocalToGlobalMapping(J,ltog,ltog));

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    PetscCall(PetscMalloc1(maxcnt,&cols)); /* reused as a column-index scratch buffer now */
    row = xs*nc;
    /* coupling with process to the left */
    for (i=xs; i<xs+s; i++) {
      for (j=0; j<nc; j++) {
        cnt = 0;
        if (rank) {
          for (l=0; l<s; l++) {
            for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i - s + l)*nc + ofill[k];
          }
        }
        if (rank == 0 && (dd->bx == DM_BOUNDARY_PERIODIC)) {
          for (l=0; l<s; l++) {
            for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (m + i - s - l)*nc + ofill[k];
          }
        }
        if (dfill) {
          for (k=dfill[j]; k<dfill[j+1]; k++) {
            cols[cnt++] = i*nc + dfill[k];
          }
        } else {
          for (k=0; k<nc; k++) {
            cols[cnt++] = i*nc + k;
          }
        }
        for (l=0; l<s; l++) {
          for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i + s - l)*nc + ofill[k];
        }
        PetscCall(MatSetValues(J,1,&row,cnt,cols,NULL,INSERT_VALUES));
        row++;
      }
    }
    /* interior rows */
    for (i=xs+s; i<xs+nx-s; i++) {
      for (j=0; j<nc; j++) {
        cnt = 0;
        for (l=0; l<s; l++) {
          for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i - s + l)*nc + ofill[k];
        }
        if (dfill) {
          for (k=dfill[j]; k<dfill[j+1]; k++) {
            cols[cnt++] = i*nc + dfill[k];
          }
        } else {
          for (k=0; k<nc; k++) {
            cols[cnt++] = i*nc + k;
          }
        }
        for (l=0; l<s; l++) {
          for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i + s - l)*nc + ofill[k];
        }
        PetscCall(MatSetValues(J,1,&row,cnt,cols,NULL,INSERT_VALUES));
        row++;
      }
    }
    /* coupling with process to the right */
    for (i=xs+nx-s; i<xs+nx; i++) {
      for (j=0; j<nc; j++) {
        cnt = 0;
        for (l=0; l<s; l++) {
          for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i - s + l)*nc + ofill[k];
        }
        if (dfill) {
          for (k=dfill[j]; k<dfill[j+1]; k++) {
            cols[cnt++] = i*nc + dfill[k];
          }
        } else {
          for (k=0; k<nc; k++) {
            cols[cnt++] = i*nc + k;
          }
        }
        if (rank < size-1) {
          for (l=0; l<s; l++) {
            for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i + s - l)*nc + ofill[k];
          }
        }
        if ((rank == size-1) && (dd->bx == DM_BOUNDARY_PERIODIC)) {
          /* NOTE(review): wrap index (i - s - l - m + 2) is not symmetric with the
             left-boundary wrap (m + i - s - l) above; only exercised for s == 1 given
             the PetscCheckFalse(s > 1) guard — verify against upstream before changing */
          for (l=0; l<s; l++) {
            for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i - s - l - m + 2)*nc + ofill[k];
          }
        }
        PetscCall(MatSetValues(J,1,&row,cnt,cols,NULL,INSERT_VALUES));
        row++;
      }
    }
    PetscCall(PetscFree(cols));
    /* do not copy values to GPU since they are all zero and not yet needed there */
    PetscCall(MatBindToCPU(J,PETSC_TRUE));
    PetscCall(MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY));
    PetscCall(MatBindToCPU(J,PETSC_FALSE));
    PetscCall(MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE));
  }
  PetscFunctionReturn(0);
}

/* ---------------------------------------------------------------------------------*/

/*
   DMCreateMatrix_DA_1d_MPIAIJ - Creates and preallocates a MATMPIAIJ matrix
   for a 1d DMDA with dense per-node coupling (no fill patterns).
*/
PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ(DM da,Mat J)
{
  PetscInt               xs,nx,i,i1,slot,gxs,gnx;
  PetscInt               m,dim,s,*cols =
NULL,nc,*rows = NULL,col,cnt,l;
  PetscInt               istart,iend;
  DMBoundaryType         bx;
  ISLocalToGlobalMapping ltog,mltog;

  PetscFunctionBegin;
  /*
         nc - number of components per grid point
         col - number of colors needed in one direction for single component problem
  */
  PetscCall(DMDAGetInfo(da,&dim,&m,NULL,NULL,NULL,NULL,NULL,&nc,&s,&bx,NULL,NULL,NULL));
  if (bx == DM_BOUNDARY_NONE) {
    PetscCall(MatSetOption(J,MAT_SORTED_FULL,PETSC_TRUE));
  }
  col = 2*s + 1;

  PetscCall(DMDAGetCorners(da,&xs,NULL,NULL,&nx,NULL,NULL));
  PetscCall(DMDAGetGhostCorners(da,&gxs,NULL,NULL,&gnx,NULL,NULL));

  PetscCall(MatSetBlockSize(J,nc));
  /* simple over-estimate: every row gets the full col*nc stencil width */
  PetscCall(MatSeqAIJSetPreallocation(J,col*nc,NULL));
  PetscCall(MatMPIAIJSetPreallocation(J,col*nc,NULL,col*nc,NULL));

  PetscCall(DMGetLocalToGlobalMapping(da,&ltog));
  /* only install the DMDA map if the matrix does not already carry one */
  PetscCall(MatGetLocalToGlobalMapping(J,&mltog,NULL));
  if (!mltog) {
    PetscCall(MatSetLocalToGlobalMapping(J,ltog,ltog));
  }

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    PetscCall(PetscMalloc2(nc,&rows,col*nc*nc,&cols));
    for (i=xs; i<xs+nx; i++) {
      /* clip the stencil to the ghosted region owned locally */
      istart = PetscMax(-s,gxs - i);
      iend   = PetscMin(s,gxs + gnx - i - 1);
      slot   = i - gxs;

      cnt = 0;
      for (i1=istart; i1<iend+1; i1++) {
        cols[cnt++] = nc*(slot + i1);
        for (l=1; l<nc; l++) { /* remaining components of this node are consecutive */
          cols[cnt] = 1 + cols[cnt-1];cnt++;
        }
      }
      rows[0] = nc*(slot); for (l=1; l<nc; l++) rows[l] = 1 + rows[l-1];
      PetscCall(MatSetValuesLocal(J,nc,rows,cnt,cols,NULL,INSERT_VALUES));
    }
    /* do not copy values to GPU since they are all zero and not yet needed there */
    PetscCall(MatBindToCPU(J,PETSC_TRUE));
    PetscCall(MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY));
    if (bx == DM_BOUNDARY_NONE) {
      PetscCall(MatSetOption(J,MAT_SORTED_FULL,PETSC_FALSE));
    }
    PetscCall(MatBindToCPU(J,PETSC_FALSE));
    PetscCall(MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE));
    PetscCall(PetscFree2(rows,cols));
  }
  PetscFunctionReturn(0);
}

/* ---------------------------------------------------------------------------------*/

/*
   DMCreateMatrix_DA_1d_SeqAIJ_NoPreallocation - Sequential 1d variant that sets a
   single total preallocation instead of per-row counts; otherwise mirrors
   DMCreateMatrix_DA_1d_MPIAIJ.
*/
PetscErrorCode DMCreateMatrix_DA_1d_SeqAIJ_NoPreallocation(DM da,Mat J)
{
  PetscInt               xs,nx,i,i1,slot,gxs,gnx;
  PetscInt               m,dim,s,*cols = NULL,nc,*rows = NULL,col,cnt,l;
  PetscInt               istart,iend;
  DMBoundaryType         bx;
  ISLocalToGlobalMapping ltog,mltog;

  PetscFunctionBegin;
  /*
         nc - number of components per grid point
         col - number of colors needed in one direction for single component problem
  */
  PetscCall(DMDAGetInfo(da,&dim,&m,NULL,NULL,NULL,NULL,NULL,&nc,&s,&bx,NULL,NULL,NULL));
  col = 2*s + 1;

  PetscCall(DMDAGetCorners(da,&xs,NULL,NULL,&nx,NULL,NULL));
  PetscCall(DMDAGetGhostCorners(da,&gxs,NULL,NULL,&gnx,NULL,NULL));

  PetscCall(MatSetBlockSize(J,nc));
PetscCall(MatSeqAIJSetTotalPreallocation(J,nx*nc*col*nc));

  PetscCall(DMGetLocalToGlobalMapping(da,&ltog));
  /* only install the DMDA map if the matrix does not already carry one */
  PetscCall(MatGetLocalToGlobalMapping(J,&mltog,NULL));
  if (!mltog) {
    PetscCall(MatSetLocalToGlobalMapping(J,ltog,ltog));
  }

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    PetscCall(PetscMalloc2(nc,&rows,col*nc*nc,&cols));
    for (i=xs; i<xs+nx; i++) {
      /* clip the stencil to the ghosted region owned locally */
      istart = PetscMax(-s,gxs - i);
      iend   = PetscMin(s,gxs + gnx - i - 1);
      slot   = i - gxs;

      cnt = 0;
      for (i1=istart; i1<iend+1; i1++) {
        cols[cnt++] = nc*(slot + i1);
        for (l=1; l<nc; l++) { /* remaining components of this node are consecutive */
          cols[cnt] = 1 + cols[cnt-1];cnt++;
        }
      }
      rows[0] = nc*(slot); for (l=1; l<nc; l++) rows[l] = 1 + rows[l-1];
      PetscCall(MatSetValuesLocal(J,nc,rows,cnt,cols,NULL,INSERT_VALUES));
    }
    /* do not copy values to GPU since they are all zero and not yet needed there */
    PetscCall(MatBindToCPU(J,PETSC_TRUE));
    PetscCall(MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY));
    if (bx == DM_BOUNDARY_NONE) {
      PetscCall(MatSetOption(J,MAT_SORTED_FULL,PETSC_FALSE));
    }
    PetscCall(MatBindToCPU(J,PETSC_FALSE));
    PetscCall(MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE));
    PetscCall(PetscFree2(rows,cols));
  }
  PetscCall(MatSetOption(J,MAT_SORTED_FULL,PETSC_FALSE));
  PetscFunctionReturn(0);
}

/*
   DMCreateMatrix_DA_2d_MPIBAIJ - Creates and preallocates a MATMPIBAIJ matrix
   for a 2d DMDA; works at block (node) granularity with block size nc.
*/
PetscErrorCode DMCreateMatrix_DA_2d_MPIBAIJ(DM da,Mat J)
{
  PetscErrorCode         ierr;
  PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
  PetscInt               m,n,dim,s,*cols,nc,col,cnt,*dnz,*onz;
  PetscInt               istart,iend,jstart,jend,ii,jj;
  MPI_Comm               comm;
  PetscScalar            *values;
  DMBoundaryType         bx,by;
  DMDAStencilType        st;
  ISLocalToGlobalMapping ltog;

  PetscFunctionBegin;
  /*
     nc - number of components per grid point
     col - number of colors needed in one direction for single component problem
  */
  PetscCall(DMDAGetInfo(da,&dim,&m,&n,NULL,NULL,NULL,NULL,&nc,&s,&bx,&by,NULL,&st));
  col = 2*s + 1;

  PetscCall(DMDAGetCorners(da,&xs,&ys,NULL,&nx,&ny,NULL));
  PetscCall(DMDAGetGhostCorners(da,&gxs,&gys,NULL,&gnx,&gny,NULL));
  PetscCall(PetscObjectGetComm((PetscObject)da,&comm));

  PetscCall(PetscMalloc1(col*col*nc*nc,&cols));

  PetscCall(DMGetLocalToGlobalMapping(da,&ltog));

  /* determine the matrix preallocation information; counts are per block row */
  ierr = MatPreallocateInitialize(comm,nx*ny,nx*ny,dnz,onz);PetscCall(ierr);
  for (i=xs; i<xs+nx; i++) {
    istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
    iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
    for (j=ys; j<ys+ny; j++) {
      jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
      jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
      slot   = i - gxs + gnx*(j - gys); /* local (ghosted) block index of node (i,j) */

      /* Find block columns in block row */
      cnt = 0;
      for (ii=istart; ii<iend+1; ii++) {
        for (jj=jstart; jj<jend+1; jj++) {
          if (st == DMDA_STENCIL_BOX || !ii || !jj) { /* BOX or on the STAR */
            cols[cnt++] = slot + ii + gnx*jj;
          }
        }
      }
      PetscCall(MatPreallocateSetLocalBlock(ltog,1,&slot,ltog,cnt,cols,dnz,onz));
    }
  }
  PetscCall(MatSeqBAIJSetPreallocation(J,nc,0,dnz));
  PetscCall(MatMPIBAIJSetPreallocation(J,nc,0,dnz,0,onz));
  ierr = MatPreallocateFinalize(dnz,onz);PetscCall(ierr);

  PetscCall(MatSetLocalToGlobalMapping(J,ltog,ltog));

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    /* insert zero blocks so the nonzero pattern is locked in before use */
    PetscCall(PetscCalloc1(col*col*nc*nc,&values));
    for (i=xs; i<xs+nx; i++) {
      istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
      iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
      for (j=ys; j<ys+ny; j++) {
        jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
        jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
        slot   = i - gxs + gnx*(j - gys);
        cnt    = 0;
        for (ii=istart; ii<iend+1; ii++) {
          for (jj=jstart; jj<jend+1; jj++) {
            if (st == DMDA_STENCIL_BOX || !ii || !jj) { /* BOX or on the STAR */
              cols[cnt++] = slot + ii + gnx*jj;
            }
          }
        }
        PetscCall(MatSetValuesBlockedLocal(J,1,&slot,cnt,cols,values,INSERT_VALUES));
      }
    }
    PetscCall(PetscFree(values));
    /* do not copy values to GPU since they are all zero and not yet needed there */
    PetscCall(MatBindToCPU(J,PETSC_TRUE));
    PetscCall(MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY));
    PetscCall(MatBindToCPU(J,PETSC_FALSE));
    PetscCall(MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE));
  }
  PetscCall(PetscFree(cols));
  PetscFunctionReturn(0);
}

/*
   DMCreateMatrix_DA_3d_MPIBAIJ - 3d analogue of DMCreateMatrix_DA_2d_MPIBAIJ
   (definition continues past this chunk).
*/
PetscErrorCode DMCreateMatrix_DA_3d_MPIBAIJ(DM da,Mat J)
{
  PetscErrorCode         ierr;
  PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
  PetscInt               m,n,dim,s,*cols,k,nc,col,cnt,p,*dnz,*onz;
  PetscInt               istart,iend,jstart,jend,kstart,kend,zs,nz,gzs,gnz,ii,jj,kk;
  MPI_Comm               comm;
  PetscScalar            *values;
  DMBoundaryType         bx,by,bz;
  DMDAStencilType        st;
  ISLocalToGlobalMapping ltog;

  PetscFunctionBegin;
  /*
         nc - number of components per grid point
         col - number of colors needed in one direction for single component problem
  */
  PetscCall(DMDAGetInfo(da,&dim,&m,&n,&p,NULL,NULL,NULL,&nc,&s,&bx,&by,&bz,&st));
  col = 2*s + 1;

PetscCall(DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz)); 1912 PetscCall(DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz)); 1913 PetscCall(PetscObjectGetComm((PetscObject)da,&comm)); 1914 1915 PetscCall(PetscMalloc1(col*col*col,&cols)); 1916 1917 PetscCall(DMGetLocalToGlobalMapping(da,<og)); 1918 1919 /* determine the matrix preallocation information */ 1920 ierr = MatPreallocateInitialize(comm,nx*ny*nz,nx*ny*nz,dnz,onz);PetscCall(ierr); 1921 for (i=xs; i<xs+nx; i++) { 1922 istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i)); 1923 iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,m-i-1)); 1924 for (j=ys; j<ys+ny; j++) { 1925 jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j)); 1926 jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,n-j-1)); 1927 for (k=zs; k<zs+nz; k++) { 1928 kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k)); 1929 kend = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,p-k-1)); 1930 1931 slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs); 1932 1933 /* Find block columns in block row */ 1934 cnt = 0; 1935 for (ii=istart; ii<iend+1; ii++) { 1936 for (jj=jstart; jj<jend+1; jj++) { 1937 for (kk=kstart; kk<kend+1; kk++) { 1938 if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/ 1939 cols[cnt++] = slot + ii + gnx*jj + gnx*gny*kk; 1940 } 1941 } 1942 } 1943 } 1944 PetscCall(MatPreallocateSetLocalBlock(ltog,1,&slot,ltog,cnt,cols,dnz,onz)); 1945 } 1946 } 1947 } 1948 PetscCall(MatSeqBAIJSetPreallocation(J,nc,0,dnz)); 1949 PetscCall(MatMPIBAIJSetPreallocation(J,nc,0,dnz,0,onz)); 1950 ierr = MatPreallocateFinalize(dnz,onz);PetscCall(ierr); 1951 1952 PetscCall(MatSetLocalToGlobalMapping(J,ltog,ltog)); 1953 1954 /* 1955 For each node in the grid: we get the neighbors in the local (on processor ordering 1956 that includes the ghost points) then MatSetValuesLocal() maps those indices to the global 1957 PETSc ordering. 
1958 */ 1959 if (!da->prealloc_only) { 1960 PetscCall(PetscCalloc1(col*col*col*nc*nc,&values)); 1961 for (i=xs; i<xs+nx; i++) { 1962 istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i)); 1963 iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,m-i-1)); 1964 for (j=ys; j<ys+ny; j++) { 1965 jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j)); 1966 jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,n-j-1)); 1967 for (k=zs; k<zs+nz; k++) { 1968 kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k)); 1969 kend = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,p-k-1)); 1970 1971 slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs); 1972 1973 cnt = 0; 1974 for (ii=istart; ii<iend+1; ii++) { 1975 for (jj=jstart; jj<jend+1; jj++) { 1976 for (kk=kstart; kk<kend+1; kk++) { 1977 if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/ 1978 cols[cnt++] = slot + ii + gnx*jj + gnx*gny*kk; 1979 } 1980 } 1981 } 1982 } 1983 PetscCall(MatSetValuesBlockedLocal(J,1,&slot,cnt,cols,values,INSERT_VALUES)); 1984 } 1985 } 1986 } 1987 PetscCall(PetscFree(values)); 1988 /* do not copy values to GPU since they are all zero and not yet needed there */ 1989 PetscCall(MatBindToCPU(J,PETSC_TRUE)); 1990 PetscCall(MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY)); 1991 PetscCall(MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY)); 1992 PetscCall(MatBindToCPU(J,PETSC_FALSE)); 1993 PetscCall(MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE)); 1994 } 1995 PetscCall(PetscFree(cols)); 1996 PetscFunctionReturn(0); 1997 } 1998 1999 /* 2000 This helper is for of SBAIJ preallocation, to discard the lower-triangular values which are difficult to 2001 identify in the local ordering with periodic domain. 
2002 */ 2003 static PetscErrorCode L2GFilterUpperTriangular(ISLocalToGlobalMapping ltog,PetscInt *row,PetscInt *cnt,PetscInt col[]) 2004 { 2005 PetscInt i,n; 2006 2007 PetscFunctionBegin; 2008 PetscCall(ISLocalToGlobalMappingApplyBlock(ltog,1,row,row)); 2009 PetscCall(ISLocalToGlobalMappingApplyBlock(ltog,*cnt,col,col)); 2010 for (i=0,n=0; i<*cnt; i++) { 2011 if (col[i] >= *row) col[n++] = col[i]; 2012 } 2013 *cnt = n; 2014 PetscFunctionReturn(0); 2015 } 2016 2017 PetscErrorCode DMCreateMatrix_DA_2d_MPISBAIJ(DM da,Mat J) 2018 { 2019 PetscErrorCode ierr; 2020 PetscInt xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny; 2021 PetscInt m,n,dim,s,*cols,nc,col,cnt,*dnz,*onz; 2022 PetscInt istart,iend,jstart,jend,ii,jj; 2023 MPI_Comm comm; 2024 PetscScalar *values; 2025 DMBoundaryType bx,by; 2026 DMDAStencilType st; 2027 ISLocalToGlobalMapping ltog; 2028 2029 PetscFunctionBegin; 2030 /* 2031 nc - number of components per grid point 2032 col - number of colors needed in one direction for single component problem 2033 */ 2034 PetscCall(DMDAGetInfo(da,&dim,&m,&n,NULL,NULL,NULL,NULL,&nc,&s,&bx,&by,NULL,&st)); 2035 col = 2*s + 1; 2036 2037 PetscCall(DMDAGetCorners(da,&xs,&ys,NULL,&nx,&ny,NULL)); 2038 PetscCall(DMDAGetGhostCorners(da,&gxs,&gys,NULL,&gnx,&gny,NULL)); 2039 PetscCall(PetscObjectGetComm((PetscObject)da,&comm)); 2040 2041 PetscCall(PetscMalloc1(col*col*nc*nc,&cols)); 2042 2043 PetscCall(DMGetLocalToGlobalMapping(da,<og)); 2044 2045 /* determine the matrix preallocation information */ 2046 ierr = MatPreallocateInitialize(comm,nx*ny,nx*ny,dnz,onz);PetscCall(ierr); 2047 for (i=xs; i<xs+nx; i++) { 2048 istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i)); 2049 iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,m-i-1)); 2050 for (j=ys; j<ys+ny; j++) { 2051 jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j)); 2052 jend = (by == DM_BOUNDARY_PERIODIC) ? 
s : (PetscMin(s,n-j-1)); 2053 slot = i - gxs + gnx*(j - gys); 2054 2055 /* Find block columns in block row */ 2056 cnt = 0; 2057 for (ii=istart; ii<iend+1; ii++) { 2058 for (jj=jstart; jj<jend+1; jj++) { 2059 if (st == DMDA_STENCIL_BOX || !ii || !jj) { 2060 cols[cnt++] = slot + ii + gnx*jj; 2061 } 2062 } 2063 } 2064 PetscCall(L2GFilterUpperTriangular(ltog,&slot,&cnt,cols)); 2065 PetscCall(MatPreallocateSymmetricSetBlock(slot,cnt,cols,dnz,onz)); 2066 } 2067 } 2068 PetscCall(MatSeqSBAIJSetPreallocation(J,nc,0,dnz)); 2069 PetscCall(MatMPISBAIJSetPreallocation(J,nc,0,dnz,0,onz)); 2070 ierr = MatPreallocateFinalize(dnz,onz);PetscCall(ierr); 2071 2072 PetscCall(MatSetLocalToGlobalMapping(J,ltog,ltog)); 2073 2074 /* 2075 For each node in the grid: we get the neighbors in the local (on processor ordering 2076 that includes the ghost points) then MatSetValuesLocal() maps those indices to the global 2077 PETSc ordering. 2078 */ 2079 if (!da->prealloc_only) { 2080 PetscCall(PetscCalloc1(col*col*nc*nc,&values)); 2081 for (i=xs; i<xs+nx; i++) { 2082 istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i)); 2083 iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,m-i-1)); 2084 for (j=ys; j<ys+ny; j++) { 2085 jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j)); 2086 jend = (by == DM_BOUNDARY_PERIODIC) ? 
s : (PetscMin(s,n-j-1)); 2087 slot = i - gxs + gnx*(j - gys); 2088 2089 /* Find block columns in block row */ 2090 cnt = 0; 2091 for (ii=istart; ii<iend+1; ii++) { 2092 for (jj=jstart; jj<jend+1; jj++) { 2093 if (st == DMDA_STENCIL_BOX || !ii || !jj) { 2094 cols[cnt++] = slot + ii + gnx*jj; 2095 } 2096 } 2097 } 2098 PetscCall(L2GFilterUpperTriangular(ltog,&slot,&cnt,cols)); 2099 PetscCall(MatSetValuesBlocked(J,1,&slot,cnt,cols,values,INSERT_VALUES)); 2100 } 2101 } 2102 PetscCall(PetscFree(values)); 2103 /* do not copy values to GPU since they are all zero and not yet needed there */ 2104 PetscCall(MatBindToCPU(J,PETSC_TRUE)); 2105 PetscCall(MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY)); 2106 PetscCall(MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY)); 2107 PetscCall(MatBindToCPU(J,PETSC_FALSE)); 2108 PetscCall(MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE)); 2109 } 2110 PetscCall(PetscFree(cols)); 2111 PetscFunctionReturn(0); 2112 } 2113 2114 PetscErrorCode DMCreateMatrix_DA_3d_MPISBAIJ(DM da,Mat J) 2115 { 2116 PetscErrorCode ierr; 2117 PetscInt xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny; 2118 PetscInt m,n,dim,s,*cols,k,nc,col,cnt,p,*dnz,*onz; 2119 PetscInt istart,iend,jstart,jend,kstart,kend,zs,nz,gzs,gnz,ii,jj,kk; 2120 MPI_Comm comm; 2121 PetscScalar *values; 2122 DMBoundaryType bx,by,bz; 2123 DMDAStencilType st; 2124 ISLocalToGlobalMapping ltog; 2125 2126 PetscFunctionBegin; 2127 /* 2128 nc - number of components per grid point 2129 col - number of colors needed in one direction for single component problem 2130 */ 2131 PetscCall(DMDAGetInfo(da,&dim,&m,&n,&p,NULL,NULL,NULL,&nc,&s,&bx,&by,&bz,&st)); 2132 col = 2*s + 1; 2133 2134 PetscCall(DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz)); 2135 PetscCall(DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz)); 2136 PetscCall(PetscObjectGetComm((PetscObject)da,&comm)); 2137 2138 /* create the matrix */ 2139 PetscCall(PetscMalloc1(col*col*col,&cols)); 2140 2141 PetscCall(DMGetLocalToGlobalMapping(da,<og)); 2142 2143 /* determine the 
matrix preallocation information */ 2144 ierr = MatPreallocateInitialize(comm,nx*ny*nz,nx*ny*nz,dnz,onz);PetscCall(ierr); 2145 for (i=xs; i<xs+nx; i++) { 2146 istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i)); 2147 iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,m-i-1)); 2148 for (j=ys; j<ys+ny; j++) { 2149 jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j)); 2150 jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,n-j-1)); 2151 for (k=zs; k<zs+nz; k++) { 2152 kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k)); 2153 kend = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,p-k-1)); 2154 2155 slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs); 2156 2157 /* Find block columns in block row */ 2158 cnt = 0; 2159 for (ii=istart; ii<iend+1; ii++) { 2160 for (jj=jstart; jj<jend+1; jj++) { 2161 for (kk=kstart; kk<kend+1; kk++) { 2162 if ((st == DMDA_STENCIL_BOX) || (!ii && !jj) || (!jj && !kk) || (!ii && !kk)) { 2163 cols[cnt++] = slot + ii + gnx*jj + gnx*gny*kk; 2164 } 2165 } 2166 } 2167 } 2168 PetscCall(L2GFilterUpperTriangular(ltog,&slot,&cnt,cols)); 2169 PetscCall(MatPreallocateSymmetricSetBlock(slot,cnt,cols,dnz,onz)); 2170 } 2171 } 2172 } 2173 PetscCall(MatSeqSBAIJSetPreallocation(J,nc,0,dnz)); 2174 PetscCall(MatMPISBAIJSetPreallocation(J,nc,0,dnz,0,onz)); 2175 ierr = MatPreallocateFinalize(dnz,onz);PetscCall(ierr); 2176 2177 PetscCall(MatSetLocalToGlobalMapping(J,ltog,ltog)); 2178 2179 /* 2180 For each node in the grid: we get the neighbors in the local (on processor ordering 2181 that includes the ghost points) then MatSetValuesLocal() maps those indices to the global 2182 PETSc ordering. 2183 */ 2184 if (!da->prealloc_only) { 2185 PetscCall(PetscCalloc1(col*col*col*nc*nc,&values)); 2186 for (i=xs; i<xs+nx; i++) { 2187 istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i)); 2188 iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,m-i-1)); 2189 for (j=ys; j<ys+ny; j++) { 2190 jstart = (by == DM_BOUNDARY_PERIODIC) ? 
-s : (PetscMax(-s,-j)); 2191 jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,n-j-1)); 2192 for (k=zs; k<zs+nz; k++) { 2193 kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k)); 2194 kend = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,p-k-1)); 2195 2196 slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs); 2197 2198 cnt = 0; 2199 for (ii=istart; ii<iend+1; ii++) { 2200 for (jj=jstart; jj<jend+1; jj++) { 2201 for (kk=kstart; kk<kend+1; kk++) { 2202 if ((st == DMDA_STENCIL_BOX) || (!ii && !jj) || (!jj && !kk) || (!ii && !kk)) { 2203 cols[cnt++] = slot + ii + gnx*jj + gnx*gny*kk; 2204 } 2205 } 2206 } 2207 } 2208 PetscCall(L2GFilterUpperTriangular(ltog,&slot,&cnt,cols)); 2209 PetscCall(MatSetValuesBlocked(J,1,&slot,cnt,cols,values,INSERT_VALUES)); 2210 } 2211 } 2212 } 2213 PetscCall(PetscFree(values)); 2214 /* do not copy values to GPU since they are all zero and not yet needed there */ 2215 PetscCall(MatBindToCPU(J,PETSC_TRUE)); 2216 PetscCall(MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY)); 2217 PetscCall(MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY)); 2218 PetscCall(MatBindToCPU(J,PETSC_FALSE)); 2219 PetscCall(MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE)); 2220 } 2221 PetscCall(PetscFree(cols)); 2222 PetscFunctionReturn(0); 2223 } 2224 2225 /* ---------------------------------------------------------------------------------*/ 2226 2227 PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ_Fill(DM da,Mat J) 2228 { 2229 PetscErrorCode ierr; 2230 PetscInt xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny; 2231 PetscInt m,n,dim,s,*cols,k,nc,row,col,cnt, maxcnt = 0,l,p,*dnz,*onz; 2232 PetscInt istart,iend,jstart,jend,kstart,kend,zs,nz,gzs,gnz,ii,jj,kk,M,N,P; 2233 DM_DA *dd = (DM_DA*)da->data; 2234 PetscInt ifill_col,*dfill = dd->dfill,*ofill = dd->ofill; 2235 MPI_Comm comm; 2236 PetscScalar *values; 2237 DMBoundaryType bx,by,bz; 2238 ISLocalToGlobalMapping ltog; 2239 DMDAStencilType st; 2240 PetscBool removedups = PETSC_FALSE; 2241 2242 PetscFunctionBegin; 2243 /* 2244 nc - number of 
components per grid point 2245 col - number of colors needed in one direction for single component problem 2246 2247 */ 2248 PetscCall(DMDAGetInfo(da,&dim,&m,&n,&p,&M,&N,&P,&nc,&s,&bx,&by,&bz,&st)); 2249 col = 2*s + 1; 2250 PetscCheckFalse(bx == DM_BOUNDARY_PERIODIC && (m % col),PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in X is divisible\n\ 2251 by 2*stencil_width + 1\n"); 2252 PetscCheckFalse(by == DM_BOUNDARY_PERIODIC && (n % col),PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in Y is divisible\n\ 2253 by 2*stencil_width + 1\n"); 2254 PetscCheckFalse(bz == DM_BOUNDARY_PERIODIC && (p % col),PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in Z is divisible\n\ 2255 by 2*stencil_width + 1\n"); 2256 2257 /* 2258 With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times 2259 because of "wrapping" around the end of the domain hitting an entry already counted in the other direction. 2260 */ 2261 if (M == 1 && 2*s >= m) removedups = PETSC_TRUE; 2262 if (N == 1 && 2*s >= n) removedups = PETSC_TRUE; 2263 if (P == 1 && 2*s >= p) removedups = PETSC_TRUE; 2264 2265 PetscCall(DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz)); 2266 PetscCall(DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz)); 2267 PetscCall(PetscObjectGetComm((PetscObject)da,&comm)); 2268 2269 PetscCall(PetscMalloc1(col*col*col*nc,&cols)); 2270 PetscCall(DMGetLocalToGlobalMapping(da,<og)); 2271 2272 /* determine the matrix preallocation information */ 2273 ierr = MatPreallocateInitialize(comm,nc*nx*ny*nz,nc*nx*ny*nz,dnz,onz);PetscCall(ierr); 2274 2275 PetscCall(MatSetBlockSize(J,nc)); 2276 for (i=xs; i<xs+nx; i++) { 2277 istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i)); 2278 iend = (bx == DM_BOUNDARY_PERIODIC) ? 
s : (PetscMin(s,m-i-1)); 2279 for (j=ys; j<ys+ny; j++) { 2280 jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j)); 2281 jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,n-j-1)); 2282 for (k=zs; k<zs+nz; k++) { 2283 kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k)); 2284 kend = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,p-k-1)); 2285 2286 slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs); 2287 2288 for (l=0; l<nc; l++) { 2289 cnt = 0; 2290 for (ii=istart; ii<iend+1; ii++) { 2291 for (jj=jstart; jj<jend+1; jj++) { 2292 for (kk=kstart; kk<kend+1; kk++) { 2293 if (ii || jj || kk) { 2294 if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/ 2295 for (ifill_col=ofill[l]; ifill_col<ofill[l+1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc*(slot + ii + gnx*jj + gnx*gny*kk); 2296 } 2297 } else { 2298 if (dfill) { 2299 for (ifill_col=dfill[l]; ifill_col<dfill[l+1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc*(slot + ii + gnx*jj + gnx*gny*kk); 2300 } else { 2301 for (ifill_col=0; ifill_col<nc; ifill_col++) cols[cnt++] = ifill_col + nc*(slot + ii + gnx*jj + gnx*gny*kk); 2302 } 2303 } 2304 } 2305 } 2306 } 2307 row = l + nc*(slot); 2308 maxcnt = PetscMax(maxcnt,cnt); 2309 if (removedups) { 2310 PetscCall(MatPreallocateSetLocalRemoveDups(ltog,1,&row,ltog,cnt,cols,dnz,onz)); 2311 } else { 2312 PetscCall(MatPreallocateSetLocal(ltog,1,&row,ltog,cnt,cols,dnz,onz)); 2313 } 2314 } 2315 } 2316 } 2317 } 2318 PetscCall(MatSeqAIJSetPreallocation(J,0,dnz)); 2319 PetscCall(MatMPIAIJSetPreallocation(J,0,dnz,0,onz)); 2320 ierr = MatPreallocateFinalize(dnz,onz);PetscCall(ierr); 2321 PetscCall(MatSetLocalToGlobalMapping(J,ltog,ltog)); 2322 2323 /* 2324 For each node in the grid: we get the neighbors in the local (on processor ordering 2325 that includes the ghost points) then MatSetValuesLocal() maps those indices to the global 2326 PETSc ordering. 
2327 */ 2328 if (!da->prealloc_only) { 2329 PetscCall(PetscCalloc1(maxcnt,&values)); 2330 for (i=xs; i<xs+nx; i++) { 2331 istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i)); 2332 iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,m-i-1)); 2333 for (j=ys; j<ys+ny; j++) { 2334 jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j)); 2335 jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,n-j-1)); 2336 for (k=zs; k<zs+nz; k++) { 2337 kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k)); 2338 kend = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,p-k-1)); 2339 2340 slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs); 2341 2342 for (l=0; l<nc; l++) { 2343 cnt = 0; 2344 for (ii=istart; ii<iend+1; ii++) { 2345 for (jj=jstart; jj<jend+1; jj++) { 2346 for (kk=kstart; kk<kend+1; kk++) { 2347 if (ii || jj || kk) { 2348 if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/ 2349 for (ifill_col=ofill[l]; ifill_col<ofill[l+1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc*(slot + ii + gnx*jj + gnx*gny*kk); 2350 } 2351 } else { 2352 if (dfill) { 2353 for (ifill_col=dfill[l]; ifill_col<dfill[l+1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc*(slot + ii + gnx*jj + gnx*gny*kk); 2354 } else { 2355 for (ifill_col=0; ifill_col<nc; ifill_col++) cols[cnt++] = ifill_col + nc*(slot + ii + gnx*jj + gnx*gny*kk); 2356 } 2357 } 2358 } 2359 } 2360 } 2361 row = l + nc*(slot); 2362 PetscCall(MatSetValuesLocal(J,1,&row,cnt,cols,values,INSERT_VALUES)); 2363 } 2364 } 2365 } 2366 } 2367 PetscCall(PetscFree(values)); 2368 /* do not copy values to GPU since they are all zero and not yet needed there */ 2369 PetscCall(MatBindToCPU(J,PETSC_TRUE)); 2370 PetscCall(MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY)); 2371 PetscCall(MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY)); 2372 PetscCall(MatBindToCPU(J,PETSC_FALSE)); 2373 PetscCall(MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE)); 2374 } 2375 PetscCall(PetscFree(cols)); 2376 
PetscFunctionReturn(0); 2377 } 2378