1 2 #include <petsc/private/dmdaimpl.h> /*I "petscdmda.h" I*/ 3 #include <petscmat.h> 4 #include <petscbt.h> 5 6 extern PetscErrorCode DMCreateColoring_DA_1d_MPIAIJ(DM,ISColoringType,ISColoring*); 7 extern PetscErrorCode DMCreateColoring_DA_2d_MPIAIJ(DM,ISColoringType,ISColoring*); 8 extern PetscErrorCode DMCreateColoring_DA_2d_5pt_MPIAIJ(DM,ISColoringType,ISColoring*); 9 extern PetscErrorCode DMCreateColoring_DA_3d_MPIAIJ(DM,ISColoringType,ISColoring*); 10 11 /* 12 For ghost i that may be negative or greater than the upper bound this 13 maps it into the 0:m-1 range using periodicity 14 */ 15 #define SetInRange(i,m) ((i < 0) ? m+i : ((i >= m) ? i-m : i)) 16 17 static PetscErrorCode DMDASetBlockFills_Private(const PetscInt *dfill,PetscInt w,PetscInt **rfill) 18 { 19 PetscInt i,j,nz,*fill; 20 21 PetscFunctionBegin; 22 if (!dfill) PetscFunctionReturn(0); 23 24 /* count number nonzeros */ 25 nz = 0; 26 for (i=0; i<w; i++) { 27 for (j=0; j<w; j++) { 28 if (dfill[w*i+j]) nz++; 29 } 30 } 31 PetscCall(PetscMalloc1(nz + w + 1,&fill)); 32 /* construct modified CSR storage of nonzero structure */ 33 /* fill[0 -- w] marks starts of each row of column indices (and end of last row) 34 so fill[1] - fill[0] gives number of nonzeros in first row etc */ 35 nz = w + 1; 36 for (i=0; i<w; i++) { 37 fill[i] = nz; 38 for (j=0; j<w; j++) { 39 if (dfill[w*i+j]) { 40 fill[nz] = j; 41 nz++; 42 } 43 } 44 } 45 fill[w] = nz; 46 47 *rfill = fill; 48 PetscFunctionReturn(0); 49 } 50 51 static PetscErrorCode DMDASetBlockFillsSparse_Private(const PetscInt *dfillsparse,PetscInt w,PetscInt **rfill) 52 { 53 PetscInt nz; 54 55 PetscFunctionBegin; 56 if (!dfillsparse) PetscFunctionReturn(0); 57 58 /* Determine number of non-zeros */ 59 nz = (dfillsparse[w] - w - 1); 60 61 /* Allocate space for our copy of the given sparse matrix representation. 
*/ 62 PetscCall(PetscMalloc1(nz + w + 1,rfill)); 63 PetscCall(PetscArraycpy(*rfill,dfillsparse,nz+w+1)); 64 PetscFunctionReturn(0); 65 } 66 67 static PetscErrorCode DMDASetBlockFills_Private2(DM_DA *dd) 68 { 69 PetscInt i,k,cnt = 1; 70 71 PetscFunctionBegin; 72 73 /* ofillcount tracks the columns of ofill that have any nonzero in thems; the value in each location is the number of 74 columns to the left with any nonzeros in them plus 1 */ 75 PetscCall(PetscCalloc1(dd->w,&dd->ofillcols)); 76 for (i=0; i<dd->w; i++) { 77 for (k=dd->ofill[i]; k<dd->ofill[i+1]; k++) dd->ofillcols[dd->ofill[k]] = 1; 78 } 79 for (i=0; i<dd->w; i++) { 80 if (dd->ofillcols[i]) { 81 dd->ofillcols[i] = cnt++; 82 } 83 } 84 PetscFunctionReturn(0); 85 } 86 87 /*@ 88 DMDASetBlockFills - Sets the fill pattern in each block for a multi-component problem 89 of the matrix returned by DMCreateMatrix(). 90 91 Logically Collective on da 92 93 Input Parameters: 94 + da - the distributed array 95 . dfill - the fill pattern in the diagonal block (may be NULL, means use dense block) 96 - ofill - the fill pattern in the off-diagonal blocks 97 98 Level: developer 99 100 Notes: 101 This only makes sense when you are doing multicomponent problems but using the 102 MPIAIJ matrix format 103 104 The format for dfill and ofill is a 2 dimensional dof by dof matrix with 1 entries 105 representing coupling and 0 entries for missing coupling. For example 106 $ dfill[9] = {1, 0, 0, 107 $ 1, 1, 0, 108 $ 0, 1, 1} 109 means that row 0 is coupled with only itself in the diagonal block, row 1 is coupled with 110 itself and row 0 (in the diagonal block) and row 2 is coupled with itself and row 1 (in the 111 diagonal block). 
DMDASetGetMatrix() allows you to provide general code for those more complicated nonzero patterns than
       can be represented in the dfill, ofill format

   Contributed by Glenn Hammond

.seealso `DMCreateMatrix()`, `DMDASetGetMatrix()`, `DMSetMatrixPreallocateOnly()`

@*/
PetscErrorCode DMDASetBlockFills(DM da,const PetscInt *dfill,const PetscInt *ofill)
{
  DM_DA *dd = (DM_DA*)da->data;

  PetscFunctionBegin;
  /* save the given dfill and ofill information (converted to compact form by the helpers) */
  PetscCall(DMDASetBlockFills_Private(dfill,dd->w,&dd->dfill));
  PetscCall(DMDASetBlockFills_Private(ofill,dd->w,&dd->ofill));

  /* count nonzeros in ofill columns */
  PetscCall(DMDASetBlockFills_Private2(dd));

  PetscFunctionReturn(0);
}

/*@
    DMDASetBlockFillsSparse - Sets the fill pattern in each block for a multi-component problem
    of the matrix returned by DMCreateMatrix(), using sparse representations
    of fill patterns.

    Logically Collective on da

    Input Parameters:
+   da - the distributed array
.   dfill - the sparse fill pattern in the diagonal block (may be NULL, means use dense block)
-   ofill - the sparse fill pattern in the off-diagonal blocks

    Level: developer

    Notes: This only makes sense when you are doing multicomponent problems but using the
       MPIAIJ matrix format

           The format for dfill and ofill is a sparse representation of a
       dof-by-dof matrix with 1 entries representing coupling and 0 entries
       for missing coupling. The sparse representation is a 1 dimensional
       array of length nz + dof + 1, where nz is the number of non-zeros in
       the matrix. The first dof entries in the array give the
       starting array indices of each row's items in the rest of the array,
       the dof+1st item contains the value nz + dof + 1 (i.e. the entire length of the array)
       and the remaining nz items give the column indices of each of
       the 1s within the logical 2D matrix.
Each row's items within
       the array are the column indices of the 1s within that row
       of the 2D matrix.  PETSc developers may recognize that this is the
       same format as that computed by the DMDASetBlockFills_Private()
       function from a dense 2D matrix representation.

     DMDASetGetMatrix() allows you to provide general code for those more complicated nonzero patterns than
     can be represented in the dfill, ofill format

   Contributed by Philip C. Roth

.seealso `DMDASetBlockFills()`, `DMCreateMatrix()`, `DMDASetGetMatrix()`, `DMSetMatrixPreallocateOnly()`

@*/
PetscErrorCode DMDASetBlockFillsSparse(DM da,const PetscInt *dfillsparse,const PetscInt *ofillsparse)
{
  DM_DA *dd = (DM_DA*)da->data;

  PetscFunctionBegin;
  /* save the given dfill and ofill information (already in compact form; just copied) */
  PetscCall(DMDASetBlockFillsSparse_Private(dfillsparse,dd->w,&dd->dfill));
  PetscCall(DMDASetBlockFillsSparse_Private(ofillsparse,dd->w,&dd->ofill));

  /* count nonzeros in ofill columns */
  PetscCall(DMDASetBlockFills_Private2(dd));

  PetscFunctionReturn(0);
}

/* Creates a coloring for the DMDA, dispatching to the dimension-specific routines below;
   for BAIJ matrix types the coloring is computed per block (dof temporarily set to 1) */
PetscErrorCode DMCreateColoring_DA(DM da,ISColoringType ctype,ISColoring *coloring)
{
  PetscInt       dim,m,n,p,nc;
  DMBoundaryType bx,by,bz;
  MPI_Comm       comm;
  PetscMPIInt    size;
  PetscBool      isBAIJ;
  DM_DA          *dd = (DM_DA*)da->data;

  PetscFunctionBegin;
  /*
                                                                        m
          ------------------------------------------------------
         |                                                      |
         |                                                      |
         |               ----------------------                 |
         |               |                    |                 |
      n  |           yn  |                    |                 |
         |               |                    |                 |
         |               .---------------------                 |
         |             (xs,ys)     xn                           |
         |            .                                         |
         |         (gxs,gys)                                    |
         |                                                      |
          -----------------------------------------------------
  */

  /*
         nc - number of components per grid point
         col - number of colors needed in one direction for single component problem
  */
  PetscCall(DMDAGetInfo(da,&dim,NULL,NULL,NULL,&m,&n,&p,&nc,NULL,&bx,&by,&bz,NULL));

  PetscCall(PetscObjectGetComm((PetscObject)da,&comm));
  PetscCallMPI(MPI_Comm_size(comm,&size));
  if (ctype == IS_COLORING_LOCAL) {
    if (size == 1) {
      ctype = IS_COLORING_GLOBAL;
    } else if (dim > 1) {
      if ((m==1 && bx == DM_BOUNDARY_PERIODIC) || (n==1 && by == DM_BOUNDARY_PERIODIC) || (p==1 && bz == DM_BOUNDARY_PERIODIC)) {
        SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"IS_COLORING_LOCAL cannot be used for periodic boundary condition having both ends of the domain on the same process");
      }
    }
  }

  /* Tell the DMDA it has 1 degree of freedom per grid point so that the coloring for BAIJ
     matrices is for the blocks, not the individual matrix elements */
  PetscCall(PetscStrbeginswith(da->mattype,MATBAIJ,&isBAIJ));
  if (!isBAIJ) PetscCall(PetscStrbeginswith(da->mattype,MATMPIBAIJ,&isBAIJ));
  if (!isBAIJ) PetscCall(PetscStrbeginswith(da->mattype,MATSEQBAIJ,&isBAIJ));
  if (isBAIJ) {
    dd->w  = 1;
    dd->xs = dd->xs/nc;
    dd->xe = dd->xe/nc;
    dd->Xs = dd->Xs/nc;
    dd->Xe = dd->Xe/nc;
  }

  /*
     We do not provide a getcoloring function in the DMDA operations because
     the basic DMDA does not know about matrices. We think of DMDA as being
     more low-level than matrices.
  */
  if (dim == 1) {
    PetscCall(DMCreateColoring_DA_1d_MPIAIJ(da,ctype,coloring));
  } else if (dim == 2) {
    PetscCall(DMCreateColoring_DA_2d_MPIAIJ(da,ctype,coloring));
  } else if (dim == 3) {
    PetscCall(DMCreateColoring_DA_3d_MPIAIJ(da,ctype,coloring));
  } else SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"Not done for %" PetscInt_FMT " dimension, send us mail petsc-maint@mcs.anl.gov for code",dim);
  /* restore the dof-related fields scaled above for the BAIJ case */
  if (isBAIJ) {
    dd->w  = nc;
    dd->xs = dd->xs*nc;
    dd->xe = dd->xe*nc;
    dd->Xs = dd->Xs*nc;
    dd->Xe = dd->Xe*nc;
  }
  PetscFunctionReturn(0);
}

/* ---------------------------------------------------------------------------------*/

/* 2d coloring; the result is cached on the DM (dd->localcoloring / dd->ghostedcoloring) */
PetscErrorCode DMCreateColoring_DA_2d_MPIAIJ(DM da,ISColoringType ctype,ISColoring *coloring)
{
  PetscInt        xs,ys,nx,ny,i,j,ii,gxs,gys,gnx,gny,m,n,M,N,dim,s,k,nc,col;
  PetscInt        ncolors;
  MPI_Comm        comm;
  DMBoundaryType  bx,by;
  DMDAStencilType st;
  ISColoringValue *colors;
  DM_DA           *dd = (DM_DA*)da->data;

  PetscFunctionBegin;
  /*
         nc - number of components per grid point
         col - number of colors needed in one direction for single component problem
  */
  PetscCall(DMDAGetInfo(da,&dim,&m,&n,NULL,&M,&N,NULL,&nc,&s,&bx,&by,NULL,&st));
  col = 2*s + 1;
  PetscCall(DMDAGetCorners(da,&xs,&ys,NULL,&nx,&ny,NULL));
  PetscCall(DMDAGetGhostCorners(da,&gxs,&gys,NULL,&gnx,&gny,NULL));
  PetscCall(PetscObjectGetComm((PetscObject)da,&comm));

  /* special case as taught to us by Paul Hovland */
  if (st == DMDA_STENCIL_STAR && s == 1) {
    PetscCall(DMCreateColoring_DA_2d_5pt_MPIAIJ(da,ctype,coloring));
  } else {
    if (ctype == IS_COLORING_GLOBAL) {
      if (!dd->localcoloring) {
        PetscCall(PetscMalloc1(nc*nx*ny,&colors));
        ii = 0;
        for (j=ys; j<ys+ny; j++) {
          for (i=xs; i<xs+nx; i++) {
            for (k=0; k<nc; k++) {
              colors[ii++] = k + nc*((i % col) + col*(j % col));
            }
          }
        }
        ncolors = nc + nc*(col-1 + col*(col-1));
        PetscCall(ISColoringCreate(comm,ncolors,nc*nx*ny,colors,PETSC_OWN_POINTER,&dd->localcoloring));
      }
      *coloring = dd->localcoloring;
    } else if (ctype == IS_COLORING_LOCAL) {
      if (!dd->ghostedcoloring) {
        PetscCall(PetscMalloc1(nc*gnx*gny,&colors));
        ii = 0;
        for (j=gys; j<gys+gny; j++) {
          for (i=gxs; i<gxs+gnx; i++) {
            for (k=0; k<nc; k++) {
              /* the complicated stuff is to handle periodic boundaries */
              colors[ii++] = k + nc*((SetInRange(i,m) % col) + col*(SetInRange(j,n) % col));
            }
          }
        }
        ncolors = nc + nc*(col - 1 + col*(col-1));
        PetscCall(ISColoringCreate(comm,ncolors,nc*gnx*gny,colors,PETSC_OWN_POINTER,&dd->ghostedcoloring));
        /* PetscIntView(ncolors,(PetscInt*)colors,0); */

        PetscCall(ISColoringSetType(dd->ghostedcoloring,IS_COLORING_LOCAL));
      }
      *coloring = dd->ghostedcoloring;
    } else SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_ARG_WRONG,"Unknown ISColoringType %d",(int)ctype);
  }
  PetscCall(ISColoringReference(*coloring));
  PetscFunctionReturn(0);
}

/* ---------------------------------------------------------------------------------*/

/* 3d coloring; same caching scheme as the 2d version */
PetscErrorCode DMCreateColoring_DA_3d_MPIAIJ(DM da,ISColoringType ctype,ISColoring *coloring)
{
  PetscInt        xs,ys,nx,ny,i,j,gxs,gys,gnx,gny,m,n,p,dim,s,k,nc,col,zs,gzs,ii,l,nz,gnz,M,N,P;
  PetscInt        ncolors;
  MPI_Comm        comm;
  DMBoundaryType  bx,by,bz;
  DMDAStencilType st;
  ISColoringValue *colors;
  DM_DA           *dd = (DM_DA*)da->data;

  PetscFunctionBegin;
  /*
         nc - number of components per grid point
         col - number of colors needed in one direction for single component problem
  */
  PetscCall(DMDAGetInfo(da,&dim,&m,&n,&p,&M,&N,&P,&nc,&s,&bx,&by,&bz,&st));
  col = 2*s + 1;
  PetscCall(DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz));
  PetscCall(DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz));
  PetscCall(PetscObjectGetComm((PetscObject)da,&comm));

  /* create the coloring */
  if (ctype == IS_COLORING_GLOBAL) {
    if (!dd->localcoloring) {
      PetscCall(PetscMalloc1(nc*nx*ny*nz,&colors));
      ii = 0;
      for (k=zs; k<zs+nz; k++) {
        for (j=ys; j<ys+ny; j++) {
          for (i=xs; i<xs+nx; i++) {
            for (l=0; l<nc; l++) {
              colors[ii++] = l + nc*((i % col) + col*(j % col) + col*col*(k % col));
            }
          }
        }
      }
      ncolors = nc + nc*(col-1 + col*(col-1)+ col*col*(col-1));
      PetscCall(ISColoringCreate(comm,ncolors,nc*nx*ny*nz,colors,PETSC_OWN_POINTER,&dd->localcoloring));
    }
    *coloring = dd->localcoloring;
  } else if (ctype == IS_COLORING_LOCAL) {
    if (!dd->ghostedcoloring) {
      PetscCall(PetscMalloc1(nc*gnx*gny*gnz,&colors));
      ii = 0;
      for (k=gzs; k<gzs+gnz; k++) {
        for (j=gys; j<gys+gny; j++) {
          for (i=gxs; i<gxs+gnx; i++) {
            for (l=0; l<nc; l++) {
              /* the complicated stuff is to handle periodic boundaries */
              colors[ii++] = l + nc*((SetInRange(i,m) % col) + col*(SetInRange(j,n) % col) + col*col*(SetInRange(k,p) % col));
            }
          }
        }
      }
      ncolors = nc + nc*(col-1 + col*(col-1)+ col*col*(col-1));
      PetscCall(ISColoringCreate(comm,ncolors,nc*gnx*gny*gnz,colors,PETSC_OWN_POINTER,&dd->ghostedcoloring));
      PetscCall(ISColoringSetType(dd->ghostedcoloring,IS_COLORING_LOCAL));
    }
    *coloring = dd->ghostedcoloring;
  } else SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_ARG_WRONG,"Unknown ISColoringType %d",(int)ctype);
  PetscCall(ISColoringReference(*coloring));
  PetscFunctionReturn(0);
}

/* ---------------------------------------------------------------------------------*/

/* 1d coloring; when dd->ofillcols is set (see DMDASetBlockFills()) a tighter coloring
   using only the off-diagonally coupled components is produced */
PetscErrorCode DMCreateColoring_DA_1d_MPIAIJ(DM da,ISColoringType ctype,ISColoring *coloring)
{
  PetscInt        xs,nx,i,i1,gxs,gnx,l,m,M,dim,s,nc,col;
  PetscInt        ncolors;
  MPI_Comm        comm;
  DMBoundaryType  bx;
  ISColoringValue *colors;
  DM_DA           *dd = (DM_DA*)da->data;

  PetscFunctionBegin;
  /*
         nc - number of components per grid point
         col - number of colors needed in one direction for single component problem
  */
  PetscCall(DMDAGetInfo(da,&dim,&m,NULL,NULL,&M,NULL,NULL,&nc,&s,&bx,NULL,NULL,NULL));
  col = 2*s + 1;
  PetscCall(DMDAGetCorners(da,&xs,NULL,NULL,&nx,NULL,NULL));
  PetscCall(DMDAGetGhostCorners(da,&gxs,NULL,NULL,&gnx,NULL,NULL));
  PetscCall(PetscObjectGetComm((PetscObject)da,&comm));

  /* create the coloring */
  if (ctype == IS_COLORING_GLOBAL) {
    if (!dd->localcoloring) {
      PetscCall(PetscMalloc1(nc*nx,&colors));
      if (dd->ofillcols) {
        PetscInt tc = 0;
        /* tc = number of components with any off-diagonal coupling */
        for (i=0; i<nc; i++) tc += (PetscInt) (dd->ofillcols[i] > 0);
        i1 = 0;
        for (i=xs; i<xs+nx; i++) {
          for (l=0; l<nc; l++) {
            if (dd->ofillcols[l] && (i % col)) {
              colors[i1++] = nc - 1 + tc*((i % col) - 1) + dd->ofillcols[l];
            } else {
              colors[i1++] = l;
            }
          }
        }
        ncolors = nc + 2*s*tc;
      } else {
        i1 = 0;
        for (i=xs; i<xs+nx; i++) {
          for (l=0; l<nc; l++) {
            colors[i1++] = l + nc*(i % col);
          }
        }
        ncolors = nc + nc*(col-1);
      }
      PetscCall(ISColoringCreate(comm,ncolors,nc*nx,colors,PETSC_OWN_POINTER,&dd->localcoloring));
    }
    *coloring = dd->localcoloring;
  } else if (ctype == IS_COLORING_LOCAL) {
    if (!dd->ghostedcoloring) {
      PetscCall(PetscMalloc1(nc*gnx,&colors));
      i1 = 0;
      for (i=gxs; i<gxs+gnx; i++) {
        for (l=0; l<nc; l++) {
          /* the complicated stuff is to handle periodic boundaries */
          colors[i1++] = l + nc*(SetInRange(i,m) % col);
        }
      }
      ncolors = nc + nc*(col-1);
      PetscCall(ISColoringCreate(comm,ncolors,nc*gnx,colors,PETSC_OWN_POINTER,&dd->ghostedcoloring));
      PetscCall(ISColoringSetType(dd->ghostedcoloring,IS_COLORING_LOCAL));
    }
    *coloring = dd->ghostedcoloring;
  } else SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_ARG_WRONG,"Unknown ISColoringType %d",(int)ctype);
  PetscCall(ISColoringReference(*coloring));
  PetscFunctionReturn(0);
}

/* 2d coloring for the 5-point (star, width-1) stencil: only 5 colors per component
   are needed via the (3*j+i) mod 5 pattern. NOTE(review): unlike the other coloring
   routines this one does not call ISColoringReference(); its caller
   DMCreateColoring_DA_2d_MPIAIJ takes the reference — confirm before calling directly. */
PetscErrorCode DMCreateColoring_DA_2d_5pt_MPIAIJ(DM da,ISColoringType ctype,ISColoring *coloring)
{
  PetscInt        xs,ys,nx,ny,i,j,ii,gxs,gys,gnx,gny,m,n,dim,s,k,nc;
  PetscInt        ncolors;
  MPI_Comm        comm;
  DMBoundaryType  bx,by;
  ISColoringValue *colors;
  DM_DA           *dd = (DM_DA*)da->data;

  PetscFunctionBegin;
  /*
         nc - number of components per grid point
         col - number of colors needed in one direction for single component problem
  */
  PetscCall(DMDAGetInfo(da,&dim,&m,&n,NULL,NULL,NULL,NULL,&nc,&s,&bx,&by,NULL,NULL));
  PetscCall(DMDAGetCorners(da,&xs,&ys,NULL,&nx,&ny,NULL));
  PetscCall(DMDAGetGhostCorners(da,&gxs,&gys,NULL,&gnx,&gny,NULL));
  PetscCall(PetscObjectGetComm((PetscObject)da,&comm));
  /* create the coloring */
  if (ctype == IS_COLORING_GLOBAL) {
    if (!dd->localcoloring) {
      PetscCall(PetscMalloc1(nc*nx*ny,&colors));
      ii = 0;
      for (j=ys; j<ys+ny; j++) {
        for (i=xs; i<xs+nx; i++) {
          for (k=0; k<nc; k++) {
            colors[ii++] = k + nc*((3*j+i) % 5);
          }
        }
      }
      ncolors = 5*nc;
      PetscCall(ISColoringCreate(comm,ncolors,nc*nx*ny,colors,PETSC_OWN_POINTER,&dd->localcoloring));
    }
    *coloring = dd->localcoloring;
  } else if (ctype == IS_COLORING_LOCAL) {
    if (!dd->ghostedcoloring) {
      PetscCall(PetscMalloc1(nc*gnx*gny,&colors));
      ii = 0;
      for (j=gys; j<gys+gny; j++) {
        for (i=gxs; i<gxs+gnx; i++) {
          for (k=0; k<nc; k++) {
            /* SetInRange() handles periodic boundaries */
            colors[ii++] = k + nc*((3*SetInRange(j,n) + SetInRange(i,m)) % 5);
          }
        }
      }
      ncolors = 5*nc;
      PetscCall(ISColoringCreate(comm,ncolors,nc*gnx*gny,colors,PETSC_OWN_POINTER,&dd->ghostedcoloring));
      PetscCall(ISColoringSetType(dd->ghostedcoloring,IS_COLORING_LOCAL));
    }
    *coloring = dd->ghostedcoloring;
  } else SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_ARG_WRONG,"Unknown ISColoringType %d",(int)ctype);
  PetscFunctionReturn(0);
}

/*
=========================================================================== */ 534 extern PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ(DM,Mat); 535 extern PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ_Fill(DM,Mat); 536 extern PetscErrorCode DMCreateMatrix_DA_1d_SeqAIJ_NoPreallocation(DM,Mat); 537 extern PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ(DM,Mat); 538 extern PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ_Fill(DM,Mat); 539 extern PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ(DM,Mat); 540 extern PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ_Fill(DM,Mat); 541 extern PetscErrorCode DMCreateMatrix_DA_2d_MPIBAIJ(DM,Mat); 542 extern PetscErrorCode DMCreateMatrix_DA_3d_MPIBAIJ(DM,Mat); 543 extern PetscErrorCode DMCreateMatrix_DA_2d_MPISBAIJ(DM,Mat); 544 extern PetscErrorCode DMCreateMatrix_DA_3d_MPISBAIJ(DM,Mat); 545 extern PetscErrorCode DMCreateMatrix_DA_2d_MPISELL(DM,Mat); 546 extern PetscErrorCode DMCreateMatrix_DA_3d_MPISELL(DM,Mat); 547 extern PetscErrorCode DMCreateMatrix_DA_IS(DM,Mat); 548 549 /*@C 550 MatSetupDM - Sets the DMDA that is to be used by the HYPRE_StructMatrix PETSc matrix 551 552 Logically Collective on mat 553 554 Input Parameters: 555 + mat - the matrix 556 - da - the da 557 558 Level: intermediate 559 560 @*/ 561 PetscErrorCode MatSetupDM(Mat mat,DM da) 562 { 563 PetscFunctionBegin; 564 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 565 PetscValidHeaderSpecificType(da,DM_CLASSID,2,DMDA); 566 PetscTryMethod(mat,"MatSetupDM_C",(Mat,DM),(mat,da)); 567 PetscFunctionReturn(0); 568 } 569 570 PetscErrorCode MatView_MPI_DA(Mat A,PetscViewer viewer) 571 { 572 DM da; 573 const char *prefix; 574 Mat Anatural; 575 AO ao; 576 PetscInt rstart,rend,*petsc,i; 577 IS is; 578 MPI_Comm comm; 579 PetscViewerFormat format; 580 581 PetscFunctionBegin; 582 /* Check whether we are just printing info, in which case MatView() already viewed everything we wanted to view */ 583 PetscCall(PetscViewerGetFormat(viewer,&format)); 584 if (format == PETSC_VIEWER_ASCII_INFO || format == 
PETSC_VIEWER_ASCII_INFO_DETAIL) PetscFunctionReturn(0); 585 586 PetscCall(PetscObjectGetComm((PetscObject)A,&comm)); 587 PetscCall(MatGetDM(A, &da)); 588 PetscCheck(da,PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Matrix not generated from a DMDA"); 589 590 PetscCall(DMDAGetAO(da,&ao)); 591 PetscCall(MatGetOwnershipRange(A,&rstart,&rend)); 592 PetscCall(PetscMalloc1(rend-rstart,&petsc)); 593 for (i=rstart; i<rend; i++) petsc[i-rstart] = i; 594 PetscCall(AOApplicationToPetsc(ao,rend-rstart,petsc)); 595 PetscCall(ISCreateGeneral(comm,rend-rstart,petsc,PETSC_OWN_POINTER,&is)); 596 597 /* call viewer on natural ordering */ 598 PetscCall(MatCreateSubMatrix(A,is,is,MAT_INITIAL_MATRIX,&Anatural)); 599 PetscCall(ISDestroy(&is)); 600 PetscCall(PetscObjectGetOptionsPrefix((PetscObject)A,&prefix)); 601 PetscCall(PetscObjectSetOptionsPrefix((PetscObject)Anatural,prefix)); 602 PetscCall(PetscObjectSetName((PetscObject)Anatural,((PetscObject)A)->name)); 603 ((PetscObject)Anatural)->donotPetscObjectPrintClassNamePrefixType = PETSC_TRUE; 604 PetscCall(MatView(Anatural,viewer)); 605 ((PetscObject)Anatural)->donotPetscObjectPrintClassNamePrefixType = PETSC_FALSE; 606 PetscCall(MatDestroy(&Anatural)); 607 PetscFunctionReturn(0); 608 } 609 610 PetscErrorCode MatLoad_MPI_DA(Mat A,PetscViewer viewer) 611 { 612 DM da; 613 Mat Anatural,Aapp; 614 AO ao; 615 PetscInt rstart,rend,*app,i,m,n,M,N; 616 IS is; 617 MPI_Comm comm; 618 619 PetscFunctionBegin; 620 PetscCall(PetscObjectGetComm((PetscObject)A,&comm)); 621 PetscCall(MatGetDM(A, &da)); 622 PetscCheck(da,PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Matrix not generated from a DMDA"); 623 624 /* Load the matrix in natural ordering */ 625 PetscCall(MatCreate(PetscObjectComm((PetscObject)A),&Anatural)); 626 PetscCall(MatSetType(Anatural,((PetscObject)A)->type_name)); 627 PetscCall(MatGetSize(A,&M,&N)); 628 PetscCall(MatGetLocalSize(A,&m,&n)); 629 PetscCall(MatSetSizes(Anatural,m,n,M,N)); 630 
PetscCall(MatLoad(Anatural,viewer)); 631 632 /* Map natural ordering to application ordering and create IS */ 633 PetscCall(DMDAGetAO(da,&ao)); 634 PetscCall(MatGetOwnershipRange(Anatural,&rstart,&rend)); 635 PetscCall(PetscMalloc1(rend-rstart,&app)); 636 for (i=rstart; i<rend; i++) app[i-rstart] = i; 637 PetscCall(AOPetscToApplication(ao,rend-rstart,app)); 638 PetscCall(ISCreateGeneral(comm,rend-rstart,app,PETSC_OWN_POINTER,&is)); 639 640 /* Do permutation and replace header */ 641 PetscCall(MatCreateSubMatrix(Anatural,is,is,MAT_INITIAL_MATRIX,&Aapp)); 642 PetscCall(MatHeaderReplace(A,&Aapp)); 643 PetscCall(ISDestroy(&is)); 644 PetscCall(MatDestroy(&Anatural)); 645 PetscFunctionReturn(0); 646 } 647 648 PetscErrorCode DMCreateMatrix_DA(DM da, Mat *J) 649 { 650 PetscInt dim,dof,nx,ny,nz,dims[3],starts[3],M,N,P; 651 Mat A; 652 MPI_Comm comm; 653 MatType Atype; 654 void (*aij)(void)=NULL,(*baij)(void)=NULL,(*sbaij)(void)=NULL,(*sell)(void)=NULL,(*is)(void)=NULL; 655 MatType mtype; 656 PetscMPIInt size; 657 DM_DA *dd = (DM_DA*)da->data; 658 659 PetscFunctionBegin; 660 PetscCall(MatInitializePackage()); 661 mtype = da->mattype; 662 663 /* 664 m 665 ------------------------------------------------------ 666 | | 667 | | 668 | ---------------------- | 669 | | | | 670 n | ny | | | 671 | | | | 672 | .--------------------- | 673 | (xs,ys) nx | 674 | . 
| 675 | (gxs,gys) | 676 | | 677 ----------------------------------------------------- 678 */ 679 680 /* 681 nc - number of components per grid point 682 col - number of colors needed in one direction for single component problem 683 684 */ 685 M = dd->M; 686 N = dd->N; 687 P = dd->P; 688 dim = da->dim; 689 dof = dd->w; 690 /* PetscCall(DMDAGetInfo(da,&dim,&M,&N,&P,NULL,NULL,NULL,&dof,NULL,NULL,NULL,NULL,NULL)); */ 691 PetscCall(DMDAGetCorners(da,NULL,NULL,NULL,&nx,&ny,&nz)); 692 PetscCall(PetscObjectGetComm((PetscObject)da,&comm)); 693 PetscCall(MatCreate(comm,&A)); 694 PetscCall(MatSetSizes(A,dof*nx*ny*nz,dof*nx*ny*nz,dof*M*N*P,dof*M*N*P)); 695 PetscCall(MatSetType(A,mtype)); 696 PetscCall(MatSetFromOptions(A)); 697 if (dof*nx*ny*nz < da->bind_below) { 698 PetscCall(MatSetBindingPropagates(A,PETSC_TRUE)); 699 PetscCall(MatBindToCPU(A,PETSC_TRUE)); 700 } 701 PetscCall(MatSetDM(A,da)); 702 if (da->structure_only) { 703 PetscCall(MatSetOption(A,MAT_STRUCTURE_ONLY,PETSC_TRUE)); 704 } 705 PetscCall(MatGetType(A,&Atype)); 706 /* 707 We do not provide a getmatrix function in the DMDA operations because 708 the basic DMDA does not know about matrices. We think of DMDA as being more 709 more low-level than matrices. This is kind of cheating but, cause sometimes 710 we think of DMDA has higher level than matrices. 711 712 We could switch based on Atype (or mtype), but we do not since the 713 specialized setting routines depend only on the particular preallocation 714 details of the matrix, not the type itself. 
715 */ 716 PetscCall(PetscObjectQueryFunction((PetscObject)A,"MatMPIAIJSetPreallocation_C",&aij)); 717 if (!aij) { 718 PetscCall(PetscObjectQueryFunction((PetscObject)A,"MatSeqAIJSetPreallocation_C",&aij)); 719 } 720 if (!aij) { 721 PetscCall(PetscObjectQueryFunction((PetscObject)A,"MatMPIBAIJSetPreallocation_C",&baij)); 722 if (!baij) { 723 PetscCall(PetscObjectQueryFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C",&baij)); 724 } 725 if (!baij) { 726 PetscCall(PetscObjectQueryFunction((PetscObject)A,"MatMPISBAIJSetPreallocation_C",&sbaij)); 727 if (!sbaij) { 728 PetscCall(PetscObjectQueryFunction((PetscObject)A,"MatSeqSBAIJSetPreallocation_C",&sbaij)); 729 } 730 if (!sbaij) { 731 PetscCall(PetscObjectQueryFunction((PetscObject)A,"MatMPISELLSetPreallocation_C",&sell)); 732 if (!sell) { 733 PetscCall(PetscObjectQueryFunction((PetscObject)A,"MatSeqSELLSetPreallocation_C",&sell)); 734 } 735 } 736 if (!sell) { 737 PetscCall(PetscObjectQueryFunction((PetscObject)A,"MatISSetPreallocation_C",&is)); 738 } 739 } 740 } 741 if (aij) { 742 if (dim == 1) { 743 if (dd->ofill) { 744 PetscCall(DMCreateMatrix_DA_1d_MPIAIJ_Fill(da,A)); 745 } else { 746 DMBoundaryType bx; 747 PetscMPIInt size; 748 PetscCall(DMDAGetInfo(da,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,&bx,NULL,NULL,NULL)); 749 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)da),&size)); 750 if (size == 1 && bx == DM_BOUNDARY_NONE) { 751 PetscCall(DMCreateMatrix_DA_1d_SeqAIJ_NoPreallocation(da,A)); 752 } else { 753 PetscCall(DMCreateMatrix_DA_1d_MPIAIJ(da,A)); 754 } 755 } 756 } else if (dim == 2) { 757 if (dd->ofill) { 758 PetscCall(DMCreateMatrix_DA_2d_MPIAIJ_Fill(da,A)); 759 } else { 760 PetscCall(DMCreateMatrix_DA_2d_MPIAIJ(da,A)); 761 } 762 } else if (dim == 3) { 763 if (dd->ofill) { 764 PetscCall(DMCreateMatrix_DA_3d_MPIAIJ_Fill(da,A)); 765 } else { 766 PetscCall(DMCreateMatrix_DA_3d_MPIAIJ(da,A)); 767 } 768 } 769 } else if (baij) { 770 if (dim == 2) { 771 PetscCall(DMCreateMatrix_DA_2d_MPIBAIJ(da,A)); 
772 } else if (dim == 3) { 773 PetscCall(DMCreateMatrix_DA_3d_MPIBAIJ(da,A)); 774 } else SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"Not implemented for %" PetscInt_FMT " dimension and Matrix Type: %s in %" PetscInt_FMT " dimension! Send mail to petsc-maint@mcs.anl.gov for code",dim,Atype,dim); 775 } else if (sbaij) { 776 if (dim == 2) { 777 PetscCall(DMCreateMatrix_DA_2d_MPISBAIJ(da,A)); 778 } else if (dim == 3) { 779 PetscCall(DMCreateMatrix_DA_3d_MPISBAIJ(da,A)); 780 } else SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"Not implemented for %" PetscInt_FMT " dimension and Matrix Type: %s in %" PetscInt_FMT " dimension! Send mail to petsc-maint@mcs.anl.gov for code",dim,Atype,dim); 781 } else if (sell) { 782 if (dim == 2) { 783 PetscCall(DMCreateMatrix_DA_2d_MPISELL(da,A)); 784 } else if (dim == 3) { 785 PetscCall(DMCreateMatrix_DA_3d_MPISELL(da,A)); 786 } else SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"Not implemented for %" PetscInt_FMT " dimension and Matrix Type: %s in %" PetscInt_FMT " dimension! 
Send mail to petsc-maint@mcs.anl.gov for code",dim,Atype,dim); 787 } else if (is) { 788 PetscCall(DMCreateMatrix_DA_IS(da,A)); 789 } else { 790 ISLocalToGlobalMapping ltog; 791 792 PetscCall(MatSetBlockSize(A,dof)); 793 PetscCall(MatSetUp(A)); 794 PetscCall(DMGetLocalToGlobalMapping(da,<og)); 795 PetscCall(MatSetLocalToGlobalMapping(A,ltog,ltog)); 796 } 797 PetscCall(DMDAGetGhostCorners(da,&starts[0],&starts[1],&starts[2],&dims[0],&dims[1],&dims[2])); 798 PetscCall(MatSetStencil(A,dim,dims,starts,dof)); 799 PetscCall(MatSetDM(A,da)); 800 PetscCallMPI(MPI_Comm_size(comm,&size)); 801 if (size > 1) { 802 /* change viewer to display matrix in natural ordering */ 803 PetscCall(MatSetOperation(A, MATOP_VIEW, (void (*)(void))MatView_MPI_DA)); 804 PetscCall(MatSetOperation(A, MATOP_LOAD, (void (*)(void))MatLoad_MPI_DA)); 805 } 806 *J = A; 807 PetscFunctionReturn(0); 808 } 809 810 /* ---------------------------------------------------------------------------------*/ 811 PETSC_EXTERN PetscErrorCode MatISSetPreallocation_IS(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]); 812 813 PetscErrorCode DMCreateMatrix_DA_IS(DM dm,Mat J) 814 { 815 DM_DA *da = (DM_DA*)dm->data; 816 Mat lJ,P; 817 ISLocalToGlobalMapping ltog; 818 IS is; 819 PetscBT bt; 820 const PetscInt *e_loc,*idx; 821 PetscInt i,nel,nen,nv,dof,*gidx,n,N; 822 823 /* The l2g map of DMDA has all ghosted nodes, and e_loc is a subset of all the local nodes (including the ghosted) 824 We need to filter out the local indices that are not represented through the DMDAGetElements decomposition */ 825 PetscFunctionBegin; 826 dof = da->w; 827 PetscCall(MatSetBlockSize(J,dof)); 828 PetscCall(DMGetLocalToGlobalMapping(dm,<og)); 829 830 /* flag local elements indices in local DMDA numbering */ 831 PetscCall(ISLocalToGlobalMappingGetSize(ltog,&nv)); 832 PetscCall(PetscBTCreate(nv/dof,&bt)); 833 PetscCall(DMDAGetElements(dm,&nel,&nen,&e_loc)); /* this will throw an error if the stencil type is not DMDA_STENCIL_BOX */ 834 for 
(i=0;i<nel*nen;i++) PetscCall(PetscBTSet(bt,e_loc[i])); 835 836 /* filter out (set to -1) the global indices not used by the local elements */ 837 PetscCall(PetscMalloc1(nv/dof,&gidx)); 838 PetscCall(ISLocalToGlobalMappingGetBlockIndices(ltog,&idx)); 839 PetscCall(PetscArraycpy(gidx,idx,nv/dof)); 840 PetscCall(ISLocalToGlobalMappingRestoreBlockIndices(ltog,&idx)); 841 for (i=0;i<nv/dof;i++) if (!PetscBTLookup(bt,i)) gidx[i] = -1; 842 PetscCall(PetscBTDestroy(&bt)); 843 PetscCall(ISCreateBlock(PetscObjectComm((PetscObject)dm),dof,nv/dof,gidx,PETSC_OWN_POINTER,&is)); 844 PetscCall(ISLocalToGlobalMappingCreateIS(is,<og)); 845 PetscCall(MatSetLocalToGlobalMapping(J,ltog,ltog)); 846 PetscCall(ISLocalToGlobalMappingDestroy(<og)); 847 PetscCall(ISDestroy(&is)); 848 849 /* Preallocation */ 850 if (dm->prealloc_skip) { 851 PetscCall(MatSetUp(J)); 852 } else { 853 PetscCall(MatISGetLocalMat(J,&lJ)); 854 PetscCall(MatGetLocalToGlobalMapping(lJ,<og,NULL)); 855 PetscCall(MatCreate(PetscObjectComm((PetscObject)lJ),&P)); 856 PetscCall(MatSetType(P,MATPREALLOCATOR)); 857 PetscCall(MatSetLocalToGlobalMapping(P,ltog,ltog)); 858 PetscCall(MatGetSize(lJ,&N,NULL)); 859 PetscCall(MatGetLocalSize(lJ,&n,NULL)); 860 PetscCall(MatSetSizes(P,n,n,N,N)); 861 PetscCall(MatSetBlockSize(P,dof)); 862 PetscCall(MatSetUp(P)); 863 for (i=0;i<nel;i++) { 864 PetscCall(MatSetValuesBlockedLocal(P,nen,e_loc+i*nen,nen,e_loc+i*nen,NULL,INSERT_VALUES)); 865 } 866 PetscCall(MatPreallocatorPreallocate(P,(PetscBool)!da->prealloc_only,lJ)); 867 PetscCall(MatISRestoreLocalMat(J,&lJ)); 868 PetscCall(DMDARestoreElements(dm,&nel,&nen,&e_loc)); 869 PetscCall(MatDestroy(&P)); 870 871 PetscCall(MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY)); 872 PetscCall(MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY)); 873 } 874 PetscFunctionReturn(0); 875 } 876 877 PetscErrorCode DMCreateMatrix_DA_2d_MPISELL(DM da,Mat J) 878 { 879 PetscInt xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny,m,n,dim,s,*cols = NULL,k,nc,*rows = NULL,col,cnt,l,p; 880 PetscInt 
           lstart,lend,pstart,pend,*dnz,*onz;
  MPI_Comm               comm;
  PetscScalar            *values;
  DMBoundaryType         bx,by;
  ISLocalToGlobalMapping ltog;
  DMDAStencilType        st;

  PetscFunctionBegin;
  /*
         nc - number of components per grid point
         col - number of colors needed in one direction for single component problem

  */
  PetscCall(DMDAGetInfo(da,&dim,&m,&n,NULL,NULL,NULL,NULL,&nc,&s,&bx,&by,NULL,&st));
  col = 2*s + 1;
  PetscCall(DMDAGetCorners(da,&xs,&ys,NULL,&nx,&ny,NULL));
  PetscCall(DMDAGetGhostCorners(da,&gxs,&gys,NULL,&gnx,&gny,NULL));
  PetscCall(PetscObjectGetComm((PetscObject)da,&comm));

  /* rows holds the nc row indices of one grid point; cols holds the worst-case stencil column indices */
  PetscCall(PetscMalloc2(nc,&rows,col*col*nc*nc,&cols));
  PetscCall(DMGetLocalToGlobalMapping(da,&ltog));

  PetscCall(MatSetBlockSize(J,nc));
  /* determine the matrix preallocation information */
  MatPreallocateBegin(comm,nc*nx*ny,nc*nx*ny,dnz,onz);
  for (i=xs; i<xs+nx; i++) {

    /* periodic boundaries keep the full stencil width; otherwise clip at the physical boundary */
    pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
    pend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));

    for (j=ys; j<ys+ny; j++) {
      slot = i - gxs + gnx*(j - gys); /* local (ghosted) index of grid point (i,j) */

      lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
      lend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));

      cnt = 0;
      for (k=0; k<nc; k++) {
        for (l=lstart; l<lend+1; l++) {
          for (p=pstart; p<pend+1; p++) {
            if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star have either l = 0 or p = 0 */
              cols[cnt++] = k + nc*(slot + gnx*l + p);
            }
          }
        }
        rows[k] = k + nc*(slot);
      }
      PetscCall(MatPreallocateSetLocal(ltog,nc,rows,ltog,cnt,cols,dnz,onz));
    }
  }
  PetscCall(MatSetBlockSize(J,nc));
  PetscCall(MatSeqSELLSetPreallocation(J,0,dnz));
  PetscCall(MatMPISELLSetPreallocation(J,0,dnz,0,onz));
  MatPreallocateEnd(dnz,onz);

  PetscCall(MatSetLocalToGlobalMapping(J,ltog,ltog));

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    PetscCall(PetscCalloc1(col*col*nc*nc,&values));
    for (i=xs; i<xs+nx; i++) {

      pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
      pend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));

      for (j=ys; j<ys+ny; j++) {
        slot = i - gxs + gnx*(j - gys);

        lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
        lend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));

        cnt = 0;
        for (k=0; k<nc; k++) {
          for (l=lstart; l<lend+1; l++) {
            for (p=pstart; p<pend+1; p++) {
              if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star have either l = 0 or p = 0 */
                cols[cnt++] = k + nc*(slot + gnx*l + p);
              }
            }
          }
          rows[k] = k + nc*(slot);
        }
        /* insert explicit zeros so the whole nonzero pattern is instantiated up front */
        PetscCall(MatSetValuesLocal(J,nc,rows,cnt,cols,values,INSERT_VALUES));
      }
    }
    PetscCall(PetscFree(values));
    /* do not copy values to GPU since they are all zero and not yet needed there */
    PetscCall(MatBindToCPU(J,PETSC_TRUE));
    PetscCall(MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY));
    PetscCall(MatBindToCPU(J,PETSC_FALSE));
    PetscCall(MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE));
  }
  PetscCall(PetscFree2(rows,cols));
  PetscFunctionReturn(0);
}

/*
  DMCreateMatrix_DA_3d_MPISELL - Preallocates (and, unless prealloc_only, zero-fills) a SELL matrix for a 3d DMDA.
*/
PetscErrorCode DMCreateMatrix_DA_3d_MPISELL(DM da,Mat J)
{
  PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
  PetscInt               m,n,dim,s,*cols = NULL,k,nc,*rows = NULL,col,cnt,l,p,*dnz = NULL,*onz = NULL;
  PetscInt               istart,iend,jstart,jend,kstart,kend,zs,nz,gzs,gnz,ii,jj,kk,M,N,P;
  MPI_Comm               comm;
  PetscScalar            *values;
  DMBoundaryType         bx,by,bz;
  ISLocalToGlobalMapping ltog;
  DMDAStencilType        st;

  PetscFunctionBegin;
  /*
         nc - number of components per grid point
         col - number of colors needed in one direction for single component problem

  */
  PetscCall(DMDAGetInfo(da,&dim,&m,&n,&p,&M,&N,&P,&nc,&s,&bx,&by,&bz,&st));
  col = 2*s + 1;
  PetscCall(DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz));
  PetscCall(DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz));
  PetscCall(PetscObjectGetComm((PetscObject)da,&comm));

  PetscCall(PetscMalloc2(nc,&rows,col*col*col*nc*nc,&cols));
  PetscCall(DMGetLocalToGlobalMapping(da,&ltog));

  PetscCall(MatSetBlockSize(J,nc));
  /* determine the matrix preallocation information */
MatPreallocateBegin(comm,nc*nx*ny*nz,nc*nx*ny*nz,dnz,onz); 1010 for (i=xs; i<xs+nx; i++) { 1011 istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i)); 1012 iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,m-i-1)); 1013 for (j=ys; j<ys+ny; j++) { 1014 jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j)); 1015 jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,n-j-1)); 1016 for (k=zs; k<zs+nz; k++) { 1017 kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k)); 1018 kend = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,p-k-1)); 1019 1020 slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs); 1021 1022 cnt = 0; 1023 for (l=0; l<nc; l++) { 1024 for (ii=istart; ii<iend+1; ii++) { 1025 for (jj=jstart; jj<jend+1; jj++) { 1026 for (kk=kstart; kk<kend+1; kk++) { 1027 if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/ 1028 cols[cnt++] = l + nc*(slot + ii + gnx*jj + gnx*gny*kk); 1029 } 1030 } 1031 } 1032 } 1033 rows[l] = l + nc*(slot); 1034 } 1035 PetscCall(MatPreallocateSetLocal(ltog,nc,rows,ltog,cnt,cols,dnz,onz)); 1036 } 1037 } 1038 } 1039 PetscCall(MatSetBlockSize(J,nc)); 1040 PetscCall(MatSeqSELLSetPreallocation(J,0,dnz)); 1041 PetscCall(MatMPISELLSetPreallocation(J,0,dnz,0,onz)); 1042 MatPreallocateEnd(dnz,onz); 1043 PetscCall(MatSetLocalToGlobalMapping(J,ltog,ltog)); 1044 1045 /* 1046 For each node in the grid: we get the neighbors in the local (on processor ordering 1047 that includes the ghost points) then MatSetValuesLocal() maps those indices to the global 1048 PETSc ordering. 1049 */ 1050 if (!da->prealloc_only) { 1051 PetscCall(PetscCalloc1(col*col*col*nc*nc*nc,&values)); 1052 for (i=xs; i<xs+nx; i++) { 1053 istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i)); 1054 iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,m-i-1)); 1055 for (j=ys; j<ys+ny; j++) { 1056 jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j)); 1057 jend = (by == DM_BOUNDARY_PERIODIC) ? 
s : (PetscMin(s,n-j-1)); 1058 for (k=zs; k<zs+nz; k++) { 1059 kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k)); 1060 kend = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,p-k-1)); 1061 1062 slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs); 1063 1064 cnt = 0; 1065 for (l=0; l<nc; l++) { 1066 for (ii=istart; ii<iend+1; ii++) { 1067 for (jj=jstart; jj<jend+1; jj++) { 1068 for (kk=kstart; kk<kend+1; kk++) { 1069 if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/ 1070 cols[cnt++] = l + nc*(slot + ii + gnx*jj + gnx*gny*kk); 1071 } 1072 } 1073 } 1074 } 1075 rows[l] = l + nc*(slot); 1076 } 1077 PetscCall(MatSetValuesLocal(J,nc,rows,cnt,cols,values,INSERT_VALUES)); 1078 } 1079 } 1080 } 1081 PetscCall(PetscFree(values)); 1082 /* do not copy values to GPU since they are all zero and not yet needed there */ 1083 PetscCall(MatBindToCPU(J,PETSC_TRUE)); 1084 PetscCall(MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY)); 1085 PetscCall(MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY)); 1086 PetscCall(MatBindToCPU(J,PETSC_FALSE)); 1087 PetscCall(MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE)); 1088 } 1089 PetscCall(PetscFree2(rows,cols)); 1090 PetscFunctionReturn(0); 1091 } 1092 1093 PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ(DM da,Mat J) 1094 { 1095 PetscInt xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny,m,n,dim,s,*cols = NULL,k,nc,*rows = NULL,col,cnt,l,p,M,N; 1096 PetscInt lstart,lend,pstart,pend,*dnz,*onz; 1097 MPI_Comm comm; 1098 DMBoundaryType bx,by; 1099 ISLocalToGlobalMapping ltog,mltog; 1100 DMDAStencilType st; 1101 PetscBool removedups = PETSC_FALSE,alreadyboundtocpu = PETSC_TRUE; 1102 1103 PetscFunctionBegin; 1104 /* 1105 nc - number of components per grid point 1106 col - number of colors needed in one direction for single component problem 1107 1108 */ 1109 PetscCall(DMDAGetInfo(da,&dim,&m,&n,&M,&N,NULL,NULL,&nc,&s,&bx,&by,NULL,&st)); 1110 if (bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE) { 1111 
    PetscCall(MatSetOption(J,MAT_SORTED_FULL,PETSC_TRUE));
  }
  col = 2*s + 1;
  /*
       With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times
       because of "wrapping" around the end of the domain hitting an entry already counted in the other direction.
  */
  if (M == 1 && 2*s >= m) removedups = PETSC_TRUE;
  if (N == 1 && 2*s >= n) removedups = PETSC_TRUE;
  PetscCall(DMDAGetCorners(da,&xs,&ys,NULL,&nx,&ny,NULL));
  PetscCall(DMDAGetGhostCorners(da,&gxs,&gys,NULL,&gnx,&gny,NULL));
  PetscCall(PetscObjectGetComm((PetscObject)da,&comm));

  /* rows holds the nc row indices of one grid point; cols holds the worst-case stencil column indices */
  PetscCall(PetscMalloc2(nc,&rows,col*col*nc*nc,&cols));
  PetscCall(DMGetLocalToGlobalMapping(da,&ltog));

  PetscCall(MatSetBlockSize(J,nc));
  /* determine the matrix preallocation information */
  MatPreallocateBegin(comm,nc*nx*ny,nc*nx*ny,dnz,onz);
  for (i=xs; i<xs+nx; i++) {

    /* periodic boundaries keep the full stencil width; otherwise clip at the physical boundary */
    pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
    pend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));

    for (j=ys; j<ys+ny; j++) {
      slot = i - gxs + gnx*(j - gys); /* local (ghosted) index of grid point (i,j) */

      lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
      lend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));

      cnt = 0;
      for (k=0; k<nc; k++) {
        for (l=lstart; l<lend+1; l++) {
          for (p=pstart; p<pend+1; p++) {
            if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star have either l = 0 or p = 0 */
              cols[cnt++] = k + nc*(slot + gnx*l + p);
            }
          }
        }
        rows[k] = k + nc*(slot);
      }
      if (removedups) {
        PetscCall(MatPreallocateSetLocalRemoveDups(ltog,nc,rows,ltog,cnt,cols,dnz,onz));
      } else {
        PetscCall(MatPreallocateSetLocal(ltog,nc,rows,ltog,cnt,cols,dnz,onz));
      }
    }
  }
  PetscCall(MatSetBlockSize(J,nc));
  PetscCall(MatSeqAIJSetPreallocation(J,0,dnz));
  PetscCall(MatMPIAIJSetPreallocation(J,0,dnz,0,onz));
  MatPreallocateEnd(dnz,onz);
  /* only install our mapping if the caller has not already attached one */
  PetscCall(MatGetLocalToGlobalMapping(J,&mltog,NULL));
  if (!mltog) {
    PetscCall(MatSetLocalToGlobalMapping(J,ltog,ltog));
  }

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    for (i=xs; i<xs+nx; i++) {

      pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
      pend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));

      for (j=ys; j<ys+ny; j++) {
        slot = i - gxs + gnx*(j - gys);

        lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
        lend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));

        cnt = 0;
        for (l=lstart; l<lend+1; l++) {
          for (p=pstart; p<pend+1; p++) {
            if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star have either l = 0 or p = 0 */
              /* first component of the neighbor, then the remaining nc-1 consecutive components */
              cols[cnt++] = nc*(slot + gnx*l + p);
              for (k=1; k<nc; k++) {
                cols[cnt] = 1 + cols[cnt-1];cnt++;
              }
            }
          }
        }
        for (k=0; k<nc; k++) rows[k] = k + nc*(slot);
        /* NULL values: only the nonzero pattern is instantiated */
        PetscCall(MatSetValuesLocal(J,nc,rows,cnt,cols,NULL,INSERT_VALUES));
      }
    }
    /* do not copy values to GPU since they are all zero and not yet needed there */
    PetscCall(MatBoundToCPU(J,&alreadyboundtocpu));
    PetscCall(MatBindToCPU(J,PETSC_TRUE));
    PetscCall(MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY));
    if (!alreadyboundtocpu) PetscCall(MatBindToCPU(J,PETSC_FALSE));
    PetscCall(MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE));
    if (bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE) {
      PetscCall(MatSetOption(J,MAT_SORTED_FULL,PETSC_FALSE));
    }
  }
  PetscCall(PetscFree2(rows,cols));
  PetscFunctionReturn(0);
}

/*
  DMCreateMatrix_DA_2d_MPIAIJ_Fill - Like DMCreateMatrix_DA_2d_MPIAIJ but honors the per-block
  fill patterns (dfill/ofill) set via DMDASetBlockFills().
*/
PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ_Fill(DM da,Mat J)
{
  PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
  PetscInt               m,n,dim,s,*cols,k,nc,row,col,cnt,maxcnt = 0,l,p,M,N;
  PetscInt               lstart,lend,pstart,pend,*dnz,*onz;
  DM_DA                  *dd = (DM_DA*)da->data;
  PetscInt               ifill_col,*ofill = dd->ofill, *dfill = dd->dfill;
  MPI_Comm               comm;
  DMBoundaryType         bx,by;
  ISLocalToGlobalMapping ltog;
  DMDAStencilType        st;
  PetscBool              removedups = PETSC_FALSE;

  PetscFunctionBegin;
  /*
         nc - number of components per grid point
         col - number of colors needed in one direction for single component problem

  */
  PetscCall(DMDAGetInfo(da,&dim,&m,&n,&M,&N,NULL,NULL,&nc,&s,&bx,&by,NULL,&st));
  col = 2*s + 1;
  /*
       With one processor in periodic domains in a skinny dimension the code
       will label nonzero columns multiple times
       because of "wrapping" around the end of the domain hitting an entry already counted in the other direction.
  */
  if (M == 1 && 2*s >= m) removedups = PETSC_TRUE;
  if (N == 1 && 2*s >= n) removedups = PETSC_TRUE;
  PetscCall(DMDAGetCorners(da,&xs,&ys,NULL,&nx,&ny,NULL));
  PetscCall(DMDAGetGhostCorners(da,&gxs,&gys,NULL,&gnx,&gny,NULL));
  PetscCall(PetscObjectGetComm((PetscObject)da,&comm));

  PetscCall(PetscMalloc1(col*col*nc,&cols));
  PetscCall(DMGetLocalToGlobalMapping(da,&ltog));

  PetscCall(MatSetBlockSize(J,nc));
  /* determine the matrix preallocation information; rows are handled one component at a time */
  MatPreallocateBegin(comm,nc*nx*ny,nc*nx*ny,dnz,onz);
  for (i=xs; i<xs+nx; i++) {

    /* periodic boundaries keep the full stencil width; otherwise clip at the physical boundary */
    pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
    pend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));

    for (j=ys; j<ys+ny; j++) {
      slot = i - gxs + gnx*(j - gys); /* local (ghosted) index of grid point (i,j) */

      lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
      lend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));

      for (k=0; k<nc; k++) {
        cnt = 0;
        for (l=lstart; l<lend+1; l++) {
          for (p=pstart; p<pend+1; p++) {
            if (l || p) {
              /* off-diagonal grid point: couple only through the ofill pattern */
              if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star */
                for (ifill_col=ofill[k]; ifill_col<ofill[k+1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc*(slot + gnx*l + p);
              }
            } else {
              /* diagonal grid point: couple through dfill, or densely if dfill was not given */
              if (dfill) {
                for (ifill_col=dfill[k]; ifill_col<dfill[k+1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc*(slot + gnx*l + p);
              } else {
                for (ifill_col=0; ifill_col<nc; ifill_col++) cols[cnt++] = ifill_col + nc*(slot + gnx*l + p);
              }
            }
          }
        }
        row    = k + nc*(slot);
        maxcnt = PetscMax(maxcnt,cnt);
        if (removedups) {
          PetscCall(MatPreallocateSetLocalRemoveDups(ltog,1,&row,ltog,cnt,cols,dnz,onz));
        } else {
          PetscCall(MatPreallocateSetLocal(ltog,1,&row,ltog,cnt,cols,dnz,onz));
        }
      }
    }
  }
  PetscCall(MatSeqAIJSetPreallocation(J,0,dnz));
  PetscCall(MatMPIAIJSetPreallocation(J,0,dnz,0,onz));
  MatPreallocateEnd(dnz,onz);
  PetscCall(MatSetLocalToGlobalMapping(J,ltog,ltog));

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    for (i=xs; i<xs+nx; i++) {

      pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
      pend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));

      for (j=ys; j<ys+ny; j++) {
        slot = i - gxs + gnx*(j - gys);

        lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
        lend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));

        for (k=0; k<nc; k++) {
          cnt = 0;
          for (l=lstart; l<lend+1; l++) {
            for (p=pstart; p<pend+1; p++) {
              if (l || p) {
                if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star */
                  for (ifill_col=ofill[k]; ifill_col<ofill[k+1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc*(slot + gnx*l + p);
                }
              } else {
                if (dfill) {
                  for (ifill_col=dfill[k]; ifill_col<dfill[k+1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc*(slot + gnx*l + p);
                } else {
                  for (ifill_col=0; ifill_col<nc; ifill_col++) cols[cnt++] = ifill_col + nc*(slot + gnx*l + p);
                }
              }
            }
          }
          row = k + nc*(slot);
          /* NULL values: only the nonzero pattern is instantiated */
          PetscCall(MatSetValuesLocal(J,1,&row,cnt,cols,NULL,INSERT_VALUES));
        }
      }
    }
    /* do not copy values to GPU since they are all zero and not yet needed there */
    PetscCall(MatBindToCPU(J,PETSC_TRUE));
    PetscCall(MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY));
    PetscCall(MatBindToCPU(J,PETSC_FALSE));
    PetscCall(MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE));
  }
  PetscCall(PetscFree(cols));
  PetscFunctionReturn(0);
}

/* ---------------------------------------------------------------------------------*/

/*
  DMCreateMatrix_DA_3d_MPIAIJ - Preallocates (and, unless prealloc_only, zero-fills) an AIJ matrix for a 3d DMDA.
*/
PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ(DM da,Mat J)
{
  PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
  PetscInt               m,n,dim,s,*cols = NULL,k,nc,*rows = NULL,col,cnt,l,p,*dnz = NULL,*onz = NULL;
  PetscInt               istart,iend,jstart,jend,kstart,kend,zs,nz,gzs,gnz,ii,jj,kk,M,N,P;
  MPI_Comm               comm;
  DMBoundaryType         bx,by,bz;
  ISLocalToGlobalMapping ltog,mltog;
  DMDAStencilType        st;
  PetscBool              removedups = PETSC_FALSE;

  PetscFunctionBegin;
  /*
         nc - number of components per grid point
         col - number of colors needed in one direction for single component problem

  */
  PetscCall(DMDAGetInfo(da,&dim,&m,&n,&p,&M,&N,&P,&nc,&s,&bx,&by,&bz,&st));
  if (bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE && bz == DM_BOUNDARY_NONE) {
    PetscCall(MatSetOption(J,MAT_SORTED_FULL,PETSC_TRUE));
  }
  col = 2*s + 1;

  /*
       With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times
       because of "wrapping" around the end of the domain hitting an entry already counted in the other direction.
  */
  if (M == 1 && 2*s >= m) removedups = PETSC_TRUE;
  if (N == 1 && 2*s >= n) removedups = PETSC_TRUE;
  if (P == 1 && 2*s >= p) removedups = PETSC_TRUE;

  PetscCall(DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz));
  PetscCall(DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz));
  PetscCall(PetscObjectGetComm((PetscObject)da,&comm));

  /* rows holds the nc row indices of one grid point; cols holds the worst-case stencil column indices */
  PetscCall(PetscMalloc2(nc,&rows,col*col*col*nc*nc,&cols));
  PetscCall(DMGetLocalToGlobalMapping(da,&ltog));

  PetscCall(MatSetBlockSize(J,nc));
  /* determine the matrix preallocation information */
  MatPreallocateBegin(comm,nc*nx*ny*nz,nc*nx*ny*nz,dnz,onz);
  for (i=xs; i<xs+nx; i++) {
    /* periodic boundaries keep the full stencil width; otherwise clip at the physical boundary */
    istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
    iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
    for (j=ys; j<ys+ny; j++) {
      jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
      jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
      for (k=zs; k<zs+nz; k++) {
        kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
        kend   = (bz == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));

        slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs); /* local (ghosted) index of grid point (i,j,k) */

        cnt = 0;
        for (l=0; l<nc; l++) {
          for (ii=istart; ii<iend+1; ii++) {
            for (jj=jstart; jj<jend+1; jj++) {
              for (kk=kstart; kk<kend+1; kk++) {
                if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
                  cols[cnt++] = l + nc*(slot + ii + gnx*jj + gnx*gny*kk);
                }
              }
            }
          }
          rows[l] = l + nc*(slot);
        }
        if (removedups) {
          PetscCall(MatPreallocateSetLocalRemoveDups(ltog,nc,rows,ltog,cnt,cols,dnz,onz));
        } else {
          PetscCall(MatPreallocateSetLocal(ltog,nc,rows,ltog,cnt,cols,dnz,onz));
        }
      }
    }
  }
  PetscCall(MatSetBlockSize(J,nc));
  PetscCall(MatSeqAIJSetPreallocation(J,0,dnz));
  PetscCall(MatMPIAIJSetPreallocation(J,0,dnz,0,onz));
  MatPreallocateEnd(dnz,onz);
  /* only install our mapping if the caller has not already attached one */
  PetscCall(MatGetLocalToGlobalMapping(J,&mltog,NULL));
  if (!mltog) {
    PetscCall(MatSetLocalToGlobalMapping(J,ltog,ltog));
  }

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    for (i=xs; i<xs+nx; i++) {
      istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
      iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
      for (j=ys; j<ys+ny; j++) {
        jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
        jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
        for (k=zs; k<zs+nz; k++) {
          kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
          kend   = (bz == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));

          slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);

          /* note kk/jj/ii loop order here produces columns in increasing local index order */
          cnt = 0;
          for (kk=kstart; kk<kend+1; kk++) {
            for (jj=jstart; jj<jend+1; jj++) {
              for (ii=istart; ii<iend+1; ii++) {
                if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
                  /* first component of the neighbor, then the remaining nc-1 consecutive components */
                  cols[cnt++] = nc*(slot + ii + gnx*jj + gnx*gny*kk);
                  for (l=1; l<nc; l++) {
                    cols[cnt] = 1 + cols[cnt-1];cnt++;
                  }
                }
              }
            }
          }
          rows[0] = nc*(slot); for (l=1; l<nc; l++) rows[l] = 1 + rows[l-1];
          /* NULL values: only the nonzero pattern is instantiated */
          PetscCall(MatSetValuesLocal(J,nc,rows,cnt,cols,NULL,INSERT_VALUES));
        }
      }
    }
    /* do not copy values to GPU since they are all zero and not yet needed there */
    PetscCall(MatBindToCPU(J,PETSC_TRUE));
    PetscCall(MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY));
    if (bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE && bz == DM_BOUNDARY_NONE) {
      PetscCall(MatSetOption(J,MAT_SORTED_FULL,PETSC_FALSE));
    }
    PetscCall(MatBindToCPU(J,PETSC_FALSE));
    PetscCall(MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE));
  }
  PetscCall(PetscFree2(rows,cols));
  PetscFunctionReturn(0);
}

/* ---------------------------------------------------------------------------------*/

/*
  DMCreateMatrix_DA_1d_MPIAIJ_Fill - Like DMCreateMatrix_DA_1d_MPIAIJ but honors the per-block
  fill patterns (dfill/ofill) set via DMDASetBlockFills(); only supports stencil width <= 1.
*/
PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ_Fill(DM da,Mat J)
{
  DM_DA                  *dd = (DM_DA*)da->data;
  PetscInt               xs,nx,i,j,gxs,gnx,row,k,l;
  PetscInt               m,dim,s,*cols = NULL,nc,cnt,maxcnt = 0,*ocols;
  PetscInt               *ofill = dd->ofill,*dfill = dd->dfill;
  DMBoundaryType         bx;
  ISLocalToGlobalMapping ltog;
  PetscMPIInt            rank,size;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)da),&rank));
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)da),&size));

  /*
         nc - number of components per grid point

  */
  PetscCall(DMDAGetInfo(da,&dim,&m,NULL,NULL,NULL,NULL,NULL,&nc,&s,&bx,NULL,NULL,NULL));
  PetscCheck(s <= 1,PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"Matrix creation for 1d not implemented correctly for stencil width larger than 1");
  PetscCall(DMDAGetCorners(da,&xs,NULL,NULL,&nx,NULL,NULL));
  PetscCall(DMDAGetGhostCorners(da,&gxs,NULL,NULL,&gnx,NULL,NULL));

  PetscCall(MatSetBlockSize(J,nc));
  /* cols/ocols temporarily hold the per-row diagonal/off-diagonal nonzero COUNTS for preallocation */
  PetscCall(PetscCalloc2(nx*nc,&cols,nx*nc,&ocols));

  /*
        note should be smaller for first and last process with no periodic
        does not handle dfill
  */
  /* NOTE(review): this counting pass dereferences dfill unconditionally while the fill
     pass below guards with "if (dfill)" -- confirm callers always provide dfill here */
  cnt = 0;
  /* coupling with process to the left */
  for (i=0; i<s; i++) {
    for (j=0; j<nc; j++) {
      ocols[cnt] = ((rank == 0) ? 0 : (s - i)*(ofill[j+1] - ofill[j]));
      cols[cnt]  = dfill[j+1] - dfill[j] + (s + i)*(ofill[j+1] - ofill[j]);
      if (rank == 0 && (dd->bx == DM_BOUNDARY_PERIODIC)) {
        /* periodic wrap: the wrapped neighbor is off-process only when there is more than one rank */
        if (size > 1) ocols[cnt] += (s - i)*(ofill[j+1] - ofill[j]);
        else          cols[cnt]  += (s - i)*(ofill[j+1] - ofill[j]);
      }
      maxcnt = PetscMax(maxcnt,ocols[cnt]+cols[cnt]);
      cnt++;
    }
  }
  /* interior rows: full stencil on both sides, all in the diagonal block */
  for (i=s; i<nx-s; i++) {
    for (j=0; j<nc; j++) {
      cols[cnt] = dfill[j+1] - dfill[j] + 2*s*(ofill[j+1] - ofill[j]);
      maxcnt    = PetscMax(maxcnt,ocols[cnt]+cols[cnt]);
      cnt++;
    }
  }
  /* coupling with process to the right */
  for (i=nx-s; i<nx; i++) {
    for (j=0; j<nc; j++) {
      ocols[cnt] = ((rank == (size-1)) ? 0 : (i - nx + s + 1)*(ofill[j+1] - ofill[j]));
      cols[cnt]  = dfill[j+1] - dfill[j] + (s + nx - i - 1)*(ofill[j+1] - ofill[j]);
      if ((rank == size-1) && (dd->bx == DM_BOUNDARY_PERIODIC)) {
        if (size > 1) ocols[cnt] += (i - nx + s + 1)*(ofill[j+1] - ofill[j]);
        else          cols[cnt]  += (i - nx + s + 1)*(ofill[j+1] - ofill[j]);
      }
      maxcnt = PetscMax(maxcnt,ocols[cnt]+cols[cnt]);
      cnt++;
    }
  }

  PetscCall(MatSeqAIJSetPreallocation(J,0,cols));
  PetscCall(MatMPIAIJSetPreallocation(J,0,cols,0,ocols));
  PetscCall(PetscFree2(cols,ocols));

  PetscCall(DMGetLocalToGlobalMapping(da,&ltog));
  PetscCall(MatSetLocalToGlobalMapping(J,ltog,ltog));

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    /* cols is reused below to hold GLOBAL column indices for the inserted (NULL-valued) entries */
    PetscCall(PetscMalloc1(maxcnt,&cols));
    row = xs*nc;
    /* coupling with process to the left */
    for (i=xs; i<xs+s; i++) {
      for (j=0; j<nc; j++) {
        cnt = 0;
        if (rank) {
          for (l=0; l<s; l++) {
            for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i - s + l)*nc + ofill[k];
          }
        }
        if (rank == 0 && (dd->bx == DM_BOUNDARY_PERIODIC)) {
          /* wrapped left neighbor(s) at the far end of the domain */
          for (l=0; l<s; l++) {
            for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (m + i - s - l)*nc + ofill[k];
          }
        }
        if (dfill) {
          for (k=dfill[j]; k<dfill[j+1]; k++) {
            cols[cnt++] = i*nc + dfill[k];
          }
        } else {
          for (k=0; k<nc; k++) {
            cols[cnt++] = i*nc + k;
          }
        }
        for (l=0; l<s; l++) {
          for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i + s - l)*nc + ofill[k];
        }
        PetscCall(MatSetValues(J,1,&row,cnt,cols,NULL,INSERT_VALUES));
        row++;
      }
    }
    /* interior rows */
    for (i=xs+s; i<xs+nx-s; i++) {
      for (j=0; j<nc; j++) {
        cnt = 0;
        for (l=0; l<s; l++) {
          for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i - s + l)*nc + ofill[k];
        }
        if (dfill) {
          for (k=dfill[j]; k<dfill[j+1]; k++) {
            cols[cnt++] = i*nc + dfill[k];
          }
        } else {
          for (k=0; k<nc; k++) {
            cols[cnt++] = i*nc + k;
          }
        }
        for (l=0; l<s; l++) {
          for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i + s - l)*nc + ofill[k];
        }
        PetscCall(MatSetValues(J,1,&row,cnt,cols,NULL,INSERT_VALUES));
        row++;
      }
    }
    /* coupling with process to the right */
    for (i=xs+nx-s; i<xs+nx; i++) {
      for (j=0; j<nc; j++) {
        cnt = 0;
        for (l=0; l<s; l++) {
          for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i - s + l)*nc + ofill[k];
        }
        if (dfill) {
          for (k=dfill[j]; k<dfill[j+1]; k++) {
            cols[cnt++] = i*nc + dfill[k];
          }
        } else {
          for (k=0; k<nc; k++) {
            cols[cnt++] = i*nc + k;
          }
        }
        if (rank < size-1) {
          for (l=0; l<s; l++) {
            for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i + s - l)*nc + ofill[k];
          }
        }
        if ((rank == size-1) && (dd->bx == DM_BOUNDARY_PERIODIC)) {
          /* wrapped right neighbor(s) at the start of the domain */
          for (l=0; l<s; l++) {
            for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i - s - l - m + 2)*nc + ofill[k];
          }
        }
        PetscCall(MatSetValues(J,1,&row,cnt,cols,NULL,INSERT_VALUES));
        row++;
      }
    }
    PetscCall(PetscFree(cols));
    /* do not copy values to GPU since they are all zero and not yet needed there */
    PetscCall(MatBindToCPU(J,PETSC_TRUE));
    PetscCall(MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY));
    PetscCall(MatBindToCPU(J,PETSC_FALSE));
    PetscCall(MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE));
  }
  PetscFunctionReturn(0);
}

/* ---------------------------------------------------------------------------------*/

/*
  DMCreateMatrix_DA_1d_MPIAIJ - Preallocates (and, unless prealloc_only, zero-fills) an AIJ matrix for a 1d DMDA.
*/
PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ(DM da,Mat J)
{
  PetscInt xs,nx,i,i1,slot,gxs,gnx;
  PetscInt m,dim,s,*cols =
NULL,nc,*rows = NULL,col,cnt,l; 1660 PetscInt istart,iend; 1661 DMBoundaryType bx; 1662 ISLocalToGlobalMapping ltog,mltog; 1663 1664 PetscFunctionBegin; 1665 /* 1666 nc - number of components per grid point 1667 col - number of colors needed in one direction for single component problem 1668 1669 */ 1670 PetscCall(DMDAGetInfo(da,&dim,&m,NULL,NULL,NULL,NULL,NULL,&nc,&s,&bx,NULL,NULL,NULL)); 1671 if (bx == DM_BOUNDARY_NONE) { 1672 PetscCall(MatSetOption(J,MAT_SORTED_FULL,PETSC_TRUE)); 1673 } 1674 col = 2*s + 1; 1675 1676 PetscCall(DMDAGetCorners(da,&xs,NULL,NULL,&nx,NULL,NULL)); 1677 PetscCall(DMDAGetGhostCorners(da,&gxs,NULL,NULL,&gnx,NULL,NULL)); 1678 1679 PetscCall(MatSetBlockSize(J,nc)); 1680 PetscCall(MatSeqAIJSetPreallocation(J,col*nc,NULL)); 1681 PetscCall(MatMPIAIJSetPreallocation(J,col*nc,NULL,col*nc,NULL)); 1682 1683 PetscCall(DMGetLocalToGlobalMapping(da,<og)); 1684 PetscCall(MatGetLocalToGlobalMapping(J,&mltog,NULL)); 1685 if (!mltog) { 1686 PetscCall(MatSetLocalToGlobalMapping(J,ltog,ltog)); 1687 } 1688 1689 /* 1690 For each node in the grid: we get the neighbors in the local (on processor ordering 1691 that includes the ghost points) then MatSetValuesLocal() maps those indices to the global 1692 PETSc ordering. 
 */
  /* NOTE(review): tail of the preceding 1d AIJ creation routine; its header is above this chunk.
     Inserts explicit zeros at every stencil position so the nonzero structure is fixed before
     MAT_NEW_NONZERO_LOCATION_ERR is enabled below. */
  if (!da->prealloc_only) {
    /* rows: the nc matrix rows of one grid point; cols: all their stencil columns */
    PetscCall(PetscMalloc2(nc,&rows,col*nc*nc,&cols));
    for (i=xs; i<xs+nx; i++) {
      /* clip the stencil so only columns inside the locally available ghosted region are generated */
      istart = PetscMax(-s,gxs - i);
      iend   = PetscMin(s,gxs + gnx - i - 1);
      slot   = i - gxs;

      cnt = 0;
      for (i1=istart; i1<iend+1; i1++) {
        /* first dof of this stencil point, then the remaining nc-1 consecutive dofs */
        cols[cnt++] = nc*(slot + i1);
        for (l=1; l<nc; l++) {
          cols[cnt] = 1 + cols[cnt-1];cnt++;
        }
      }
      /* the nc matrix rows belonging to this grid point; NULL values => only the pattern is set */
      rows[0] = nc*(slot); for (l=1; l<nc; l++) rows[l] = 1 + rows[l-1];
      PetscCall(MatSetValuesLocal(J,nc,rows,cnt,cols,NULL,INSERT_VALUES));
    }
    /* do not copy values to GPU since they are all zero and not yet needed there */
    PetscCall(MatBindToCPU(J,PETSC_TRUE));
    PetscCall(MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY));
    if (bx == DM_BOUNDARY_NONE) {
      PetscCall(MatSetOption(J,MAT_SORTED_FULL,PETSC_FALSE));
    }
    PetscCall(MatBindToCPU(J,PETSC_FALSE));
    PetscCall(MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE));
    PetscCall(PetscFree2(rows,cols));
  }
  PetscFunctionReturn(0);
}

/* ---------------------------------------------------------------------------------*/

/*
  DMCreateMatrix_DA_1d_SeqAIJ_NoPreallocation - Builds the AIJ matrix structure for a 1d DMDA
  using only a total-size preallocation (MatSeqAIJSetTotalPreallocation) rather than per-row
  counts, then (unless da->prealloc_only) inserts explicit zeros at every stencil location.

  Each grid point contributes nc rows coupled densely (nc x nc blocks) with the col = 2*s+1
  grid points within stencil width s.
*/
PetscErrorCode DMCreateMatrix_DA_1d_SeqAIJ_NoPreallocation(DM da,Mat J)
{
  PetscInt               xs,nx,i,i1,slot,gxs,gnx;
  PetscInt               m,dim,s,*cols = NULL,nc,*rows = NULL,col,cnt,l;
  PetscInt               istart,iend;
  DMBoundaryType         bx;
  ISLocalToGlobalMapping ltog,mltog;

  PetscFunctionBegin;
  /*
         nc - number of components per grid point
         col - number of colors needed in one direction for single component problem
  */
  PetscCall(DMDAGetInfo(da,&dim,&m,NULL,NULL,NULL,NULL,NULL,&nc,&s,&bx,NULL,NULL,NULL));
  col = 2*s + 1;

  PetscCall(DMDAGetCorners(da,&xs,NULL,NULL,&nx,NULL,NULL));
  PetscCall(DMDAGetGhostCorners(da,&gxs,NULL,NULL,&gnx,NULL,NULL));

  PetscCall(MatSetBlockSize(J,nc));
  /* reserve the worst-case total number of nonzeros: nx*nc rows with up to col*nc entries each */
  PetscCall(MatSeqAIJSetTotalPreallocation(J,nx*nc*col*nc));

  PetscCall(DMGetLocalToGlobalMapping(da,&ltog));
  PetscCall(MatGetLocalToGlobalMapping(J,&mltog,NULL));
  if (!mltog) {
    PetscCall(MatSetLocalToGlobalMapping(J,ltog,ltog));
  }

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    PetscCall(PetscMalloc2(nc,&rows,col*nc*nc,&cols));
    for (i=xs; i<xs+nx; i++) {
      /* clip the stencil to the locally available ghosted region */
      istart = PetscMax(-s,gxs - i);
      iend   = PetscMin(s,gxs + gnx - i - 1);
      slot   = i - gxs;

      cnt = 0;
      for (i1=istart; i1<iend+1; i1++) {
        /* first dof of this stencil point, then the remaining nc-1 consecutive dofs */
        cols[cnt++] = nc*(slot + i1);
        for (l=1; l<nc; l++) {
          cols[cnt] = 1 + cols[cnt-1];cnt++;
        }
      }
      rows[0] = nc*(slot); for (l=1; l<nc; l++) rows[l] = 1 + rows[l-1];
      PetscCall(MatSetValuesLocal(J,nc,rows,cnt,cols,NULL,INSERT_VALUES));
    }
    /* do not copy values to GPU since they are all zero and not yet needed there */
    PetscCall(MatBindToCPU(J,PETSC_TRUE));
    PetscCall(MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY));
    if (bx == DM_BOUNDARY_NONE) {
      PetscCall(MatSetOption(J,MAT_SORTED_FULL,PETSC_FALSE));
    }
    PetscCall(MatBindToCPU(J,PETSC_FALSE));
    PetscCall(MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE));
    PetscCall(PetscFree2(rows,cols));
  }
  PetscCall(MatSetOption(J,MAT_SORTED_FULL,PETSC_FALSE));
  PetscFunctionReturn(0);
}

/*
  DMCreateMatrix_DA_2d_MPIBAIJ - Preallocates and (unless da->prealloc_only) zero-fills a BAIJ
  matrix for a 2d DMDA with block size nc: one block row per grid point, block columns given by
  the box or star stencil; periodic wrapping is resolved through the local-to-global mapping.
*/
PetscErrorCode DMCreateMatrix_DA_2d_MPIBAIJ(DM da,Mat J)
{
  PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
  PetscInt               m,n,dim,s,*cols,nc,col,cnt,*dnz,*onz;
  PetscInt               istart,iend,jstart,jend,ii,jj;
  MPI_Comm               comm;
  PetscScalar            *values;
  DMBoundaryType         bx,by;
  DMDAStencilType        st;
  ISLocalToGlobalMapping ltog;

  PetscFunctionBegin;
  /*
     nc - number of components per grid point
     col - number of colors needed in one direction for single component problem
  */
  PetscCall(DMDAGetInfo(da,&dim,&m,&n,NULL,NULL,NULL,NULL,&nc,&s,&bx,&by,NULL,&st));
  col = 2*s + 1;

  PetscCall(DMDAGetCorners(da,&xs,&ys,NULL,&nx,&ny,NULL));
  PetscCall(DMDAGetGhostCorners(da,&gxs,&gys,NULL,&gnx,&gny,NULL));
  PetscCall(PetscObjectGetComm((PetscObject)da,&comm));

  PetscCall(PetscMalloc1(col*col*nc*nc,&cols));

  PetscCall(DMGetLocalToGlobalMapping(da,&ltog));

  /* determine the matrix preallocation information */
  MatPreallocateBegin(comm,nx*ny,nx*ny,dnz,onz);
  for (i=xs; i<xs+nx; i++) {
    /* periodic boundaries keep the full stencil; otherwise clip it at the physical domain edges */
    istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
    iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,m-i-1));
    for (j=ys; j<ys+ny; j++) {
      jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
      jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,n-j-1));
      slot   = i - gxs + gnx*(j - gys);

      /* Find block columns in block row */
      cnt = 0;
      for (ii=istart; ii<iend+1; ii++) {
        for (jj=jstart; jj<jend+1; jj++) {
          if (st == DMDA_STENCIL_BOX || !ii || !jj) { /* BOX or on the STAR */
            cols[cnt++] = slot + ii + gnx*jj;
          }
        }
      }
      PetscCall(MatPreallocateSetLocalBlock(ltog,1,&slot,ltog,cnt,cols,dnz,onz));
    }
  }
  PetscCall(MatSeqBAIJSetPreallocation(J,nc,0,dnz));
  PetscCall(MatMPIBAIJSetPreallocation(J,nc,0,dnz,0,onz));
  MatPreallocateEnd(dnz,onz);

  PetscCall(MatSetLocalToGlobalMapping(J,ltog,ltog));

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    /* one zeroed nc x nc block per stencil position, reused for every block row */
    PetscCall(PetscCalloc1(col*col*nc*nc,&values));
    for (i=xs; i<xs+nx; i++) {
      istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
      iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,m-i-1));
      for (j=ys; j<ys+ny; j++) {
        jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
        jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,n-j-1));
        slot   = i - gxs + gnx*(j - gys);
        cnt    = 0;
        for (ii=istart; ii<iend+1; ii++) {
          for (jj=jstart; jj<jend+1; jj++) {
            if (st == DMDA_STENCIL_BOX || !ii || !jj) { /* BOX or on the STAR */
              cols[cnt++] = slot + ii + gnx*jj;
            }
          }
        }
        PetscCall(MatSetValuesBlockedLocal(J,1,&slot,cnt,cols,values,INSERT_VALUES));
      }
    }
    PetscCall(PetscFree(values));
    /* do not copy values to GPU since they are all zero and not yet needed there */
    PetscCall(MatBindToCPU(J,PETSC_TRUE));
    PetscCall(MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY));
    PetscCall(MatBindToCPU(J,PETSC_FALSE));
    PetscCall(MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE));
  }
  PetscCall(PetscFree(cols));
  PetscFunctionReturn(0);
}

/*
  DMCreateMatrix_DA_3d_MPIBAIJ - 3d analogue of DMCreateMatrix_DA_2d_MPIBAIJ: preallocates and
  (unless da->prealloc_only) zero-fills a BAIJ matrix with block size nc for a 3d DMDA using the
  box or star stencil.
*/
PetscErrorCode DMCreateMatrix_DA_3d_MPIBAIJ(DM da,Mat J)
{
  PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
  PetscInt               m,n,dim,s,*cols,k,nc,col,cnt,p,*dnz,*onz;
  PetscInt               istart,iend,jstart,jend,kstart,kend,zs,nz,gzs,gnz,ii,jj,kk;
  MPI_Comm               comm;
  PetscScalar            *values;
  DMBoundaryType         bx,by,bz;
  DMDAStencilType        st;
  ISLocalToGlobalMapping ltog;

  PetscFunctionBegin;
  /*
         nc - number of components per grid point
         col - number of colors needed in one direction for single component problem
  */
  PetscCall(DMDAGetInfo(da,&dim,&m,&n,&p,NULL,NULL,NULL,&nc,&s,&bx,&by,&bz,&st));
  col = 2*s + 1;

  PetscCall(DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz));
  PetscCall(DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz));
  PetscCall(PetscObjectGetComm((PetscObject)da,&comm));

  PetscCall(PetscMalloc1(col*col*col,&cols));

  PetscCall(DMGetLocalToGlobalMapping(da,&ltog));

  /* determine the matrix preallocation information */
  MatPreallocateBegin(comm,nx*ny*nz,nx*ny*nz,dnz,onz);
  for (i=xs; i<xs+nx; i++) {
    istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
    iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,m-i-1));
    for (j=ys; j<ys+ny; j++) {
      jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
      jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,n-j-1));
      for (k=zs; k<zs+nz; k++) {
        kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
        kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,p-k-1));

        slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);

        /* Find block columns in block row */
        cnt = 0;
        for (ii=istart; ii<iend+1; ii++) {
          for (jj=jstart; jj<jend+1; jj++) {
            for (kk=kstart; kk<kend+1; kk++) {
              /* star keeps only offsets on the coordinate axes (at least two of ii,jj,kk zero) */
              if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
                cols[cnt++] = slot + ii + gnx*jj + gnx*gny*kk;
              }
            }
          }
        }
        PetscCall(MatPreallocateSetLocalBlock(ltog,1,&slot,ltog,cnt,cols,dnz,onz));
      }
    }
  }
  PetscCall(MatSeqBAIJSetPreallocation(J,nc,0,dnz));
  PetscCall(MatMPIBAIJSetPreallocation(J,nc,0,dnz,0,onz));
  MatPreallocateEnd(dnz,onz);

  PetscCall(MatSetLocalToGlobalMapping(J,ltog,ltog));

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    /* one zeroed nc x nc block per stencil position, reused for every block row */
    PetscCall(PetscCalloc1(col*col*col*nc*nc,&values));
    for (i=xs; i<xs+nx; i++) {
      istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
      iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,m-i-1));
      for (j=ys; j<ys+ny; j++) {
        jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
        jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,n-j-1));
        for (k=zs; k<zs+nz; k++) {
          kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
          kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,p-k-1));

          slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);

          cnt = 0;
          for (ii=istart; ii<iend+1; ii++) {
            for (jj=jstart; jj<jend+1; jj++) {
              for (kk=kstart; kk<kend+1; kk++) {
                if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
                  cols[cnt++] = slot + ii + gnx*jj + gnx*gny*kk;
                }
              }
            }
          }
          PetscCall(MatSetValuesBlockedLocal(J,1,&slot,cnt,cols,values,INSERT_VALUES));
        }
      }
    }
    PetscCall(PetscFree(values));
    /* do not copy values to GPU since they are all zero and not yet needed there */
    PetscCall(MatBindToCPU(J,PETSC_TRUE));
    PetscCall(MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY));
    PetscCall(MatBindToCPU(J,PETSC_FALSE));
    PetscCall(MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE));
  }
  PetscCall(PetscFree(cols));
  PetscFunctionReturn(0);
}

/*
    This helper is for SBAIJ preallocation, to discard the lower-triangular values which are difficult to
    identify in the local ordering with periodic domain.
 */
static PetscErrorCode L2GFilterUpperTriangular(ISLocalToGlobalMapping ltog,PetscInt *row,PetscInt *cnt,PetscInt col[])
{
  PetscInt i,n;

  PetscFunctionBegin;
  /* map the block row index and the cnt block column indices from local to global numbering ... */
  PetscCall(ISLocalToGlobalMappingApplyBlock(ltog,1,row,row));
  PetscCall(ISLocalToGlobalMappingApplyBlock(ltog,*cnt,col,col));
  /* ... then compact col[] in place, keeping only upper-triangular entries (col >= row) */
  for (i=0,n=0; i<*cnt; i++) {
    if (col[i] >= *row) col[n++] = col[i];
  }
  *cnt = n;
  PetscFunctionReturn(0);
}

/*
  DMCreateMatrix_DA_2d_MPISBAIJ - Preallocates and (unless da->prealloc_only) zero-fills a
  symmetric-block (SBAIJ) matrix for a 2d DMDA: only the upper triangular part of each block row
  is stored, selected in global numbering by L2GFilterUpperTriangular().
*/
PetscErrorCode DMCreateMatrix_DA_2d_MPISBAIJ(DM da,Mat J)
{
  PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
  PetscInt               m,n,dim,s,*cols,nc,col,cnt,*dnz,*onz;
  PetscInt               istart,iend,jstart,jend,ii,jj;
  MPI_Comm               comm;
  PetscScalar            *values;
  DMBoundaryType         bx,by;
  DMDAStencilType        st;
  ISLocalToGlobalMapping ltog;

  PetscFunctionBegin;
  /*
     nc - number of components per grid point
     col - number of colors needed in one direction for single component problem
  */
  PetscCall(DMDAGetInfo(da,&dim,&m,&n,NULL,NULL,NULL,NULL,&nc,&s,&bx,&by,NULL,&st));
  col = 2*s + 1;

  PetscCall(DMDAGetCorners(da,&xs,&ys,NULL,&nx,&ny,NULL));
  PetscCall(DMDAGetGhostCorners(da,&gxs,&gys,NULL,&gnx,&gny,NULL));
  PetscCall(PetscObjectGetComm((PetscObject)da,&comm));

  PetscCall(PetscMalloc1(col*col*nc*nc,&cols));

  PetscCall(DMGetLocalToGlobalMapping(da,&ltog));

  /* determine the matrix preallocation information */
  MatPreallocateBegin(comm,nx*ny,nx*ny,dnz,onz);
  for (i=xs; i<xs+nx; i++) {
    /* periodic boundaries keep the full stencil; otherwise clip it at the physical domain edges */
    istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
    iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,m-i-1));
    for (j=ys; j<ys+ny; j++) {
      jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
      jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,n-j-1));
      slot   = i - gxs + gnx*(j - gys);

      /* Find block columns in block row */
      cnt = 0;
      for (ii=istart; ii<iend+1; ii++) {
        for (jj=jstart; jj<jend+1; jj++) {
          if (st == DMDA_STENCIL_BOX || !ii || !jj) {
            cols[cnt++] = slot + ii + gnx*jj;
          }
        }
      }
      /* converts slot/cols to global numbering and drops the lower-triangular columns */
      PetscCall(L2GFilterUpperTriangular(ltog,&slot,&cnt,cols));
      PetscCall(MatPreallocateSymmetricSetBlock(slot,cnt,cols,dnz,onz));
    }
  }
  PetscCall(MatSeqSBAIJSetPreallocation(J,nc,0,dnz));
  PetscCall(MatMPISBAIJSetPreallocation(J,nc,0,dnz,0,onz));
  MatPreallocateEnd(dnz,onz);

  PetscCall(MatSetLocalToGlobalMapping(J,ltog,ltog));

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    /* one zeroed nc x nc block per stencil position, reused for every block row */
    PetscCall(PetscCalloc1(col*col*nc*nc,&values));
    for (i=xs; i<xs+nx; i++) {
      istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
      iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,m-i-1));
      for (j=ys; j<ys+ny; j++) {
        jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
        jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,n-j-1));
        slot   = i - gxs + gnx*(j - gys);

        /* Find block columns in block row */
        cnt = 0;
        for (ii=istart; ii<iend+1; ii++) {
          for (jj=jstart; jj<jend+1; jj++) {
            if (st == DMDA_STENCIL_BOX || !ii || !jj) {
              cols[cnt++] = slot + ii + gnx*jj;
            }
          }
        }
        PetscCall(L2GFilterUpperTriangular(ltog,&slot,&cnt,cols));
        /* indices are already global after the filter, hence MatSetValuesBlocked (not ...Local) */
        PetscCall(MatSetValuesBlocked(J,1,&slot,cnt,cols,values,INSERT_VALUES));
      }
    }
    PetscCall(PetscFree(values));
    /* do not copy values to GPU since they are all zero and not yet needed there */
    PetscCall(MatBindToCPU(J,PETSC_TRUE));
    PetscCall(MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY));
    PetscCall(MatBindToCPU(J,PETSC_FALSE));
    PetscCall(MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE));
  }
  PetscCall(PetscFree(cols));
  PetscFunctionReturn(0);
}

/*
  DMCreateMatrix_DA_3d_MPISBAIJ - 3d analogue of DMCreateMatrix_DA_2d_MPISBAIJ: preallocates and
  (unless da->prealloc_only) zero-fills the upper triangular part of an SBAIJ matrix for a 3d DMDA.
*/
PetscErrorCode DMCreateMatrix_DA_3d_MPISBAIJ(DM da,Mat J)
{
  PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
  PetscInt               m,n,dim,s,*cols,k,nc,col,cnt,p,*dnz,*onz;
  PetscInt               istart,iend,jstart,jend,kstart,kend,zs,nz,gzs,gnz,ii,jj,kk;
  MPI_Comm               comm;
  PetscScalar            *values;
  DMBoundaryType         bx,by,bz;
  DMDAStencilType        st;
  ISLocalToGlobalMapping ltog;

  PetscFunctionBegin;
  /*
     nc - number of components per grid point
     col - number of colors needed in one direction for single component problem
  */
  PetscCall(DMDAGetInfo(da,&dim,&m,&n,&p,NULL,NULL,NULL,&nc,&s,&bx,&by,&bz,&st));
  col = 2*s + 1;

  PetscCall(DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz));
  PetscCall(DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz));
  PetscCall(PetscObjectGetComm((PetscObject)da,&comm));

  /* create the matrix */
  PetscCall(PetscMalloc1(col*col*col,&cols));

  PetscCall(DMGetLocalToGlobalMapping(da,&ltog));

  /* determine the matrix preallocation information */
  MatPreallocateBegin(comm,nx*ny*nz,nx*ny*nz,dnz,onz);
  for (i=xs; i<xs+nx; i++) {
    istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
    iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,m-i-1));
    for (j=ys; j<ys+ny; j++) {
      jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
      jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,n-j-1));
      for (k=zs; k<zs+nz; k++) {
        kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
        kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,p-k-1));

        slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);

        /* Find block columns in block row */
        cnt = 0;
        for (ii=istart; ii<iend+1; ii++) {
          for (jj=jstart; jj<jend+1; jj++) {
            for (kk=kstart; kk<kend+1; kk++) {
              /* star keeps only offsets on the coordinate axes (at least two of ii,jj,kk zero) */
              if ((st == DMDA_STENCIL_BOX) || (!ii && !jj) || (!jj && !kk) || (!ii && !kk)) {
                cols[cnt++] = slot + ii + gnx*jj + gnx*gny*kk;
              }
            }
          }
        }
        PetscCall(L2GFilterUpperTriangular(ltog,&slot,&cnt,cols));
        PetscCall(MatPreallocateSymmetricSetBlock(slot,cnt,cols,dnz,onz));
      }
    }
  }
  PetscCall(MatSeqSBAIJSetPreallocation(J,nc,0,dnz));
  PetscCall(MatMPISBAIJSetPreallocation(J,nc,0,dnz,0,onz));
  MatPreallocateEnd(dnz,onz);

  PetscCall(MatSetLocalToGlobalMapping(J,ltog,ltog));

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    /* one zeroed nc x nc block per stencil position, reused for every block row */
    PetscCall(PetscCalloc1(col*col*col*nc*nc,&values));
    for (i=xs; i<xs+nx; i++) {
      istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
      iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,m-i-1));
      for (j=ys; j<ys+ny; j++) {
        jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
        jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,n-j-1));
        for (k=zs; k<zs+nz; k++) {
          kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
          kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,p-k-1));

          slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);

          cnt = 0;
          for (ii=istart; ii<iend+1; ii++) {
            for (jj=jstart; jj<jend+1; jj++) {
              for (kk=kstart; kk<kend+1; kk++) {
                if ((st == DMDA_STENCIL_BOX) || (!ii && !jj) || (!jj && !kk) || (!ii && !kk)) {
                  cols[cnt++] = slot + ii + gnx*jj + gnx*gny*kk;
                }
              }
            }
          }
          PetscCall(L2GFilterUpperTriangular(ltog,&slot,&cnt,cols));
          /* indices are already global after the filter, hence MatSetValuesBlocked (not ...Local) */
          PetscCall(MatSetValuesBlocked(J,1,&slot,cnt,cols,values,INSERT_VALUES));
        }
      }
    }
    PetscCall(PetscFree(values));
    /* do not copy values to GPU since they are all zero and not yet needed there */
    PetscCall(MatBindToCPU(J,PETSC_TRUE));
    PetscCall(MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY));
    PetscCall(MatBindToCPU(J,PETSC_FALSE));
    PetscCall(MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE));
  }
  PetscCall(PetscFree(cols));
  PetscFunctionReturn(0);
}

/* ---------------------------------------------------------------------------------*/

/*
  DMCreateMatrix_DA_3d_MPIAIJ_Fill - Preallocates and (unless da->prealloc_only) zero-fills an AIJ
  matrix for a 3d DMDA using the per-block fill patterns set by DMDASetBlockFills(): dfill gives
  the dof coupling inside the diagonal (same grid point) block, ofill the coupling to neighboring
  grid points; a NULL dfill means dense diagonal-block coupling.
*/
PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ_Fill(DM da,Mat J)
{
  PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
  PetscInt               m,n,dim,s,*cols,k,nc,row,col,cnt, maxcnt = 0,l,p,*dnz,*onz;
  PetscInt               istart,iend,jstart,jend,kstart,kend,zs,nz,gzs,gnz,ii,jj,kk,M,N,P;
  DM_DA                  *dd = (DM_DA*)da->data;
  PetscInt               ifill_col,*dfill = dd->dfill,*ofill = dd->ofill;
  MPI_Comm               comm;
  PetscScalar            *values;
  DMBoundaryType         bx,by,bz;
  ISLocalToGlobalMapping ltog;
  DMDAStencilType        st;
  PetscBool              removedups = PETSC_FALSE;

  PetscFunctionBegin;
  /*
         nc - number of components per grid point
         col - number of colors needed in one direction for single component problem
  */
  PetscCall(DMDAGetInfo(da,&dim,&m,&n,&p,&M,&N,&P,&nc,&s,&bx,&by,&bz,&st));
  col = 2*s + 1;
  PetscCheck(bx != DM_BOUNDARY_PERIODIC || (m % col) == 0,PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in X is divisible\n\
by 2*stencil_width + 1\n");
  PetscCheck(by != DM_BOUNDARY_PERIODIC || (n % col) == 0,PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in Y is divisible\n\
by 2*stencil_width + 1\n");
  PetscCheck(bz != DM_BOUNDARY_PERIODIC || (p % col) == 0,PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in Z is divisible\n\
by 2*stencil_width + 1\n");

  /*
       With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times
       because of "wrapping" around the end of the domain hitting an entry already counted in the other direction.
  */
  if (M == 1 && 2*s >= m) removedups = PETSC_TRUE;
  if (N == 1 && 2*s >= n) removedups = PETSC_TRUE;
  if (P == 1 && 2*s >= p) removedups = PETSC_TRUE;

  PetscCall(DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz));
  PetscCall(DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz));
  PetscCall(PetscObjectGetComm((PetscObject)da,&comm));

  PetscCall(PetscMalloc1(col*col*col*nc,&cols));
  PetscCall(DMGetLocalToGlobalMapping(da,&ltog));

  /* determine the matrix preallocation information */
  MatPreallocateBegin(comm,nc*nx*ny*nz,nc*nx*ny*nz,dnz,onz);

  PetscCall(MatSetBlockSize(J,nc));
  for (i=xs; i<xs+nx; i++) {
    istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
    iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,m-i-1));
    for (j=ys; j<ys+ny; j++) {
      jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
      jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,n-j-1));
      for (k=zs; k<zs+nz; k++) {
        kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
        kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,p-k-1));

        slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);

        /* one matrix row per dof l of this grid point */
        for (l=0; l<nc; l++) {
          cnt = 0;
          for (ii=istart; ii<iend+1; ii++) {
            for (jj=jstart; jj<jend+1; jj++) {
              for (kk=kstart; kk<kend+1; kk++) {
                if (ii || jj || kk) {
                  /* off-diagonal grid point: couple only the dofs listed in ofill row l */
                  if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
                    for (ifill_col=ofill[l]; ifill_col<ofill[l+1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc*(slot + ii + gnx*jj + gnx*gny*kk);
                  }
                } else {
                  /* same grid point: use dfill row l, or all nc dofs when dfill is NULL (dense block) */
                  if (dfill) {
                    for (ifill_col=dfill[l]; ifill_col<dfill[l+1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc*(slot + ii + gnx*jj + gnx*gny*kk);
                  } else {
                    for (ifill_col=0; ifill_col<nc; ifill_col++) cols[cnt++] = ifill_col + nc*(slot + ii + gnx*jj + gnx*gny*kk);
                  }
                }
              }
            }
          }
          row    = l + nc*(slot);
          maxcnt = PetscMax(maxcnt,cnt); /* longest row seen; sizes the zero-values buffer below */
          if (removedups) {
            PetscCall(MatPreallocateSetLocalRemoveDups(ltog,1,&row,ltog,cnt,cols,dnz,onz));
          } else {
            PetscCall(MatPreallocateSetLocal(ltog,1,&row,ltog,cnt,cols,dnz,onz));
          }
        }
      }
    }
  }
  PetscCall(MatSeqAIJSetPreallocation(J,0,dnz));
  PetscCall(MatMPIAIJSetPreallocation(J,0,dnz,0,onz));
  MatPreallocateEnd(dnz,onz);
  PetscCall(MatSetLocalToGlobalMapping(J,ltog,ltog));

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    PetscCall(PetscCalloc1(maxcnt,&values));
    for (i=xs; i<xs+nx; i++) {
      istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
      iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,m-i-1));
      for (j=ys; j<ys+ny; j++) {
        jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
        jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,n-j-1));
        for (k=zs; k<zs+nz; k++) {
          kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
          kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,p-k-1));

          slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);

          for (l=0; l<nc; l++) {
            cnt = 0;
            for (ii=istart; ii<iend+1; ii++) {
              for (jj=jstart; jj<jend+1; jj++) {
                for (kk=kstart; kk<kend+1; kk++) {
                  if (ii || jj || kk) {
                    if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
                      for (ifill_col=ofill[l]; ifill_col<ofill[l+1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc*(slot + ii + gnx*jj + gnx*gny*kk);
                    }
                  } else {
                    if (dfill) {
                      for (ifill_col=dfill[l]; ifill_col<dfill[l+1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc*(slot + ii + gnx*jj + gnx*gny*kk);
                    } else {
                      for (ifill_col=0; ifill_col<nc; ifill_col++) cols[cnt++] = ifill_col + nc*(slot + ii + gnx*jj + gnx*gny*kk);
                    }
                  }
                }
              }
            }
            row = l + nc*(slot);
            PetscCall(MatSetValuesLocal(J,1,&row,cnt,cols,values,INSERT_VALUES));
          }
        }
      }
    }
    PetscCall(PetscFree(values));
    /* do not copy values to GPU since they are all zero and not yet needed there */
    PetscCall(MatBindToCPU(J,PETSC_TRUE));
    PetscCall(MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY));
    PetscCall(MatBindToCPU(J,PETSC_FALSE));
    PetscCall(MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE));
  }
  PetscCall(PetscFree(cols));
  PetscFunctionReturn(0);
}