#include <petsc/private/dmdaimpl.h> /*I "petscdmda.h" I*/
#include <petscmat.h>
#include <petscbt.h>

extern PetscErrorCode DMCreateColoring_DA_1d_MPIAIJ(DM,ISColoringType,ISColoring*);
extern PetscErrorCode DMCreateColoring_DA_2d_MPIAIJ(DM,ISColoringType,ISColoring*);
extern PetscErrorCode DMCreateColoring_DA_2d_5pt_MPIAIJ(DM,ISColoringType,ISColoring*);
extern PetscErrorCode DMCreateColoring_DA_3d_MPIAIJ(DM,ISColoringType,ISColoring*);

/*
   For ghost i that may be negative or greater than the upper bound this
   maps it into the 0:m-1 range using periodicity
*/
#define SetInRange(i,m) ((i < 0) ? m+i : ((i >= m) ? i-m : i))

/*
   Converts a dense w-by-w 0/1 fill matrix (dfill) into the "modified CSR"
   representation used internally by DMDA: the first w+1 entries of *rfill are
   the start offsets of each row's column indices (entry w being the total
   array length), followed by the column indices of the nonzeros.

   A NULL dfill means "dense block": nothing is allocated and *rfill is left
   untouched.  On success the caller owns the returned array.
*/
static PetscErrorCode DMDASetBlockFills_Private(const PetscInt *dfill,PetscInt w,PetscInt **rfill)
{
  PetscInt i,j,nz,*fill;

  PetscFunctionBegin;
  if (!dfill) PetscFunctionReturn(0);

  /* count number nonzeros */
  nz = 0;
  for (i=0; i<w; i++) {
    for (j=0; j<w; j++) {
      if (dfill[w*i+j]) nz++;
    }
  }
  PetscCall(PetscMalloc1(nz + w + 1,&fill));
  /* construct modified CSR storage of nonzero structure */
  /* fill[0 -- w] marks starts of each row of column indices (and end of last row)
     so fill[1] - fill[0] gives number of nonzeros in first row etc */
  nz = w + 1;
  for (i=0; i<w; i++) {
    fill[i] = nz;
    for (j=0; j<w; j++) {
      if (dfill[w*i+j]) {
        fill[nz] = j;
        nz++;
      }
    }
  }
  fill[w] = nz;

  *rfill = fill;
  PetscFunctionReturn(0);
}

/*
   Copies a fill pattern that is already in the sparse (modified CSR) layout
   described above into freshly allocated storage in *rfill.  A NULL
   dfillsparse is a no-op, matching DMDASetBlockFills_Private().
*/
static PetscErrorCode DMDASetBlockFillsSparse_Private(const PetscInt *dfillsparse,PetscInt w,PetscInt **rfill)
{
  PetscInt nz;

  PetscFunctionBegin;
  if (!dfillsparse) PetscFunctionReturn(0);

  /* Determine number of non-zeros: entry w stores the total length nz + w + 1 */
  nz = (dfillsparse[w] - w - 1);

  /* Allocate space for our copy of the given sparse matrix representation. */
  PetscCall(PetscMalloc1(nz + w + 1,rfill));
  PetscCall(PetscArraycpy(*rfill,dfillsparse,nz+w+1));
  PetscFunctionReturn(0);
}

/*
   Builds dd->ofillcols from dd->ofill: for each component (column of the
   off-diagonal fill pattern) that has at least one nonzero, records a 1-based
   running count of such columns; columns with no nonzeros stay 0.
   Assumes dd->ofill has already been set (modified CSR layout).
*/
static PetscErrorCode DMDASetBlockFills_Private2(DM_DA *dd)
{
  PetscInt i,k,cnt = 1;

  PetscFunctionBegin;

  /* ofillcols tracks the columns of ofill that have any nonzero in them; the value in each location is the number of
     columns to the left with any nonzeros in them plus 1 */
  PetscCall(PetscCalloc1(dd->w,&dd->ofillcols));
  for (i=0; i<dd->w; i++) {
    for (k=dd->ofill[i]; k<dd->ofill[i+1]; k++) dd->ofillcols[dd->ofill[k]] = 1;
  }
  for (i=0; i<dd->w; i++) {
    if (dd->ofillcols[i]) {
      dd->ofillcols[i] = cnt++;
    }
  }
  PetscFunctionReturn(0);
}

/*@
    DMDASetBlockFills - Sets the fill pattern in each block for a multi-component problem
    of the matrix returned by DMCreateMatrix().

    Logically Collective on da

    Input Parameters:
+   da - the distributed array
.   dfill - the fill pattern in the diagonal block (may be NULL, means use dense block)
-   ofill - the fill pattern in the off-diagonal blocks

    Level: developer

    Notes:
    This only makes sense when you are doing multicomponent problems but using the
    MPIAIJ matrix format

    The format for dfill and ofill is a 2 dimensional dof by dof matrix with 1 entries
    representing coupling and 0 entries for missing coupling. For example
$             dfill[9] = {1, 0, 0,
$                         1, 1, 0,
$                         0, 1, 1}
    means that row 0 is coupled with only itself in the diagonal block, row 1 is coupled with
    itself and row 0 (in the diagonal block) and row 2 is coupled with itself and row 1 (in the
    diagonal block).

    DMDASetGetMatrix() allows you to provide general code for those more complicated nonzero patterns than
    can be represented in the dfill, ofill format

    Contributed by Glenn Hammond

.seealso `DMCreateMatrix()`, `DMDASetGetMatrix()`, `DMSetMatrixPreallocateOnly()`

@*/
PetscErrorCode DMDASetBlockFills(DM da,const PetscInt *dfill,const PetscInt *ofill)
{
  DM_DA *dd = (DM_DA*)da->data;

  PetscFunctionBegin;
  /* save the given dfill and ofill information */
  PetscCall(DMDASetBlockFills_Private(dfill,dd->w,&dd->dfill));
  PetscCall(DMDASetBlockFills_Private(ofill,dd->w,&dd->ofill));

  /* count nonzeros in ofill columns */
  PetscCall(DMDASetBlockFills_Private2(dd));
  PetscFunctionReturn(0);
}

/*@
    DMDASetBlockFillsSparse - Sets the fill pattern in each block for a multi-component problem
    of the matrix returned by DMCreateMatrix(), using sparse representations
    of fill patterns.

    Logically Collective on da

    Input Parameters:
+   da - the distributed array
.   dfill - the sparse fill pattern in the diagonal block (may be NULL, means use dense block)
-   ofill - the sparse fill pattern in the off-diagonal blocks

    Level: developer

    Notes: This only makes sense when you are doing multicomponent problems but using the
    MPIAIJ matrix format

    The format for dfill and ofill is a sparse representation of a
    dof-by-dof matrix with 1 entries representing coupling and 0 entries
    for missing coupling. The sparse representation is a 1 dimensional
    array of length nz + dof + 1, where nz is the number of non-zeros in
    the matrix. The first dof entries in the array give the
    starting array indices of each row's items in the rest of the array,
    the dof+1st item contains the value nz + dof + 1 (i.e. the entire length of the array)
    and the remaining nz items give the column indices of each of
    the 1s within the logical 2D matrix. Each row's items within
    the array are the column indices of the 1s within that row
    of the 2D matrix. PETSc developers may recognize that this is the
    same format as that computed by the DMDASetBlockFills_Private()
    function from a dense 2D matrix representation.

    DMDASetGetMatrix() allows you to provide general code for those more complicated nonzero patterns than
    can be represented in the dfill, ofill format

    Contributed by Philip C. Roth

.seealso `DMDASetBlockFills()`, `DMCreateMatrix()`, `DMDASetGetMatrix()`, `DMSetMatrixPreallocateOnly()`

@*/
PetscErrorCode DMDASetBlockFillsSparse(DM da,const PetscInt *dfillsparse,const PetscInt *ofillsparse)
{
  DM_DA *dd = (DM_DA*)da->data;

  PetscFunctionBegin;
  /* save the given dfill and ofill information */
  PetscCall(DMDASetBlockFillsSparse_Private(dfillsparse,dd->w,&dd->dfill));
  PetscCall(DMDASetBlockFillsSparse_Private(ofillsparse,dd->w,&dd->ofill));

  /* count nonzeros in ofill columns */
  PetscCall(DMDASetBlockFills_Private2(dd));
  PetscFunctionReturn(0);
}

/*
   Dispatches to the dimension-specific coloring routines below.  For BAIJ
   matrix types the DMDA is temporarily switched to 1 dof per grid point
   (and its x-ranges rescaled) so the coloring is per block rather than per
   scalar entry; the original values are restored before returning.
*/
PetscErrorCode DMCreateColoring_DA(DM da,ISColoringType ctype,ISColoring *coloring)
{
  PetscInt       dim,m,n,p,nc;
  DMBoundaryType bx,by,bz;
  MPI_Comm       comm;
  PetscMPIInt    size;
  PetscBool      isBAIJ;
  DM_DA          *dd = (DM_DA*)da->data;

  PetscFunctionBegin;
  /*
                                                  m
          ------------------------------------------------------
         |                                                      |
         |                                                      |
         |               ----------------------                 |
         |               |                    |                 |
      n  |           yn  |                    |                 |
         |               |                    |                 |
         |               .---------------------                 |
         |             (xs,ys)     xn                           |
         |            .                                         |
         |         (gxs,gys)                                    |
         |                                                      |
          -----------------------------------------------------
  */

  /*
         nc - number of components per grid point
         col - number of colors needed in one direction for single component problem
  */
  PetscCall(DMDAGetInfo(da,&dim,NULL,NULL,NULL,&m,&n,&p,&nc,NULL,&bx,&by,&bz,NULL));

  PetscCall(PetscObjectGetComm((PetscObject)da,&comm));
  PetscCallMPI(MPI_Comm_size(comm,&size));
  if (ctype == IS_COLORING_LOCAL) {
    if (size == 1) {
      /* on one process local and global colorings coincide; use the cheaper path */
      ctype = IS_COLORING_GLOBAL;
    } else if (dim > 1) {
      if ((m==1 && bx == DM_BOUNDARY_PERIODIC) || (n==1 && by == DM_BOUNDARY_PERIODIC) || (p==1 && bz == DM_BOUNDARY_PERIODIC)) {
        SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"IS_COLORING_LOCAL cannot be used for periodic boundary condition having both ends of the domain on the same process");
      }
    }
  }

  /* Tell the DMDA it has 1 degree of freedom per grid point so that the coloring for BAIJ
     matrices is for the blocks, not the individual matrix elements  */
  PetscCall(PetscStrbeginswith(da->mattype,MATBAIJ,&isBAIJ));
  if (!isBAIJ) PetscCall(PetscStrbeginswith(da->mattype,MATMPIBAIJ,&isBAIJ));
  if (!isBAIJ) PetscCall(PetscStrbeginswith(da->mattype,MATSEQBAIJ,&isBAIJ));
  if (isBAIJ) {
    dd->w  = 1;
    dd->xs = dd->xs/nc;
    dd->xe = dd->xe/nc;
    dd->Xs = dd->Xs/nc;
    dd->Xe = dd->Xe/nc;
  }

  /*
     We do not provide a getcoloring function in the DMDA operations because
     the basic DMDA does not know about matrices. We think of DMDA as being
     more low-level than matrices.
  */
  if (dim == 1) PetscCall(DMCreateColoring_DA_1d_MPIAIJ(da,ctype,coloring));
  else if (dim == 2) PetscCall(DMCreateColoring_DA_2d_MPIAIJ(da,ctype,coloring));
  else if (dim == 3) PetscCall(DMCreateColoring_DA_3d_MPIAIJ(da,ctype,coloring));
  else SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"Not done for %" PetscInt_FMT " dimension, send us mail petsc-maint@mcs.anl.gov for code",dim);
  if (isBAIJ) {
    /* restore the original dof and (scaled back) ranges */
    dd->w  = nc;
    dd->xs = dd->xs*nc;
    dd->xe = dd->xe*nc;
    dd->Xs = dd->Xs*nc;
    dd->Xe = dd->Xe*nc;
  }
  PetscFunctionReturn(0);
}

/* ---------------------------------------------------------------------------------*/

/*
   2d coloring: colors repeat with period col = 2*s+1 in each direction, times
   nc components per point.  The result is cached on the DM_DA (localcoloring /
   ghostedcoloring) and reference-counted before being returned.
*/
PetscErrorCode DMCreateColoring_DA_2d_MPIAIJ(DM da,ISColoringType ctype,ISColoring *coloring)
{
  PetscInt         xs,ys,nx,ny,i,j,ii,gxs,gys,gnx,gny,m,n,M,N,dim,s,k,nc,col;
  PetscInt         ncolors;
  MPI_Comm         comm;
  DMBoundaryType   bx,by;
  DMDAStencilType  st;
  ISColoringValue  *colors;
  DM_DA            *dd = (DM_DA*)da->data;

  PetscFunctionBegin;
  /*
         nc - number of components per grid point
         col - number of colors needed in one direction for single component problem
  */
  PetscCall(DMDAGetInfo(da,&dim,&m,&n,NULL,&M,&N,NULL,&nc,&s,&bx,&by,NULL,&st));
  col  = 2*s + 1;
  PetscCall(DMDAGetCorners(da,&xs,&ys,NULL,&nx,&ny,NULL));
  PetscCall(DMDAGetGhostCorners(da,&gxs,&gys,NULL,&gnx,&gny,NULL));
  PetscCall(PetscObjectGetComm((PetscObject)da,&comm));

  /* special case as taught to us by Paul Hovland */
  if (st == DMDA_STENCIL_STAR && s == 1) {
    PetscCall(DMCreateColoring_DA_2d_5pt_MPIAIJ(da,ctype,coloring));
  } else {
    if (ctype == IS_COLORING_GLOBAL) {
      if (!dd->localcoloring) {
        PetscCall(PetscMalloc1(nc*nx*ny,&colors));
        ii = 0;
        for (j=ys; j<ys+ny; j++) {
          for (i=xs; i<xs+nx; i++) {
            for (k=0; k<nc; k++) {
              colors[ii++] = k + nc*((i % col) + col*(j % col));
            }
          }
        }
        ncolors = nc + nc*(col-1 + col*(col-1));
        PetscCall(ISColoringCreate(comm,ncolors,nc*nx*ny,colors,PETSC_OWN_POINTER,&dd->localcoloring));
      }
      *coloring = dd->localcoloring;
    } else if (ctype == IS_COLORING_LOCAL) {
      if (!dd->ghostedcoloring) {
        PetscCall(PetscMalloc1(nc*gnx*gny,&colors));
        ii = 0;
        for (j=gys; j<gys+gny; j++) {
          for (i=gxs; i<gxs+gnx; i++) {
            for (k=0; k<nc; k++) {
              /* the complicated stuff is to handle periodic boundaries */
              colors[ii++] = k + nc*((SetInRange(i,m) % col) + col*(SetInRange(j,n) % col));
            }
          }
        }
        ncolors = nc + nc*(col - 1 + col*(col-1));
        PetscCall(ISColoringCreate(comm,ncolors,nc*gnx*gny,colors,PETSC_OWN_POINTER,&dd->ghostedcoloring));
        /* PetscIntView(ncolors,(PetscInt*)colors,0); */

        PetscCall(ISColoringSetType(dd->ghostedcoloring,IS_COLORING_LOCAL));
      }
      *coloring = dd->ghostedcoloring;
    } else SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_ARG_WRONG,"Unknown ISColoringType %d",(int)ctype);
  }
  PetscCall(ISColoringReference(*coloring));
  PetscFunctionReturn(0);
}

/* ---------------------------------------------------------------------------------*/

/*
   3d coloring: same periodic pattern as the 2d case extended with a
   col*col*(k % col) term for the third direction.  Results are cached on the
   DM_DA and reference-counted.
*/
PetscErrorCode DMCreateColoring_DA_3d_MPIAIJ(DM da,ISColoringType ctype,ISColoring *coloring)
{
  PetscInt         xs,ys,nx,ny,i,j,gxs,gys,gnx,gny,m,n,p,dim,s,k,nc,col,zs,gzs,ii,l,nz,gnz,M,N,P;
  PetscInt         ncolors;
  MPI_Comm         comm;
  DMBoundaryType   bx,by,bz;
  DMDAStencilType  st;
  ISColoringValue  *colors;
  DM_DA            *dd = (DM_DA*)da->data;

  PetscFunctionBegin;
  /*
         nc - number of components per grid point
         col - number of colors needed in one direction for single component problem
  */
  PetscCall(DMDAGetInfo(da,&dim,&m,&n,&p,&M,&N,&P,&nc,&s,&bx,&by,&bz,&st));
  col  = 2*s + 1;
  PetscCall(DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz));
  PetscCall(DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz));
  PetscCall(PetscObjectGetComm((PetscObject)da,&comm));

  /* create the coloring */
  if (ctype == IS_COLORING_GLOBAL) {
    if (!dd->localcoloring) {
      PetscCall(PetscMalloc1(nc*nx*ny*nz,&colors));
      ii = 0;
      for (k=zs; k<zs+nz; k++) {
        for (j=ys; j<ys+ny; j++) {
          for (i=xs; i<xs+nx; i++) {
            for (l=0; l<nc; l++) {
              colors[ii++] = l + nc*((i % col) + col*(j % col) + col*col*(k % col));
            }
          }
        }
      }
      ncolors = nc + nc*(col-1 + col*(col-1)+ col*col*(col-1));
      PetscCall(ISColoringCreate(comm,ncolors,nc*nx*ny*nz,colors,PETSC_OWN_POINTER,&dd->localcoloring));
    }
    *coloring = dd->localcoloring;
  } else if (ctype == IS_COLORING_LOCAL) {
    if (!dd->ghostedcoloring) {
      PetscCall(PetscMalloc1(nc*gnx*gny*gnz,&colors));
      ii = 0;
      for (k=gzs; k<gzs+gnz; k++) {
        for (j=gys; j<gys+gny; j++) {
          for (i=gxs; i<gxs+gnx; i++) {
            for (l=0; l<nc; l++) {
              /* the complicated stuff is to handle periodic boundaries */
              colors[ii++] = l + nc*((SetInRange(i,m) % col) + col*(SetInRange(j,n) % col) + col*col*(SetInRange(k,p) % col));
            }
          }
        }
      }
      ncolors = nc + nc*(col-1 + col*(col-1)+ col*col*(col-1));
      PetscCall(ISColoringCreate(comm,ncolors,nc*gnx*gny*gnz,colors,PETSC_OWN_POINTER,&dd->ghostedcoloring));
      PetscCall(ISColoringSetType(dd->ghostedcoloring,IS_COLORING_LOCAL));
    }
    *coloring = dd->ghostedcoloring;
  } else SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_ARG_WRONG,"Unknown ISColoringType %d",(int)ctype);
  PetscCall(ISColoringReference(*coloring));
  PetscFunctionReturn(0);
}

/* ---------------------------------------------------------------------------------*/

/*
   1d coloring.  When ofillcols is set (DMDASetBlockFills() was used), only
   components that actually couple off-diagonally get distinct off-point
   colors, reducing the total color count to nc + 2*s*tc where tc is the
   number of such components.
*/
PetscErrorCode DMCreateColoring_DA_1d_MPIAIJ(DM da,ISColoringType ctype,ISColoring *coloring)
{
  PetscInt         xs,nx,i,i1,gxs,gnx,l,m,M,dim,s,nc,col;
  PetscInt         ncolors;
  MPI_Comm         comm;
  DMBoundaryType   bx;
  ISColoringValue  *colors;
  DM_DA            *dd = (DM_DA*)da->data;

  PetscFunctionBegin;
  /*
         nc - number of components per grid point
         col - number of colors needed in one direction for single component problem
  */
  PetscCall(DMDAGetInfo(da,&dim,&m,NULL,NULL,&M,NULL,NULL,&nc,&s,&bx,NULL,NULL,NULL));
  col  = 2*s + 1;
  PetscCall(DMDAGetCorners(da,&xs,NULL,NULL,&nx,NULL,NULL));
  PetscCall(DMDAGetGhostCorners(da,&gxs,NULL,NULL,&gnx,NULL,NULL));
  PetscCall(PetscObjectGetComm((PetscObject)da,&comm));

  /* create the coloring */
  if (ctype == IS_COLORING_GLOBAL) {
    if (!dd->localcoloring) {
      PetscCall(PetscMalloc1(nc*nx,&colors));
      if (dd->ofillcols) {
        PetscInt tc = 0;
        /* tc = number of components with any off-diagonal coupling */
        for (i=0; i<nc; i++) tc += (PetscInt) (dd->ofillcols[i] > 0);
        i1 = 0;
        for (i=xs; i<xs+nx; i++) {
          for (l=0; l<nc; l++) {
            if (dd->ofillcols[l] && (i % col)) {
              colors[i1++] = nc - 1 + tc*((i % col) - 1) + dd->ofillcols[l];
            } else {
              colors[i1++] = l;
            }
          }
        }
        ncolors = nc + 2*s*tc;
      } else {
        i1 = 0;
        for (i=xs; i<xs+nx; i++) {
          for (l=0; l<nc; l++) {
            colors[i1++] = l + nc*(i % col);
          }
        }
        ncolors = nc + nc*(col-1);
      }
      PetscCall(ISColoringCreate(comm,ncolors,nc*nx,colors,PETSC_OWN_POINTER,&dd->localcoloring));
    }
    *coloring = dd->localcoloring;
  } else if (ctype == IS_COLORING_LOCAL) {
    if (!dd->ghostedcoloring) {
      PetscCall(PetscMalloc1(nc*gnx,&colors));
      i1 = 0;
      for (i=gxs; i<gxs+gnx; i++) {
        for (l=0; l<nc; l++) {
          /* the complicated stuff is to handle periodic boundaries */
          colors[i1++] = l + nc*(SetInRange(i,m) % col);
        }
      }
      ncolors = nc + nc*(col-1);
      PetscCall(ISColoringCreate(comm,ncolors,nc*gnx,colors,PETSC_OWN_POINTER,&dd->ghostedcoloring));
      PetscCall(ISColoringSetType(dd->ghostedcoloring,IS_COLORING_LOCAL));
    }
    *coloring = dd->ghostedcoloring;
  } else SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_ARG_WRONG,"Unknown ISColoringType %d",(int)ctype);
  PetscCall(ISColoringReference(*coloring));
  PetscFunctionReturn(0);
}

/*
   Special 5-color scheme for the 2d star stencil with width 1 (the classic
   5-point stencil): color (3*j+i) mod 5 per component.  The extra reference
   taken by the generic 2d routine on behalf of the caller happens in
   DMCreateColoring_DA_2d_MPIAIJ(), which is the only caller of this routine.
*/
PetscErrorCode DMCreateColoring_DA_2d_5pt_MPIAIJ(DM da,ISColoringType ctype,ISColoring *coloring)
{
  PetscInt         xs,ys,nx,ny,i,j,ii,gxs,gys,gnx,gny,m,n,dim,s,k,nc;
  PetscInt         ncolors;
  MPI_Comm         comm;
  DMBoundaryType   bx,by;
  ISColoringValue  *colors;
  DM_DA            *dd = (DM_DA*)da->data;

  PetscFunctionBegin;
  /*
         nc - number of components per grid point
         col - number of colors needed in one direction for single component problem
  */
  PetscCall(DMDAGetInfo(da,&dim,&m,&n,NULL,NULL,NULL,NULL,&nc,&s,&bx,&by,NULL,NULL));
  PetscCall(DMDAGetCorners(da,&xs,&ys,NULL,&nx,&ny,NULL));
  PetscCall(DMDAGetGhostCorners(da,&gxs,&gys,NULL,&gnx,&gny,NULL));
  PetscCall(PetscObjectGetComm((PetscObject)da,&comm));
  /* create the coloring */
  if (ctype == IS_COLORING_GLOBAL) {
    if (!dd->localcoloring) {
      PetscCall(PetscMalloc1(nc*nx*ny,&colors));
      ii = 0;
      for (j=ys; j<ys+ny; j++) {
        for (i=xs; i<xs+nx; i++) {
          for (k=0; k<nc; k++) {
            colors[ii++] = k + nc*((3*j+i) % 5);
          }
        }
      }
      ncolors = 5*nc;
      PetscCall(ISColoringCreate(comm,ncolors,nc*nx*ny,colors,PETSC_OWN_POINTER,&dd->localcoloring));
    }
    *coloring = dd->localcoloring;
  } else if (ctype == IS_COLORING_LOCAL) {
    if (!dd->ghostedcoloring) {
      PetscCall(PetscMalloc1(nc*gnx*gny,&colors));
      ii = 0;
      for (j=gys; j<gys+gny; j++) {
        for (i=gxs; i<gxs+gnx; i++) {
          for (k=0; k<nc; k++) {
            colors[ii++] = k + nc*((3*SetInRange(j,n) + SetInRange(i,m)) % 5);
          }
        }
      }
      ncolors = 5*nc;
      PetscCall(ISColoringCreate(comm,ncolors,nc*gnx*gny,colors,PETSC_OWN_POINTER,&dd->ghostedcoloring));
      PetscCall(ISColoringSetType(dd->ghostedcoloring,IS_COLORING_LOCAL));
    }
    *coloring = dd->ghostedcoloring;
  } else SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_ARG_WRONG,"Unknown ISColoringType %d",(int)ctype);
  PetscFunctionReturn(0);
}

/* =========================================================================== */
extern PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ(DM,Mat);
extern PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ_Fill(DM,Mat);
extern PetscErrorCode DMCreateMatrix_DA_1d_SeqAIJ_NoPreallocation(DM,Mat);
extern PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ(DM,Mat);
extern PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ_Fill(DM,Mat);
extern PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ(DM,Mat);
extern PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ_Fill(DM,Mat);
extern PetscErrorCode DMCreateMatrix_DA_2d_MPIBAIJ(DM,Mat);
extern PetscErrorCode DMCreateMatrix_DA_3d_MPIBAIJ(DM,Mat);
extern PetscErrorCode DMCreateMatrix_DA_2d_MPISBAIJ(DM,Mat);
extern PetscErrorCode DMCreateMatrix_DA_3d_MPISBAIJ(DM,Mat);
extern PetscErrorCode DMCreateMatrix_DA_2d_MPISELL(DM,Mat);
extern PetscErrorCode DMCreateMatrix_DA_3d_MPISELL(DM,Mat);
extern PetscErrorCode DMCreateMatrix_DA_IS(DM,Mat);

/*@C
  MatSetupDM - Sets the DMDA that is to be used by the HYPRE_StructMatrix PETSc matrix

  Logically Collective on mat

  Input Parameters:
+ mat - the matrix
- da - the da

  Level: intermediate

@*/
PetscErrorCode MatSetupDM(Mat mat,DM da)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
  PetscValidHeaderSpecificType(da,DM_CLASSID,2,DMDA);
  /* no-op for matrix types that do not implement MatSetupDM_C */
  PetscTryMethod(mat,"MatSetupDM_C",(Mat,DM),(mat,da));
  PetscFunctionReturn(0);
}

/*
   Views a DMDA-generated parallel matrix in the natural (application)
   ordering: builds the PETSc-ordering index set via the DM's AO, extracts the
   permuted submatrix, and views that instead of the PETSc-ordered original.
*/
PetscErrorCode MatView_MPI_DA(Mat A,PetscViewer viewer)
{
  DM                da;
  const char        *prefix;
  Mat               Anatural;
  AO                ao;
  PetscInt          rstart,rend,*petsc,i;
  IS                is;
  MPI_Comm          comm;
  PetscViewerFormat format;

  PetscFunctionBegin;
  /* Check whether we are just printing info, in which case MatView() already viewed everything we wanted to view */
  PetscCall(PetscViewerGetFormat(viewer,&format));
  if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) PetscFunctionReturn(0);

  PetscCall(PetscObjectGetComm((PetscObject)A,&comm));
  PetscCall(MatGetDM(A, &da));
  PetscCheck(da,PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Matrix not generated from a DMDA");

  PetscCall(DMDAGetAO(da,&ao));
  PetscCall(MatGetOwnershipRange(A,&rstart,&rend));
  PetscCall(PetscMalloc1(rend-rstart,&petsc));
  for (i=rstart; i<rend; i++) petsc[i-rstart] = i;
  PetscCall(AOApplicationToPetsc(ao,rend-rstart,petsc));
  /* IS takes ownership of petsc[] (PETSC_OWN_POINTER) */
  PetscCall(ISCreateGeneral(comm,rend-rstart,petsc,PETSC_OWN_POINTER,&is));

  /* call viewer on natural ordering */
  PetscCall(MatCreateSubMatrix(A,is,is,MAT_INITIAL_MATRIX,&Anatural));
  PetscCall(ISDestroy(&is));
  PetscCall(PetscObjectGetOptionsPrefix((PetscObject)A,&prefix));
  PetscCall(PetscObjectSetOptionsPrefix((PetscObject)Anatural,prefix));
  PetscCall(PetscObjectSetName((PetscObject)Anatural,((PetscObject)A)->name));
  /* suppress the "Mat Object: ..." banner so the output looks like it came from A itself */
  ((PetscObject)Anatural)->donotPetscObjectPrintClassNamePrefixType = PETSC_TRUE;
  PetscCall(MatView(Anatural,viewer));
  ((PetscObject)Anatural)->donotPetscObjectPrintClassNamePrefixType = PETSC_FALSE;
  PetscCall(MatDestroy(&Anatural));
  PetscFunctionReturn(0);
}

/*
   Loads a matrix stored in natural ordering and permutes it into the DMDA's
   PETSc ordering, replacing A's header in place via MatHeaderReplace() so the
   caller's Mat handle stays valid.
*/
PetscErrorCode MatLoad_MPI_DA(Mat A,PetscViewer viewer)
{
  DM       da;
  Mat      Anatural,Aapp;
  AO       ao;
  PetscInt rstart,rend,*app,i,m,n,M,N;
  IS       is;
  MPI_Comm comm;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)A,&comm));
  PetscCall(MatGetDM(A, &da));
  PetscCheck(da,PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Matrix not generated from a DMDA");

  /* Load the matrix in natural ordering */
  PetscCall(MatCreate(PetscObjectComm((PetscObject)A),&Anatural));
  PetscCall(MatSetType(Anatural,((PetscObject)A)->type_name));
  PetscCall(MatGetSize(A,&M,&N));
  PetscCall(MatGetLocalSize(A,&m,&n));
  PetscCall(MatSetSizes(Anatural,m,n,M,N));
  PetscCall(MatLoad(Anatural,viewer));

  /* Map natural ordering to application ordering and create IS */
  PetscCall(DMDAGetAO(da,&ao));
  PetscCall(MatGetOwnershipRange(Anatural,&rstart,&rend));
  PetscCall(PetscMalloc1(rend-rstart,&app));
  for (i=rstart; i<rend; i++) app[i-rstart] = i;
  PetscCall(AOPetscToApplication(ao,rend-rstart,app));
  PetscCall(ISCreateGeneral(comm,rend-rstart,app,PETSC_OWN_POINTER,&is));

  /* Do permutation and replace header */
  PetscCall(MatCreateSubMatrix(Anatural,is,is,MAT_INITIAL_MATRIX,&Aapp));
  PetscCall(MatHeaderReplace(A,&Aapp));
  PetscCall(ISDestroy(&is));
  PetscCall(MatDestroy(&Anatural));
  PetscFunctionReturn(0);
}

/*
   Creates (and preallocates) the matrix associated with the DMDA, dispatching
   on which preallocation method the chosen matrix type provides (AIJ, BAIJ,
   SBAIJ, SELL, or IS).  Falls back to MatSetUp() with a local-to-global map
   when the type supports none of them.
*/
PetscErrorCode DMCreateMatrix_DA(DM da, Mat *J)
{
  PetscInt    dim,dof,nx,ny,nz,dims[3],starts[3],M,N,P;
  Mat         A;
  MPI_Comm    comm;
  MatType     Atype;
  void        (*aij)(void)=NULL,(*baij)(void)=NULL,(*sbaij)(void)=NULL,(*sell)(void)=NULL,(*is)(void)=NULL;
  MatType     mtype;
  PetscMPIInt size;
  DM_DA       *dd = (DM_DA*)da->data;

  PetscFunctionBegin;
  PetscCall(MatInitializePackage());
  mtype = da->mattype;

  /*
                                                  m
          ------------------------------------------------------
         |                                                      |
         |                                                      |
         |               ----------------------                 |
         |               |                    |                 |
      n  |           ny  |                    |                 |
         |               |                    |                 |
         |               .---------------------                 |
         |             (xs,ys)     nx                           |
         |            .                                         |
         |         (gxs,gys)                                    |
         |                                                      |
          -----------------------------------------------------
  */

  /*
         nc - number of components per grid point
         col - number of colors needed in one direction for single component problem
  */
  M   = dd->M;
  N   = dd->N;
  P   = dd->P;
  dim = da->dim;
  dof = dd->w;
  /* PetscCall(DMDAGetInfo(da,&dim,&M,&N,&P,NULL,NULL,NULL,&dof,NULL,NULL,NULL,NULL,NULL)); */
  PetscCall(DMDAGetCorners(da,NULL,NULL,NULL,&nx,&ny,&nz));
  PetscCall(PetscObjectGetComm((PetscObject)da,&comm));
  PetscCall(MatCreate(comm,&A));
  PetscCall(MatSetSizes(A,dof*nx*ny*nz,dof*nx*ny*nz,dof*M*N*P,dof*M*N*P));
  PetscCall(MatSetType(A,mtype));
  PetscCall(MatSetFromOptions(A));
  if (dof*nx*ny*nz < da->bind_below) {
    /* small local problems are kept on the CPU */
    PetscCall(MatSetBindingPropagates(A,PETSC_TRUE));
    PetscCall(MatBindToCPU(A,PETSC_TRUE));
  }
  PetscCall(MatSetDM(A,da));
  if (da->structure_only) PetscCall(MatSetOption(A,MAT_STRUCTURE_ONLY,PETSC_TRUE));
  PetscCall(MatGetType(A,&Atype));
  /*
     We do not provide a getmatrix function in the DMDA operations because
     the basic DMDA does not know about matrices. We think of DMDA as being
     more low-level than matrices.

     We could switch based on Atype (or mtype), but we do not since the
     specialized setting routines depend only on the particular preallocation
     details of the matrix, not the type itself.
  */
  PetscCall(PetscObjectQueryFunction((PetscObject)A,"MatMPIAIJSetPreallocation_C",&aij));
  if (!aij) {
    PetscCall(PetscObjectQueryFunction((PetscObject)A,"MatSeqAIJSetPreallocation_C",&aij));
  }
  if (!aij) {
    PetscCall(PetscObjectQueryFunction((PetscObject)A,"MatMPIBAIJSetPreallocation_C",&baij));
    if (!baij) {
      PetscCall(PetscObjectQueryFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C",&baij));
    }
    if (!baij) {
      PetscCall(PetscObjectQueryFunction((PetscObject)A,"MatMPISBAIJSetPreallocation_C",&sbaij));
      if (!sbaij) {
        PetscCall(PetscObjectQueryFunction((PetscObject)A,"MatSeqSBAIJSetPreallocation_C",&sbaij));
      }
      if (!sbaij) {
        PetscCall(PetscObjectQueryFunction((PetscObject)A,"MatMPISELLSetPreallocation_C",&sell));
        if (!sell) {
          PetscCall(PetscObjectQueryFunction((PetscObject)A,"MatSeqSELLSetPreallocation_C",&sell));
        }
      }
      if (!sell) {
        PetscCall(PetscObjectQueryFunction((PetscObject)A,"MatISSetPreallocation_C",&is));
      }
    }
  }
  if (aij) {
    if (dim == 1) {
      if (dd->ofill) {
        PetscCall(DMCreateMatrix_DA_1d_MPIAIJ_Fill(da,A));
      } else {
        DMBoundaryType bx;
        PetscMPIInt    size;
        PetscCall(DMDAGetInfo(da,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,&bx,NULL,NULL,NULL));
        PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)da),&size));
        if (size == 1 && bx == DM_BOUNDARY_NONE) {
          PetscCall(DMCreateMatrix_DA_1d_SeqAIJ_NoPreallocation(da,A));
        } else {
          PetscCall(DMCreateMatrix_DA_1d_MPIAIJ(da,A));
        }
      }
    } else if (dim == 2) {
      if (dd->ofill) {
        PetscCall(DMCreateMatrix_DA_2d_MPIAIJ_Fill(da,A));
      } else {
        PetscCall(DMCreateMatrix_DA_2d_MPIAIJ(da,A));
      }
    } else if (dim == 3) {
      if (dd->ofill) {
        PetscCall(DMCreateMatrix_DA_3d_MPIAIJ_Fill(da,A));
      } else {
        PetscCall(DMCreateMatrix_DA_3d_MPIAIJ(da,A));
      }
    }
  } else if (baij) {
    if (dim == 2) {
      PetscCall(DMCreateMatrix_DA_2d_MPIBAIJ(da,A));
    } else if (dim == 3) {
      PetscCall(DMCreateMatrix_DA_3d_MPIBAIJ(da,A));
    } else SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"Not implemented for %" PetscInt_FMT " dimension and Matrix Type: %s in %" PetscInt_FMT " dimension! Send mail to petsc-maint@mcs.anl.gov for code",dim,Atype,dim);
  } else if (sbaij) {
    if (dim == 2) {
      PetscCall(DMCreateMatrix_DA_2d_MPISBAIJ(da,A));
    } else if (dim == 3) {
      PetscCall(DMCreateMatrix_DA_3d_MPISBAIJ(da,A));
    } else SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"Not implemented for %" PetscInt_FMT " dimension and Matrix Type: %s in %" PetscInt_FMT " dimension! Send mail to petsc-maint@mcs.anl.gov for code",dim,Atype,dim);
  } else if (sell) {
    if (dim == 2) {
      PetscCall(DMCreateMatrix_DA_2d_MPISELL(da,A));
    } else if (dim == 3) {
      PetscCall(DMCreateMatrix_DA_3d_MPISELL(da,A));
    } else SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"Not implemented for %" PetscInt_FMT " dimension and Matrix Type: %s in %" PetscInt_FMT " dimension! Send mail to petsc-maint@mcs.anl.gov for code",dim,Atype,dim);
  } else if (is) {
    PetscCall(DMCreateMatrix_DA_IS(da,A));
  } else {
    /* no specialized preallocation available: generic setup with l2g mapping */
    ISLocalToGlobalMapping ltog;

    PetscCall(MatSetBlockSize(A,dof));
    PetscCall(MatSetUp(A));
    PetscCall(DMGetLocalToGlobalMapping(da,&ltog));
    PetscCall(MatSetLocalToGlobalMapping(A,ltog,ltog));
  }
  PetscCall(DMDAGetGhostCorners(da,&starts[0],&starts[1],&starts[2],&dims[0],&dims[1],&dims[2]));
  PetscCall(MatSetStencil(A,dim,dims,starts,dof));
  PetscCall(MatSetDM(A,da));
  PetscCallMPI(MPI_Comm_size(comm,&size));
  if (size > 1) {
    /* change viewer to display matrix in natural ordering */
    PetscCall(MatSetOperation(A, MATOP_VIEW, (void (*)(void))MatView_MPI_DA));
    PetscCall(MatSetOperation(A, MATOP_LOAD, (void (*)(void))MatLoad_MPI_DA));
  }
  *J = A;
  PetscFunctionReturn(0);
}

/* ---------------------------------------------------------------------------------*/
PETSC_EXTERN PetscErrorCode MatISSetPreallocation_IS(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]);

/*
   Creates the MATIS matrix for the DMDA: restricts the DM's local-to-global
   map to the nodes actually covered by DMDAGetElements() (flagging unused
   local indices with -1), then preallocates via a MATPREALLOCATOR proxy that
   is fed the element connectivity.
*/
PetscErrorCode DMCreateMatrix_DA_IS(DM dm,Mat J)
{
  DM_DA                  *da = (DM_DA*)dm->data;
  Mat                    lJ,P;
  ISLocalToGlobalMapping ltog;
  IS                     is;
  PetscBT                bt;
  const PetscInt         *e_loc,*idx;
  PetscInt               i,nel,nen,nv,dof,*gidx,n,N;

  /* The l2g map of DMDA has all ghosted nodes, and e_loc is a subset of all the local nodes (including the ghosted)
     We need to filter out the local indices that are not represented through the DMDAGetElements decomposition */
  PetscFunctionBegin;
  dof = da->w;
  PetscCall(MatSetBlockSize(J,dof));
  PetscCall(DMGetLocalToGlobalMapping(dm,&ltog));

  /* flag local elements indices in local DMDA numbering */
  PetscCall(ISLocalToGlobalMappingGetSize(ltog,&nv));
  PetscCall(PetscBTCreate(nv/dof,&bt));
  PetscCall(DMDAGetElements(dm,&nel,&nen,&e_loc)); /* this will throw an error if the stencil type is not DMDA_STENCIL_BOX */
  for (i=0;i<nel*nen;i++) PetscCall(PetscBTSet(bt,e_loc[i]));

  /* filter out (set to -1) the global indices not used by the local elements */
  PetscCall(PetscMalloc1(nv/dof,&gidx));
  PetscCall(ISLocalToGlobalMappingGetBlockIndices(ltog,&idx));
  PetscCall(PetscArraycpy(gidx,idx,nv/dof));
  PetscCall(ISLocalToGlobalMappingRestoreBlockIndices(ltog,&idx));
  for (i=0;i<nv/dof;i++) if (!PetscBTLookup(bt,i)) gidx[i] = -1;
  PetscCall(PetscBTDestroy(&bt));
  PetscCall(ISCreateBlock(PetscObjectComm((PetscObject)dm),dof,nv/dof,gidx,PETSC_OWN_POINTER,&is));
  PetscCall(ISLocalToGlobalMappingCreateIS(is,&ltog));
  PetscCall(MatSetLocalToGlobalMapping(J,ltog,ltog));
  PetscCall(ISLocalToGlobalMappingDestroy(&ltog));
  PetscCall(ISDestroy(&is));

  /* Preallocation */
  if (dm->prealloc_skip) {
    PetscCall(MatSetUp(J));
  } else {
    /* use a MATPREALLOCATOR with the same shape/mapping as the local matrix to
       count the nonzeros contributed by each element's dense block */
    PetscCall(MatISGetLocalMat(J,&lJ));
    PetscCall(MatGetLocalToGlobalMapping(lJ,&ltog,NULL));
    PetscCall(MatCreate(PetscObjectComm((PetscObject)lJ),&P));
    PetscCall(MatSetType(P,MATPREALLOCATOR));
    PetscCall(MatSetLocalToGlobalMapping(P,ltog,ltog));
    PetscCall(MatGetSize(lJ,&N,NULL));
    PetscCall(MatGetLocalSize(lJ,&n,NULL));
    PetscCall(MatSetSizes(P,n,n,N,N));
    PetscCall(MatSetBlockSize(P,dof));
    PetscCall(MatSetUp(P));
    for (i=0;i<nel;i++) {
      PetscCall(MatSetValuesBlockedLocal(P,nen,e_loc+i*nen,nen,e_loc+i*nen,NULL,INSERT_VALUES));
    }
    PetscCall(MatPreallocatorPreallocate(P,(PetscBool)!da->prealloc_only,lJ));
    PetscCall(MatISRestoreLocalMat(J,&lJ));
    PetscCall(DMDARestoreElements(dm,&nel,&nen,&e_loc));
    PetscCall(MatDestroy(&P));

    PetscCall(MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY));
  }
  PetscFunctionReturn(0);
}

PetscErrorCode DMCreateMatrix_DA_2d_MPISELL(DM da,Mat J)
{
  PetscInt xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny,m,n,dim,s,*cols = NULL,k,nc,*rows = NULL,col,cnt,l,p;
  PetscInt
lstart,lend,pstart,pend,*dnz,*onz; 874 MPI_Comm comm; 875 PetscScalar *values; 876 DMBoundaryType bx,by; 877 ISLocalToGlobalMapping ltog; 878 DMDAStencilType st; 879 880 PetscFunctionBegin; 881 /* 882 nc - number of components per grid point 883 col - number of colors needed in one direction for single component problem 884 885 */ 886 PetscCall(DMDAGetInfo(da,&dim,&m,&n,NULL,NULL,NULL,NULL,&nc,&s,&bx,&by,NULL,&st)); 887 col = 2*s + 1; 888 PetscCall(DMDAGetCorners(da,&xs,&ys,NULL,&nx,&ny,NULL)); 889 PetscCall(DMDAGetGhostCorners(da,&gxs,&gys,NULL,&gnx,&gny,NULL)); 890 PetscCall(PetscObjectGetComm((PetscObject)da,&comm)); 891 892 PetscCall(PetscMalloc2(nc,&rows,col*col*nc*nc,&cols)); 893 PetscCall(DMGetLocalToGlobalMapping(da,<og)); 894 895 PetscCall(MatSetBlockSize(J,nc)); 896 /* determine the matrix preallocation information */ 897 MatPreallocateBegin(comm,nc*nx*ny,nc*nx*ny,dnz,onz); 898 for (i=xs; i<xs+nx; i++) { 899 900 pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i)); 901 pend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,m-i-1)); 902 903 for (j=ys; j<ys+ny; j++) { 904 slot = i - gxs + gnx*(j - gys); 905 906 lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j)); 907 lend = (by == DM_BOUNDARY_PERIODIC) ? 
s : (PetscMin(s,n-j-1)); 908 909 cnt = 0; 910 for (k=0; k<nc; k++) { 911 for (l=lstart; l<lend+1; l++) { 912 for (p=pstart; p<pend+1; p++) { 913 if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star have either l = 0 or p = 0 */ 914 cols[cnt++] = k + nc*(slot + gnx*l + p); 915 } 916 } 917 } 918 rows[k] = k + nc*(slot); 919 } 920 PetscCall(MatPreallocateSetLocal(ltog,nc,rows,ltog,cnt,cols,dnz,onz)); 921 } 922 } 923 PetscCall(MatSetBlockSize(J,nc)); 924 PetscCall(MatSeqSELLSetPreallocation(J,0,dnz)); 925 PetscCall(MatMPISELLSetPreallocation(J,0,dnz,0,onz)); 926 MatPreallocateEnd(dnz,onz); 927 928 PetscCall(MatSetLocalToGlobalMapping(J,ltog,ltog)); 929 930 /* 931 For each node in the grid: we get the neighbors in the local (on processor ordering 932 that includes the ghost points) then MatSetValuesLocal() maps those indices to the global 933 PETSc ordering. 934 */ 935 if (!da->prealloc_only) { 936 PetscCall(PetscCalloc1(col*col*nc*nc,&values)); 937 for (i=xs; i<xs+nx; i++) { 938 939 pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i)); 940 pend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,m-i-1)); 941 942 for (j=ys; j<ys+ny; j++) { 943 slot = i - gxs + gnx*(j - gys); 944 945 lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j)); 946 lend = (by == DM_BOUNDARY_PERIODIC) ? 
s : (PetscMin(s,n-j-1)); 947 948 cnt = 0; 949 for (k=0; k<nc; k++) { 950 for (l=lstart; l<lend+1; l++) { 951 for (p=pstart; p<pend+1; p++) { 952 if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star have either l = 0 or p = 0 */ 953 cols[cnt++] = k + nc*(slot + gnx*l + p); 954 } 955 } 956 } 957 rows[k] = k + nc*(slot); 958 } 959 PetscCall(MatSetValuesLocal(J,nc,rows,cnt,cols,values,INSERT_VALUES)); 960 } 961 } 962 PetscCall(PetscFree(values)); 963 /* do not copy values to GPU since they are all zero and not yet needed there */ 964 PetscCall(MatBindToCPU(J,PETSC_TRUE)); 965 PetscCall(MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY)); 966 PetscCall(MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY)); 967 PetscCall(MatBindToCPU(J,PETSC_FALSE)); 968 PetscCall(MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE)); 969 } 970 PetscCall(PetscFree2(rows,cols)); 971 PetscFunctionReturn(0); 972 } 973 974 PetscErrorCode DMCreateMatrix_DA_3d_MPISELL(DM da,Mat J) 975 { 976 PetscInt xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny; 977 PetscInt m,n,dim,s,*cols = NULL,k,nc,*rows = NULL,col,cnt,l,p,*dnz = NULL,*onz = NULL; 978 PetscInt istart,iend,jstart,jend,kstart,kend,zs,nz,gzs,gnz,ii,jj,kk,M,N,P; 979 MPI_Comm comm; 980 PetscScalar *values; 981 DMBoundaryType bx,by,bz; 982 ISLocalToGlobalMapping ltog; 983 DMDAStencilType st; 984 985 PetscFunctionBegin; 986 /* 987 nc - number of components per grid point 988 col - number of colors needed in one direction for single component problem 989 990 */ 991 PetscCall(DMDAGetInfo(da,&dim,&m,&n,&p,&M,&N,&P,&nc,&s,&bx,&by,&bz,&st)); 992 col = 2*s + 1; 993 PetscCall(DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz)); 994 PetscCall(DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz)); 995 PetscCall(PetscObjectGetComm((PetscObject)da,&comm)); 996 997 PetscCall(PetscMalloc2(nc,&rows,col*col*col*nc*nc,&cols)); 998 PetscCall(DMGetLocalToGlobalMapping(da,<og)); 999 1000 PetscCall(MatSetBlockSize(J,nc)); 1001 /* determine the matrix preallocation information */ 1002 
MatPreallocateBegin(comm,nc*nx*ny*nz,nc*nx*ny*nz,dnz,onz); 1003 for (i=xs; i<xs+nx; i++) { 1004 istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i)); 1005 iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,m-i-1)); 1006 for (j=ys; j<ys+ny; j++) { 1007 jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j)); 1008 jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,n-j-1)); 1009 for (k=zs; k<zs+nz; k++) { 1010 kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k)); 1011 kend = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,p-k-1)); 1012 1013 slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs); 1014 1015 cnt = 0; 1016 for (l=0; l<nc; l++) { 1017 for (ii=istart; ii<iend+1; ii++) { 1018 for (jj=jstart; jj<jend+1; jj++) { 1019 for (kk=kstart; kk<kend+1; kk++) { 1020 if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/ 1021 cols[cnt++] = l + nc*(slot + ii + gnx*jj + gnx*gny*kk); 1022 } 1023 } 1024 } 1025 } 1026 rows[l] = l + nc*(slot); 1027 } 1028 PetscCall(MatPreallocateSetLocal(ltog,nc,rows,ltog,cnt,cols,dnz,onz)); 1029 } 1030 } 1031 } 1032 PetscCall(MatSetBlockSize(J,nc)); 1033 PetscCall(MatSeqSELLSetPreallocation(J,0,dnz)); 1034 PetscCall(MatMPISELLSetPreallocation(J,0,dnz,0,onz)); 1035 MatPreallocateEnd(dnz,onz); 1036 PetscCall(MatSetLocalToGlobalMapping(J,ltog,ltog)); 1037 1038 /* 1039 For each node in the grid: we get the neighbors in the local (on processor ordering 1040 that includes the ghost points) then MatSetValuesLocal() maps those indices to the global 1041 PETSc ordering. 1042 */ 1043 if (!da->prealloc_only) { 1044 PetscCall(PetscCalloc1(col*col*col*nc*nc*nc,&values)); 1045 for (i=xs; i<xs+nx; i++) { 1046 istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i)); 1047 iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,m-i-1)); 1048 for (j=ys; j<ys+ny; j++) { 1049 jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j)); 1050 jend = (by == DM_BOUNDARY_PERIODIC) ? 
s : (PetscMin(s,n-j-1)); 1051 for (k=zs; k<zs+nz; k++) { 1052 kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k)); 1053 kend = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,p-k-1)); 1054 1055 slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs); 1056 1057 cnt = 0; 1058 for (l=0; l<nc; l++) { 1059 for (ii=istart; ii<iend+1; ii++) { 1060 for (jj=jstart; jj<jend+1; jj++) { 1061 for (kk=kstart; kk<kend+1; kk++) { 1062 if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/ 1063 cols[cnt++] = l + nc*(slot + ii + gnx*jj + gnx*gny*kk); 1064 } 1065 } 1066 } 1067 } 1068 rows[l] = l + nc*(slot); 1069 } 1070 PetscCall(MatSetValuesLocal(J,nc,rows,cnt,cols,values,INSERT_VALUES)); 1071 } 1072 } 1073 } 1074 PetscCall(PetscFree(values)); 1075 /* do not copy values to GPU since they are all zero and not yet needed there */ 1076 PetscCall(MatBindToCPU(J,PETSC_TRUE)); 1077 PetscCall(MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY)); 1078 PetscCall(MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY)); 1079 PetscCall(MatBindToCPU(J,PETSC_FALSE)); 1080 PetscCall(MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE)); 1081 } 1082 PetscCall(PetscFree2(rows,cols)); 1083 PetscFunctionReturn(0); 1084 } 1085 1086 PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ(DM da,Mat J) 1087 { 1088 PetscInt xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny,m,n,dim,s,*cols = NULL,k,nc,*rows = NULL,col,cnt,l,p,M,N; 1089 PetscInt lstart,lend,pstart,pend,*dnz,*onz; 1090 MPI_Comm comm; 1091 DMBoundaryType bx,by; 1092 ISLocalToGlobalMapping ltog,mltog; 1093 DMDAStencilType st; 1094 PetscBool removedups = PETSC_FALSE,alreadyboundtocpu = PETSC_TRUE; 1095 1096 PetscFunctionBegin; 1097 /* 1098 nc - number of components per grid point 1099 col - number of colors needed in one direction for single component problem 1100 1101 */ 1102 PetscCall(DMDAGetInfo(da,&dim,&m,&n,NULL,&M,&N,NULL,&nc,&s,&bx,&by,NULL,&st)); 1103 if (bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE) 
PetscCall(MatSetOption(J,MAT_SORTED_FULL,PETSC_TRUE)); 1104 col = 2*s + 1; 1105 /* 1106 With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times 1107 because of "wrapping" around the end of the domain hitting an entry already counted in the other direction. 1108 */ 1109 if (M == 1 && 2*s >= m) removedups = PETSC_TRUE; 1110 if (N == 1 && 2*s >= n) removedups = PETSC_TRUE; 1111 PetscCall(DMDAGetCorners(da,&xs,&ys,NULL,&nx,&ny,NULL)); 1112 PetscCall(DMDAGetGhostCorners(da,&gxs,&gys,NULL,&gnx,&gny,NULL)); 1113 PetscCall(PetscObjectGetComm((PetscObject)da,&comm)); 1114 1115 PetscCall(PetscMalloc2(nc,&rows,col*col*nc*nc,&cols)); 1116 PetscCall(DMGetLocalToGlobalMapping(da,<og)); 1117 1118 PetscCall(MatSetBlockSize(J,nc)); 1119 /* determine the matrix preallocation information */ 1120 MatPreallocateBegin(comm,nc*nx*ny,nc*nx*ny,dnz,onz); 1121 for (i=xs; i<xs+nx; i++) { 1122 pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i)); 1123 pend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,m-i-1)); 1124 1125 for (j=ys; j<ys+ny; j++) { 1126 slot = i - gxs + gnx*(j - gys); 1127 1128 lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j)); 1129 lend = (by == DM_BOUNDARY_PERIODIC) ? 
s : (PetscMin(s,n-j-1)); 1130 1131 cnt = 0; 1132 for (k=0; k<nc; k++) { 1133 for (l=lstart; l<lend+1; l++) { 1134 for (p=pstart; p<pend+1; p++) { 1135 if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star have either l = 0 or p = 0 */ 1136 cols[cnt++] = k + nc*(slot + gnx*l + p); 1137 } 1138 } 1139 } 1140 rows[k] = k + nc*(slot); 1141 } 1142 if (removedups) PetscCall(MatPreallocateSetLocalRemoveDups(ltog,nc,rows,ltog,cnt,cols,dnz,onz)); 1143 else PetscCall(MatPreallocateSetLocal(ltog,nc,rows,ltog,cnt,cols,dnz,onz)); 1144 } 1145 } 1146 PetscCall(MatSetBlockSize(J,nc)); 1147 PetscCall(MatSeqAIJSetPreallocation(J,0,dnz)); 1148 PetscCall(MatMPIAIJSetPreallocation(J,0,dnz,0,onz)); 1149 MatPreallocateEnd(dnz,onz); 1150 PetscCall(MatGetLocalToGlobalMapping(J,&mltog,NULL)); 1151 if (!mltog) PetscCall(MatSetLocalToGlobalMapping(J,ltog,ltog)); 1152 1153 /* 1154 For each node in the grid: we get the neighbors in the local (on processor ordering 1155 that includes the ghost points) then MatSetValuesLocal() maps those indices to the global 1156 PETSc ordering. 1157 */ 1158 if (!da->prealloc_only) { 1159 for (i=xs; i<xs+nx; i++) { 1160 1161 pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i)); 1162 pend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,m-i-1)); 1163 1164 for (j=ys; j<ys+ny; j++) { 1165 slot = i - gxs + gnx*(j - gys); 1166 1167 lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j)); 1168 lend = (by == DM_BOUNDARY_PERIODIC) ? 
s : (PetscMin(s,n-j-1)); 1169 1170 cnt = 0; 1171 for (l=lstart; l<lend+1; l++) { 1172 for (p=pstart; p<pend+1; p++) { 1173 if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star have either l = 0 or p = 0 */ 1174 cols[cnt++] = nc*(slot + gnx*l + p); 1175 for (k=1; k<nc; k++) { 1176 cols[cnt] = 1 + cols[cnt-1];cnt++; 1177 } 1178 } 1179 } 1180 } 1181 for (k=0; k<nc; k++) rows[k] = k + nc*(slot); 1182 PetscCall(MatSetValuesLocal(J,nc,rows,cnt,cols,NULL,INSERT_VALUES)); 1183 } 1184 } 1185 /* do not copy values to GPU since they are all zero and not yet needed there */ 1186 PetscCall(MatBoundToCPU(J,&alreadyboundtocpu)); 1187 PetscCall(MatBindToCPU(J,PETSC_TRUE)); 1188 PetscCall(MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY)); 1189 PetscCall(MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY)); 1190 if (!alreadyboundtocpu) PetscCall(MatBindToCPU(J,PETSC_FALSE)); 1191 PetscCall(MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE)); 1192 if (bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE) PetscCall(MatSetOption(J,MAT_SORTED_FULL,PETSC_FALSE)); 1193 } 1194 PetscCall(PetscFree2(rows,cols)); 1195 PetscFunctionReturn(0); 1196 } 1197 1198 PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ_Fill(DM da,Mat J) 1199 { 1200 PetscInt xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny; 1201 PetscInt m,n,dim,s,*cols,k,nc,row,col,cnt,maxcnt = 0,l,p,M,N; 1202 PetscInt lstart,lend,pstart,pend,*dnz,*onz; 1203 DM_DA *dd = (DM_DA*)da->data; 1204 PetscInt ifill_col,*ofill = dd->ofill, *dfill = dd->dfill; 1205 MPI_Comm comm; 1206 DMBoundaryType bx,by; 1207 ISLocalToGlobalMapping ltog; 1208 DMDAStencilType st; 1209 PetscBool removedups = PETSC_FALSE; 1210 1211 PetscFunctionBegin; 1212 /* 1213 nc - number of components per grid point 1214 col - number of colors needed in one direction for single component problem 1215 1216 */ 1217 PetscCall(DMDAGetInfo(da,&dim,&m,&n,NULL,&M,&N,NULL,&nc,&s,&bx,&by,NULL,&st)); 1218 col = 2*s + 1; 1219 /* 1220 With one processor in periodic domains in a skinny dimension the code will label 
nonzero columns multiple times 1221 because of "wrapping" around the end of the domain hitting an entry already counted in the other direction. 1222 */ 1223 if (M == 1 && 2*s >= m) removedups = PETSC_TRUE; 1224 if (N == 1 && 2*s >= n) removedups = PETSC_TRUE; 1225 PetscCall(DMDAGetCorners(da,&xs,&ys,NULL,&nx,&ny,NULL)); 1226 PetscCall(DMDAGetGhostCorners(da,&gxs,&gys,NULL,&gnx,&gny,NULL)); 1227 PetscCall(PetscObjectGetComm((PetscObject)da,&comm)); 1228 1229 PetscCall(PetscMalloc1(col*col*nc,&cols)); 1230 PetscCall(DMGetLocalToGlobalMapping(da,<og)); 1231 1232 PetscCall(MatSetBlockSize(J,nc)); 1233 /* determine the matrix preallocation information */ 1234 MatPreallocateBegin(comm,nc*nx*ny,nc*nx*ny,dnz,onz); 1235 for (i=xs; i<xs+nx; i++) { 1236 1237 pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i)); 1238 pend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,m-i-1)); 1239 1240 for (j=ys; j<ys+ny; j++) { 1241 slot = i - gxs + gnx*(j - gys); 1242 1243 lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j)); 1244 lend = (by == DM_BOUNDARY_PERIODIC) ? 
s : (PetscMin(s,n-j-1)); 1245 1246 for (k=0; k<nc; k++) { 1247 cnt = 0; 1248 for (l=lstart; l<lend+1; l++) { 1249 for (p=pstart; p<pend+1; p++) { 1250 if (l || p) { 1251 if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star */ 1252 for (ifill_col=ofill[k]; ifill_col<ofill[k+1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc*(slot + gnx*l + p); 1253 } 1254 } else { 1255 if (dfill) { 1256 for (ifill_col=dfill[k]; ifill_col<dfill[k+1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc*(slot + gnx*l + p); 1257 } else { 1258 for (ifill_col=0; ifill_col<nc; ifill_col++) cols[cnt++] = ifill_col + nc*(slot + gnx*l + p); 1259 } 1260 } 1261 } 1262 } 1263 row = k + nc*(slot); 1264 maxcnt = PetscMax(maxcnt,cnt); 1265 if (removedups) PetscCall(MatPreallocateSetLocalRemoveDups(ltog,1,&row,ltog,cnt,cols,dnz,onz)); 1266 else PetscCall(MatPreallocateSetLocal(ltog,1,&row,ltog,cnt,cols,dnz,onz)); 1267 } 1268 } 1269 } 1270 PetscCall(MatSeqAIJSetPreallocation(J,0,dnz)); 1271 PetscCall(MatMPIAIJSetPreallocation(J,0,dnz,0,onz)); 1272 MatPreallocateEnd(dnz,onz); 1273 PetscCall(MatSetLocalToGlobalMapping(J,ltog,ltog)); 1274 1275 /* 1276 For each node in the grid: we get the neighbors in the local (on processor ordering 1277 that includes the ghost points) then MatSetValuesLocal() maps those indices to the global 1278 PETSc ordering. 1279 */ 1280 if (!da->prealloc_only) { 1281 for (i=xs; i<xs+nx; i++) { 1282 1283 pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i)); 1284 pend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,m-i-1)); 1285 1286 for (j=ys; j<ys+ny; j++) { 1287 slot = i - gxs + gnx*(j - gys); 1288 1289 lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j)); 1290 lend = (by == DM_BOUNDARY_PERIODIC) ? 
s : (PetscMin(s,n-j-1)); 1291 1292 for (k=0; k<nc; k++) { 1293 cnt = 0; 1294 for (l=lstart; l<lend+1; l++) { 1295 for (p=pstart; p<pend+1; p++) { 1296 if (l || p) { 1297 if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star */ 1298 for (ifill_col=ofill[k]; ifill_col<ofill[k+1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc*(slot + gnx*l + p); 1299 } 1300 } else { 1301 if (dfill) { 1302 for (ifill_col=dfill[k]; ifill_col<dfill[k+1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc*(slot + gnx*l + p); 1303 } else { 1304 for (ifill_col=0; ifill_col<nc; ifill_col++) cols[cnt++] = ifill_col + nc*(slot + gnx*l + p); 1305 } 1306 } 1307 } 1308 } 1309 row = k + nc*(slot); 1310 PetscCall(MatSetValuesLocal(J,1,&row,cnt,cols,NULL,INSERT_VALUES)); 1311 } 1312 } 1313 } 1314 /* do not copy values to GPU since they are all zero and not yet needed there */ 1315 PetscCall(MatBindToCPU(J,PETSC_TRUE)); 1316 PetscCall(MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY)); 1317 PetscCall(MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY)); 1318 PetscCall(MatBindToCPU(J,PETSC_FALSE)); 1319 PetscCall(MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE)); 1320 } 1321 PetscCall(PetscFree(cols)); 1322 PetscFunctionReturn(0); 1323 } 1324 1325 /* ---------------------------------------------------------------------------------*/ 1326 1327 PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ(DM da,Mat J) 1328 { 1329 PetscInt xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny; 1330 PetscInt m,n,dim,s,*cols = NULL,k,nc,*rows = NULL,col,cnt,l,p,*dnz = NULL,*onz = NULL; 1331 PetscInt istart,iend,jstart,jend,kstart,kend,zs,nz,gzs,gnz,ii,jj,kk,M,N,P; 1332 MPI_Comm comm; 1333 DMBoundaryType bx,by,bz; 1334 ISLocalToGlobalMapping ltog,mltog; 1335 DMDAStencilType st; 1336 PetscBool removedups = PETSC_FALSE; 1337 1338 PetscFunctionBegin; 1339 /* 1340 nc - number of components per grid point 1341 col - number of colors needed in one direction for single component problem 1342 1343 */ 1344 
PetscCall(DMDAGetInfo(da,&dim,&m,&n,&p,&M,&N,&P,&nc,&s,&bx,&by,&bz,&st)); 1345 if (bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE && bz == DM_BOUNDARY_NONE) { 1346 PetscCall(MatSetOption(J,MAT_SORTED_FULL,PETSC_TRUE)); 1347 } 1348 col = 2*s + 1; 1349 1350 /* 1351 With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times 1352 because of "wrapping" around the end of the domain hitting an entry already counted in the other direction. 1353 */ 1354 if (M == 1 && 2*s >= m) removedups = PETSC_TRUE; 1355 if (N == 1 && 2*s >= n) removedups = PETSC_TRUE; 1356 if (P == 1 && 2*s >= p) removedups = PETSC_TRUE; 1357 1358 PetscCall(DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz)); 1359 PetscCall(DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz)); 1360 PetscCall(PetscObjectGetComm((PetscObject)da,&comm)); 1361 1362 PetscCall(PetscMalloc2(nc,&rows,col*col*col*nc*nc,&cols)); 1363 PetscCall(DMGetLocalToGlobalMapping(da,<og)); 1364 1365 PetscCall(MatSetBlockSize(J,nc)); 1366 /* determine the matrix preallocation information */ 1367 MatPreallocateBegin(comm,nc*nx*ny*nz,nc*nx*ny*nz,dnz,onz); 1368 for (i=xs; i<xs+nx; i++) { 1369 istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i)); 1370 iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,m-i-1)); 1371 for (j=ys; j<ys+ny; j++) { 1372 jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j)); 1373 jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,n-j-1)); 1374 for (k=zs; k<zs+nz; k++) { 1375 kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k)); 1376 kend = (bz == DM_BOUNDARY_PERIODIC) ? 
s : (PetscMin(s,p-k-1)); 1377 1378 slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs); 1379 1380 cnt = 0; 1381 for (l=0; l<nc; l++) { 1382 for (ii=istart; ii<iend+1; ii++) { 1383 for (jj=jstart; jj<jend+1; jj++) { 1384 for (kk=kstart; kk<kend+1; kk++) { 1385 if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/ 1386 cols[cnt++] = l + nc*(slot + ii + gnx*jj + gnx*gny*kk); 1387 } 1388 } 1389 } 1390 } 1391 rows[l] = l + nc*(slot); 1392 } 1393 if (removedups) PetscCall(MatPreallocateSetLocalRemoveDups(ltog,nc,rows,ltog,cnt,cols,dnz,onz)); 1394 else PetscCall(MatPreallocateSetLocal(ltog,nc,rows,ltog,cnt,cols,dnz,onz)); 1395 } 1396 } 1397 } 1398 PetscCall(MatSetBlockSize(J,nc)); 1399 PetscCall(MatSeqAIJSetPreallocation(J,0,dnz)); 1400 PetscCall(MatMPIAIJSetPreallocation(J,0,dnz,0,onz)); 1401 MatPreallocateEnd(dnz,onz); 1402 PetscCall(MatGetLocalToGlobalMapping(J,&mltog,NULL)); 1403 if (!mltog) { 1404 PetscCall(MatSetLocalToGlobalMapping(J,ltog,ltog)); 1405 } 1406 1407 /* 1408 For each node in the grid: we get the neighbors in the local (on processor ordering 1409 that includes the ghost points) then MatSetValuesLocal() maps those indices to the global 1410 PETSc ordering. 1411 */ 1412 if (!da->prealloc_only) { 1413 for (i=xs; i<xs+nx; i++) { 1414 istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i)); 1415 iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,m-i-1)); 1416 for (j=ys; j<ys+ny; j++) { 1417 jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j)); 1418 jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,n-j-1)); 1419 for (k=zs; k<zs+nz; k++) { 1420 kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k)); 1421 kend = (bz == DM_BOUNDARY_PERIODIC) ? 
s : (PetscMin(s,p-k-1)); 1422 1423 slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs); 1424 1425 cnt = 0; 1426 for (kk=kstart; kk<kend+1; kk++) { 1427 for (jj=jstart; jj<jend+1; jj++) { 1428 for (ii=istart; ii<iend+1; ii++) { 1429 if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/ 1430 cols[cnt++] = nc*(slot + ii + gnx*jj + gnx*gny*kk); 1431 for (l=1; l<nc; l++) { 1432 cols[cnt] = 1 + cols[cnt-1];cnt++; 1433 } 1434 } 1435 } 1436 } 1437 } 1438 rows[0] = nc*(slot); for (l=1; l<nc; l++) rows[l] = 1 + rows[l-1]; 1439 PetscCall(MatSetValuesLocal(J,nc,rows,cnt,cols,NULL,INSERT_VALUES)); 1440 } 1441 } 1442 } 1443 /* do not copy values to GPU since they are all zero and not yet needed there */ 1444 PetscCall(MatBindToCPU(J,PETSC_TRUE)); 1445 PetscCall(MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY)); 1446 PetscCall(MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY)); 1447 if (bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE && bz == DM_BOUNDARY_NONE) { 1448 PetscCall(MatSetOption(J,MAT_SORTED_FULL,PETSC_FALSE)); 1449 } 1450 PetscCall(MatBindToCPU(J,PETSC_FALSE)); 1451 PetscCall(MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE)); 1452 } 1453 PetscCall(PetscFree2(rows,cols)); 1454 PetscFunctionReturn(0); 1455 } 1456 1457 /* ---------------------------------------------------------------------------------*/ 1458 1459 PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ_Fill(DM da,Mat J) 1460 { 1461 DM_DA *dd = (DM_DA*)da->data; 1462 PetscInt xs,nx,i,j,gxs,gnx,row,k,l; 1463 PetscInt m,dim,s,*cols = NULL,nc,cnt,maxcnt = 0,*ocols; 1464 PetscInt *ofill = dd->ofill,*dfill = dd->dfill; 1465 DMBoundaryType bx; 1466 ISLocalToGlobalMapping ltog; 1467 PetscMPIInt rank,size; 1468 1469 PetscFunctionBegin; 1470 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)da),&rank)); 1471 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)da),&size)); 1472 1473 /* 1474 nc - number of components per grid point 1475 1476 */ 1477 
PetscCall(DMDAGetInfo(da,&dim,&m,NULL,NULL,NULL,NULL,NULL,&nc,&s,&bx,NULL,NULL,NULL)); 1478 PetscCheck(s <= 1,PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"Matrix creation for 1d not implemented correctly for stencil width larger than 1"); 1479 PetscCall(DMDAGetCorners(da,&xs,NULL,NULL,&nx,NULL,NULL)); 1480 PetscCall(DMDAGetGhostCorners(da,&gxs,NULL,NULL,&gnx,NULL,NULL)); 1481 1482 PetscCall(MatSetBlockSize(J,nc)); 1483 PetscCall(PetscCalloc2(nx*nc,&cols,nx*nc,&ocols)); 1484 1485 /* 1486 note should be smaller for first and last process with no periodic 1487 does not handle dfill 1488 */ 1489 cnt = 0; 1490 /* coupling with process to the left */ 1491 for (i=0; i<s; i++) { 1492 for (j=0; j<nc; j++) { 1493 ocols[cnt] = ((rank == 0) ? 0 : (s - i)*(ofill[j+1] - ofill[j])); 1494 cols[cnt] = dfill[j+1] - dfill[j] + (s + i)*(ofill[j+1] - ofill[j]); 1495 if (rank == 0 && (dd->bx == DM_BOUNDARY_PERIODIC)) { 1496 if (size > 1) ocols[cnt] += (s - i)*(ofill[j+1] - ofill[j]); 1497 else cols[cnt] += (s - i)*(ofill[j+1] - ofill[j]); 1498 } 1499 maxcnt = PetscMax(maxcnt,ocols[cnt]+cols[cnt]); 1500 cnt++; 1501 } 1502 } 1503 for (i=s; i<nx-s; i++) { 1504 for (j=0; j<nc; j++) { 1505 cols[cnt] = dfill[j+1] - dfill[j] + 2*s*(ofill[j+1] - ofill[j]); 1506 maxcnt = PetscMax(maxcnt,ocols[cnt]+cols[cnt]); 1507 cnt++; 1508 } 1509 } 1510 /* coupling with process to the right */ 1511 for (i=nx-s; i<nx; i++) { 1512 for (j=0; j<nc; j++) { 1513 ocols[cnt] = ((rank == (size-1)) ? 
0 : (i - nx + s + 1)*(ofill[j+1] - ofill[j])); 1514 cols[cnt] = dfill[j+1] - dfill[j] + (s + nx - i - 1)*(ofill[j+1] - ofill[j]); 1515 if ((rank == size-1) && (dd->bx == DM_BOUNDARY_PERIODIC)) { 1516 if (size > 1) ocols[cnt] += (i - nx + s + 1)*(ofill[j+1] - ofill[j]); 1517 else cols[cnt] += (i - nx + s + 1)*(ofill[j+1] - ofill[j]); 1518 } 1519 maxcnt = PetscMax(maxcnt,ocols[cnt]+cols[cnt]); 1520 cnt++; 1521 } 1522 } 1523 1524 PetscCall(MatSeqAIJSetPreallocation(J,0,cols)); 1525 PetscCall(MatMPIAIJSetPreallocation(J,0,cols,0,ocols)); 1526 PetscCall(PetscFree2(cols,ocols)); 1527 1528 PetscCall(DMGetLocalToGlobalMapping(da,<og)); 1529 PetscCall(MatSetLocalToGlobalMapping(J,ltog,ltog)); 1530 1531 /* 1532 For each node in the grid: we get the neighbors in the local (on processor ordering 1533 that includes the ghost points) then MatSetValuesLocal() maps those indices to the global 1534 PETSc ordering. 1535 */ 1536 if (!da->prealloc_only) { 1537 PetscCall(PetscMalloc1(maxcnt,&cols)); 1538 row = xs*nc; 1539 /* coupling with process to the left */ 1540 for (i=xs; i<xs+s; i++) { 1541 for (j=0; j<nc; j++) { 1542 cnt = 0; 1543 if (rank) { 1544 for (l=0; l<s; l++) { 1545 for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i - s + l)*nc + ofill[k]; 1546 } 1547 } 1548 if (rank == 0 && (dd->bx == DM_BOUNDARY_PERIODIC)) { 1549 for (l=0; l<s; l++) { 1550 for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (m + i - s - l)*nc + ofill[k]; 1551 } 1552 } 1553 if (dfill) { 1554 for (k=dfill[j]; k<dfill[j+1]; k++) { 1555 cols[cnt++] = i*nc + dfill[k]; 1556 } 1557 } else { 1558 for (k=0; k<nc; k++) { 1559 cols[cnt++] = i*nc + k; 1560 } 1561 } 1562 for (l=0; l<s; l++) { 1563 for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i + s - l)*nc + ofill[k]; 1564 } 1565 PetscCall(MatSetValues(J,1,&row,cnt,cols,NULL,INSERT_VALUES)); 1566 row++; 1567 } 1568 } 1569 for (i=xs+s; i<xs+nx-s; i++) { 1570 for (j=0; j<nc; j++) { 1571 cnt = 0; 1572 for (l=0; l<s; l++) { 1573 for (k=ofill[j]; k<ofill[j+1]; k++) 
cols[cnt++] = (i - s + l)*nc + ofill[k]; 1574 } 1575 if (dfill) { 1576 for (k=dfill[j]; k<dfill[j+1]; k++) { 1577 cols[cnt++] = i*nc + dfill[k]; 1578 } 1579 } else { 1580 for (k=0; k<nc; k++) { 1581 cols[cnt++] = i*nc + k; 1582 } 1583 } 1584 for (l=0; l<s; l++) { 1585 for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i + s - l)*nc + ofill[k]; 1586 } 1587 PetscCall(MatSetValues(J,1,&row,cnt,cols,NULL,INSERT_VALUES)); 1588 row++; 1589 } 1590 } 1591 /* coupling with process to the right */ 1592 for (i=xs+nx-s; i<xs+nx; i++) { 1593 for (j=0; j<nc; j++) { 1594 cnt = 0; 1595 for (l=0; l<s; l++) { 1596 for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i - s + l)*nc + ofill[k]; 1597 } 1598 if (dfill) { 1599 for (k=dfill[j]; k<dfill[j+1]; k++) { 1600 cols[cnt++] = i*nc + dfill[k]; 1601 } 1602 } else { 1603 for (k=0; k<nc; k++) { 1604 cols[cnt++] = i*nc + k; 1605 } 1606 } 1607 if (rank < size-1) { 1608 for (l=0; l<s; l++) { 1609 for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i + s - l)*nc + ofill[k]; 1610 } 1611 } 1612 if ((rank == size-1) && (dd->bx == DM_BOUNDARY_PERIODIC)) { 1613 for (l=0; l<s; l++) { 1614 for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i - s - l - m + 2)*nc + ofill[k]; 1615 } 1616 } 1617 PetscCall(MatSetValues(J,1,&row,cnt,cols,NULL,INSERT_VALUES)); 1618 row++; 1619 } 1620 } 1621 PetscCall(PetscFree(cols)); 1622 /* do not copy values to GPU since they are all zero and not yet needed there */ 1623 PetscCall(MatBindToCPU(J,PETSC_TRUE)); 1624 PetscCall(MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY)); 1625 PetscCall(MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY)); 1626 PetscCall(MatBindToCPU(J,PETSC_FALSE)); 1627 PetscCall(MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE)); 1628 } 1629 PetscFunctionReturn(0); 1630 } 1631 1632 /* ---------------------------------------------------------------------------------*/ 1633 1634 PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ(DM da,Mat J) 1635 { 1636 PetscInt xs,nx,i,i1,slot,gxs,gnx; 1637 PetscInt m,dim,s,*cols = 
NULL,nc,*rows = NULL,col,cnt,l; 1638 PetscInt istart,iend; 1639 DMBoundaryType bx; 1640 ISLocalToGlobalMapping ltog,mltog; 1641 1642 PetscFunctionBegin; 1643 /* 1644 nc - number of components per grid point 1645 col - number of colors needed in one direction for single component problem 1646 1647 */ 1648 PetscCall(DMDAGetInfo(da,&dim,&m,NULL,NULL,NULL,NULL,NULL,&nc,&s,&bx,NULL,NULL,NULL)); 1649 if (bx == DM_BOUNDARY_NONE) { 1650 PetscCall(MatSetOption(J,MAT_SORTED_FULL,PETSC_TRUE)); 1651 } 1652 col = 2*s + 1; 1653 1654 PetscCall(DMDAGetCorners(da,&xs,NULL,NULL,&nx,NULL,NULL)); 1655 PetscCall(DMDAGetGhostCorners(da,&gxs,NULL,NULL,&gnx,NULL,NULL)); 1656 1657 PetscCall(MatSetBlockSize(J,nc)); 1658 PetscCall(MatSeqAIJSetPreallocation(J,col*nc,NULL)); 1659 PetscCall(MatMPIAIJSetPreallocation(J,col*nc,NULL,col*nc,NULL)); 1660 1661 PetscCall(DMGetLocalToGlobalMapping(da,<og)); 1662 PetscCall(MatGetLocalToGlobalMapping(J,&mltog,NULL)); 1663 if (!mltog) { 1664 PetscCall(MatSetLocalToGlobalMapping(J,ltog,ltog)); 1665 } 1666 1667 /* 1668 For each node in the grid: we get the neighbors in the local (on processor ordering 1669 that includes the ghost points) then MatSetValuesLocal() maps those indices to the global 1670 PETSc ordering. 
1671 */ 1672 if (!da->prealloc_only) { 1673 PetscCall(PetscMalloc2(nc,&rows,col*nc*nc,&cols)); 1674 for (i=xs; i<xs+nx; i++) { 1675 istart = PetscMax(-s,gxs - i); 1676 iend = PetscMin(s,gxs + gnx - i - 1); 1677 slot = i - gxs; 1678 1679 cnt = 0; 1680 for (i1=istart; i1<iend+1; i1++) { 1681 cols[cnt++] = nc*(slot + i1); 1682 for (l=1; l<nc; l++) { 1683 cols[cnt] = 1 + cols[cnt-1];cnt++; 1684 } 1685 } 1686 rows[0] = nc*(slot); for (l=1; l<nc; l++) rows[l] = 1 + rows[l-1]; 1687 PetscCall(MatSetValuesLocal(J,nc,rows,cnt,cols,NULL,INSERT_VALUES)); 1688 } 1689 /* do not copy values to GPU since they are all zero and not yet needed there */ 1690 PetscCall(MatBindToCPU(J,PETSC_TRUE)); 1691 PetscCall(MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY)); 1692 PetscCall(MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY)); 1693 if (bx == DM_BOUNDARY_NONE) { 1694 PetscCall(MatSetOption(J,MAT_SORTED_FULL,PETSC_FALSE)); 1695 } 1696 PetscCall(MatBindToCPU(J,PETSC_FALSE)); 1697 PetscCall(MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE)); 1698 PetscCall(PetscFree2(rows,cols)); 1699 } 1700 PetscFunctionReturn(0); 1701 } 1702 1703 /* ---------------------------------------------------------------------------------*/ 1704 1705 PetscErrorCode DMCreateMatrix_DA_1d_SeqAIJ_NoPreallocation(DM da,Mat J) 1706 { 1707 PetscInt xs,nx,i,i1,slot,gxs,gnx; 1708 PetscInt m,dim,s,*cols = NULL,nc,*rows = NULL,col,cnt,l; 1709 PetscInt istart,iend; 1710 DMBoundaryType bx; 1711 ISLocalToGlobalMapping ltog,mltog; 1712 1713 PetscFunctionBegin; 1714 /* 1715 nc - number of components per grid point 1716 col - number of colors needed in one direction for single component problem 1717 */ 1718 PetscCall(DMDAGetInfo(da,&dim,&m,NULL,NULL,NULL,NULL,NULL,&nc,&s,&bx,NULL,NULL,NULL)); 1719 col = 2*s + 1; 1720 1721 PetscCall(DMDAGetCorners(da,&xs,NULL,NULL,&nx,NULL,NULL)); 1722 PetscCall(DMDAGetGhostCorners(da,&gxs,NULL,NULL,&gnx,NULL,NULL)); 1723 1724 PetscCall(MatSetBlockSize(J,nc)); 1725 
PetscCall(MatSeqAIJSetTotalPreallocation(J,nx*nc*col*nc)); 1726 1727 PetscCall(DMGetLocalToGlobalMapping(da,<og)); 1728 PetscCall(MatGetLocalToGlobalMapping(J,&mltog,NULL)); 1729 if (!mltog) { 1730 PetscCall(MatSetLocalToGlobalMapping(J,ltog,ltog)); 1731 } 1732 1733 /* 1734 For each node in the grid: we get the neighbors in the local (on processor ordering 1735 that includes the ghost points) then MatSetValuesLocal() maps those indices to the global 1736 PETSc ordering. 1737 */ 1738 if (!da->prealloc_only) { 1739 PetscCall(PetscMalloc2(nc,&rows,col*nc*nc,&cols)); 1740 for (i=xs; i<xs+nx; i++) { 1741 istart = PetscMax(-s,gxs - i); 1742 iend = PetscMin(s,gxs + gnx - i - 1); 1743 slot = i - gxs; 1744 1745 cnt = 0; 1746 for (i1=istart; i1<iend+1; i1++) { 1747 cols[cnt++] = nc*(slot + i1); 1748 for (l=1; l<nc; l++) { 1749 cols[cnt] = 1 + cols[cnt-1];cnt++; 1750 } 1751 } 1752 rows[0] = nc*(slot); for (l=1; l<nc; l++) rows[l] = 1 + rows[l-1]; 1753 PetscCall(MatSetValuesLocal(J,nc,rows,cnt,cols,NULL,INSERT_VALUES)); 1754 } 1755 /* do not copy values to GPU since they are all zero and not yet needed there */ 1756 PetscCall(MatBindToCPU(J,PETSC_TRUE)); 1757 PetscCall(MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY)); 1758 PetscCall(MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY)); 1759 if (bx == DM_BOUNDARY_NONE) { 1760 PetscCall(MatSetOption(J,MAT_SORTED_FULL,PETSC_FALSE)); 1761 } 1762 PetscCall(MatBindToCPU(J,PETSC_FALSE)); 1763 PetscCall(MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE)); 1764 PetscCall(PetscFree2(rows,cols)); 1765 } 1766 PetscCall(MatSetOption(J,MAT_SORTED_FULL,PETSC_FALSE)); 1767 PetscFunctionReturn(0); 1768 } 1769 1770 PetscErrorCode DMCreateMatrix_DA_2d_MPIBAIJ(DM da,Mat J) 1771 { 1772 PetscInt xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny; 1773 PetscInt m,n,dim,s,*cols,nc,col,cnt,*dnz,*onz; 1774 PetscInt istart,iend,jstart,jend,ii,jj; 1775 MPI_Comm comm; 1776 PetscScalar *values; 1777 DMBoundaryType bx,by; 1778 DMDAStencilType st; 1779 ISLocalToGlobalMapping ltog; 1780 1781 
PetscFunctionBegin; 1782 /* 1783 nc - number of components per grid point 1784 col - number of colors needed in one direction for single component problem 1785 */ 1786 PetscCall(DMDAGetInfo(da,&dim,&m,&n,NULL,NULL,NULL,NULL,&nc,&s,&bx,&by,NULL,&st)); 1787 col = 2*s + 1; 1788 1789 PetscCall(DMDAGetCorners(da,&xs,&ys,NULL,&nx,&ny,NULL)); 1790 PetscCall(DMDAGetGhostCorners(da,&gxs,&gys,NULL,&gnx,&gny,NULL)); 1791 PetscCall(PetscObjectGetComm((PetscObject)da,&comm)); 1792 1793 PetscCall(PetscMalloc1(col*col*nc*nc,&cols)); 1794 1795 PetscCall(DMGetLocalToGlobalMapping(da,<og)); 1796 1797 /* determine the matrix preallocation information */ 1798 MatPreallocateBegin(comm,nx*ny,nx*ny,dnz,onz); 1799 for (i=xs; i<xs+nx; i++) { 1800 istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i)); 1801 iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,m-i-1)); 1802 for (j=ys; j<ys+ny; j++) { 1803 jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j)); 1804 jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,n-j-1)); 1805 slot = i - gxs + gnx*(j - gys); 1806 1807 /* Find block columns in block row */ 1808 cnt = 0; 1809 for (ii=istart; ii<iend+1; ii++) { 1810 for (jj=jstart; jj<jend+1; jj++) { 1811 if (st == DMDA_STENCIL_BOX || !ii || !jj) { /* BOX or on the STAR */ 1812 cols[cnt++] = slot + ii + gnx*jj; 1813 } 1814 } 1815 } 1816 PetscCall(MatPreallocateSetLocalBlock(ltog,1,&slot,ltog,cnt,cols,dnz,onz)); 1817 } 1818 } 1819 PetscCall(MatSeqBAIJSetPreallocation(J,nc,0,dnz)); 1820 PetscCall(MatMPIBAIJSetPreallocation(J,nc,0,dnz,0,onz)); 1821 MatPreallocateEnd(dnz,onz); 1822 1823 PetscCall(MatSetLocalToGlobalMapping(J,ltog,ltog)); 1824 1825 /* 1826 For each node in the grid: we get the neighbors in the local (on processor ordering 1827 that includes the ghost points) then MatSetValuesLocal() maps those indices to the global 1828 PETSc ordering. 
1829 */ 1830 if (!da->prealloc_only) { 1831 PetscCall(PetscCalloc1(col*col*nc*nc,&values)); 1832 for (i=xs; i<xs+nx; i++) { 1833 istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i)); 1834 iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,m-i-1)); 1835 for (j=ys; j<ys+ny; j++) { 1836 jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j)); 1837 jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,n-j-1)); 1838 slot = i - gxs + gnx*(j - gys); 1839 cnt = 0; 1840 for (ii=istart; ii<iend+1; ii++) { 1841 for (jj=jstart; jj<jend+1; jj++) { 1842 if (st == DMDA_STENCIL_BOX || !ii || !jj) { /* BOX or on the STAR */ 1843 cols[cnt++] = slot + ii + gnx*jj; 1844 } 1845 } 1846 } 1847 PetscCall(MatSetValuesBlockedLocal(J,1,&slot,cnt,cols,values,INSERT_VALUES)); 1848 } 1849 } 1850 PetscCall(PetscFree(values)); 1851 /* do not copy values to GPU since they are all zero and not yet needed there */ 1852 PetscCall(MatBindToCPU(J,PETSC_TRUE)); 1853 PetscCall(MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY)); 1854 PetscCall(MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY)); 1855 PetscCall(MatBindToCPU(J,PETSC_FALSE)); 1856 PetscCall(MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE)); 1857 } 1858 PetscCall(PetscFree(cols)); 1859 PetscFunctionReturn(0); 1860 } 1861 1862 PetscErrorCode DMCreateMatrix_DA_3d_MPIBAIJ(DM da,Mat J) 1863 { 1864 PetscInt xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny; 1865 PetscInt m,n,dim,s,*cols,k,nc,col,cnt,p,*dnz,*onz; 1866 PetscInt istart,iend,jstart,jend,kstart,kend,zs,nz,gzs,gnz,ii,jj,kk; 1867 MPI_Comm comm; 1868 PetscScalar *values; 1869 DMBoundaryType bx,by,bz; 1870 DMDAStencilType st; 1871 ISLocalToGlobalMapping ltog; 1872 1873 PetscFunctionBegin; 1874 /* 1875 nc - number of components per grid point 1876 col - number of colors needed in one direction for single component problem 1877 1878 */ 1879 PetscCall(DMDAGetInfo(da,&dim,&m,&n,&p,NULL,NULL,NULL,&nc,&s,&bx,&by,&bz,&st)); 1880 col = 2*s + 1; 1881 1882 PetscCall(DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz)); 
1883 PetscCall(DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz)); 1884 PetscCall(PetscObjectGetComm((PetscObject)da,&comm)); 1885 1886 PetscCall(PetscMalloc1(col*col*col,&cols)); 1887 1888 PetscCall(DMGetLocalToGlobalMapping(da,<og)); 1889 1890 /* determine the matrix preallocation information */ 1891 MatPreallocateBegin(comm,nx*ny*nz,nx*ny*nz,dnz,onz); 1892 for (i=xs; i<xs+nx; i++) { 1893 istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i)); 1894 iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,m-i-1)); 1895 for (j=ys; j<ys+ny; j++) { 1896 jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j)); 1897 jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,n-j-1)); 1898 for (k=zs; k<zs+nz; k++) { 1899 kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k)); 1900 kend = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,p-k-1)); 1901 1902 slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs); 1903 1904 /* Find block columns in block row */ 1905 cnt = 0; 1906 for (ii=istart; ii<iend+1; ii++) { 1907 for (jj=jstart; jj<jend+1; jj++) { 1908 for (kk=kstart; kk<kend+1; kk++) { 1909 if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/ 1910 cols[cnt++] = slot + ii + gnx*jj + gnx*gny*kk; 1911 } 1912 } 1913 } 1914 } 1915 PetscCall(MatPreallocateSetLocalBlock(ltog,1,&slot,ltog,cnt,cols,dnz,onz)); 1916 } 1917 } 1918 } 1919 PetscCall(MatSeqBAIJSetPreallocation(J,nc,0,dnz)); 1920 PetscCall(MatMPIBAIJSetPreallocation(J,nc,0,dnz,0,onz)); 1921 MatPreallocateEnd(dnz,onz); 1922 1923 PetscCall(MatSetLocalToGlobalMapping(J,ltog,ltog)); 1924 1925 /* 1926 For each node in the grid: we get the neighbors in the local (on processor ordering 1927 that includes the ghost points) then MatSetValuesLocal() maps those indices to the global 1928 PETSc ordering. 
  */
  if (!da->prealloc_only) {
    /* values is calloc'ed: only zeros are inserted, establishing the nonzero pattern */
    PetscCall(PetscCalloc1(col*col*col*nc*nc,&values));
    for (i=xs; i<xs+nx; i++) {
      istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
      iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,m-i-1));
      for (j=ys; j<ys+ny; j++) {
        jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
        jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,n-j-1));
        for (k=zs; k<zs+nz; k++) {
          kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
          kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,p-k-1));

          /* local (ghosted) index of grid point (i,j,k) */
          slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);

          cnt = 0;
          for (ii=istart; ii<iend+1; ii++) {
            for (jj=jstart; jj<jend+1; jj++) {
              for (kk=kstart; kk<kend+1; kk++) {
                if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
                  cols[cnt++] = slot + ii + gnx*jj + gnx*gny*kk;
                }
              }
            }
          }
          PetscCall(MatSetValuesBlockedLocal(J,1,&slot,cnt,cols,values,INSERT_VALUES));
        }
      }
    }
    PetscCall(PetscFree(values));
    /* do not copy values to GPU since they are all zero and not yet needed there */
    PetscCall(MatBindToCPU(J,PETSC_TRUE));
    PetscCall(MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY));
    PetscCall(MatBindToCPU(J,PETSC_FALSE));
    PetscCall(MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE));
  }
  PetscCall(PetscFree(cols));
  PetscFunctionReturn(0);
}

/*
   This helper is for SBAIJ preallocation, to discard the lower-triangular values which are difficult to
   identify in the local ordering with periodic domain.
1973 */ 1974 static PetscErrorCode L2GFilterUpperTriangular(ISLocalToGlobalMapping ltog,PetscInt *row,PetscInt *cnt,PetscInt col[]) 1975 { 1976 PetscInt i,n; 1977 1978 PetscFunctionBegin; 1979 PetscCall(ISLocalToGlobalMappingApplyBlock(ltog,1,row,row)); 1980 PetscCall(ISLocalToGlobalMappingApplyBlock(ltog,*cnt,col,col)); 1981 for (i=0,n=0; i<*cnt; i++) { 1982 if (col[i] >= *row) col[n++] = col[i]; 1983 } 1984 *cnt = n; 1985 PetscFunctionReturn(0); 1986 } 1987 1988 PetscErrorCode DMCreateMatrix_DA_2d_MPISBAIJ(DM da,Mat J) 1989 { 1990 PetscInt xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny; 1991 PetscInt m,n,dim,s,*cols,nc,col,cnt,*dnz,*onz; 1992 PetscInt istart,iend,jstart,jend,ii,jj; 1993 MPI_Comm comm; 1994 PetscScalar *values; 1995 DMBoundaryType bx,by; 1996 DMDAStencilType st; 1997 ISLocalToGlobalMapping ltog; 1998 1999 PetscFunctionBegin; 2000 /* 2001 nc - number of components per grid point 2002 col - number of colors needed in one direction for single component problem 2003 */ 2004 PetscCall(DMDAGetInfo(da,&dim,&m,&n,NULL,NULL,NULL,NULL,&nc,&s,&bx,&by,NULL,&st)); 2005 col = 2*s + 1; 2006 2007 PetscCall(DMDAGetCorners(da,&xs,&ys,NULL,&nx,&ny,NULL)); 2008 PetscCall(DMDAGetGhostCorners(da,&gxs,&gys,NULL,&gnx,&gny,NULL)); 2009 PetscCall(PetscObjectGetComm((PetscObject)da,&comm)); 2010 2011 PetscCall(PetscMalloc1(col*col*nc*nc,&cols)); 2012 2013 PetscCall(DMGetLocalToGlobalMapping(da,<og)); 2014 2015 /* determine the matrix preallocation information */ 2016 MatPreallocateBegin(comm,nx*ny,nx*ny,dnz,onz); 2017 for (i=xs; i<xs+nx; i++) { 2018 istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i)); 2019 iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,m-i-1)); 2020 for (j=ys; j<ys+ny; j++) { 2021 jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j)); 2022 jend = (by == DM_BOUNDARY_PERIODIC) ? 
s : (PetscMin(s,n-j-1)); 2023 slot = i - gxs + gnx*(j - gys); 2024 2025 /* Find block columns in block row */ 2026 cnt = 0; 2027 for (ii=istart; ii<iend+1; ii++) { 2028 for (jj=jstart; jj<jend+1; jj++) { 2029 if (st == DMDA_STENCIL_BOX || !ii || !jj) { 2030 cols[cnt++] = slot + ii + gnx*jj; 2031 } 2032 } 2033 } 2034 PetscCall(L2GFilterUpperTriangular(ltog,&slot,&cnt,cols)); 2035 PetscCall(MatPreallocateSymmetricSetBlock(slot,cnt,cols,dnz,onz)); 2036 } 2037 } 2038 PetscCall(MatSeqSBAIJSetPreallocation(J,nc,0,dnz)); 2039 PetscCall(MatMPISBAIJSetPreallocation(J,nc,0,dnz,0,onz)); 2040 MatPreallocateEnd(dnz,onz); 2041 2042 PetscCall(MatSetLocalToGlobalMapping(J,ltog,ltog)); 2043 2044 /* 2045 For each node in the grid: we get the neighbors in the local (on processor ordering 2046 that includes the ghost points) then MatSetValuesLocal() maps those indices to the global 2047 PETSc ordering. 2048 */ 2049 if (!da->prealloc_only) { 2050 PetscCall(PetscCalloc1(col*col*nc*nc,&values)); 2051 for (i=xs; i<xs+nx; i++) { 2052 istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i)); 2053 iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,m-i-1)); 2054 for (j=ys; j<ys+ny; j++) { 2055 jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j)); 2056 jend = (by == DM_BOUNDARY_PERIODIC) ? 
s : (PetscMin(s,n-j-1)); 2057 slot = i - gxs + gnx*(j - gys); 2058 2059 /* Find block columns in block row */ 2060 cnt = 0; 2061 for (ii=istart; ii<iend+1; ii++) { 2062 for (jj=jstart; jj<jend+1; jj++) { 2063 if (st == DMDA_STENCIL_BOX || !ii || !jj) { 2064 cols[cnt++] = slot + ii + gnx*jj; 2065 } 2066 } 2067 } 2068 PetscCall(L2GFilterUpperTriangular(ltog,&slot,&cnt,cols)); 2069 PetscCall(MatSetValuesBlocked(J,1,&slot,cnt,cols,values,INSERT_VALUES)); 2070 } 2071 } 2072 PetscCall(PetscFree(values)); 2073 /* do not copy values to GPU since they are all zero and not yet needed there */ 2074 PetscCall(MatBindToCPU(J,PETSC_TRUE)); 2075 PetscCall(MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY)); 2076 PetscCall(MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY)); 2077 PetscCall(MatBindToCPU(J,PETSC_FALSE)); 2078 PetscCall(MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE)); 2079 } 2080 PetscCall(PetscFree(cols)); 2081 PetscFunctionReturn(0); 2082 } 2083 2084 PetscErrorCode DMCreateMatrix_DA_3d_MPISBAIJ(DM da,Mat J) 2085 { 2086 PetscInt xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny; 2087 PetscInt m,n,dim,s,*cols,k,nc,col,cnt,p,*dnz,*onz; 2088 PetscInt istart,iend,jstart,jend,kstart,kend,zs,nz,gzs,gnz,ii,jj,kk; 2089 MPI_Comm comm; 2090 PetscScalar *values; 2091 DMBoundaryType bx,by,bz; 2092 DMDAStencilType st; 2093 ISLocalToGlobalMapping ltog; 2094 2095 PetscFunctionBegin; 2096 /* 2097 nc - number of components per grid point 2098 col - number of colors needed in one direction for single component problem 2099 */ 2100 PetscCall(DMDAGetInfo(da,&dim,&m,&n,&p,NULL,NULL,NULL,&nc,&s,&bx,&by,&bz,&st)); 2101 col = 2*s + 1; 2102 2103 PetscCall(DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz)); 2104 PetscCall(DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz)); 2105 PetscCall(PetscObjectGetComm((PetscObject)da,&comm)); 2106 2107 /* create the matrix */ 2108 PetscCall(PetscMalloc1(col*col*col,&cols)); 2109 2110 PetscCall(DMGetLocalToGlobalMapping(da,<og)); 2111 2112 /* determine the matrix preallocation 
information */ 2113 MatPreallocateBegin(comm,nx*ny*nz,nx*ny*nz,dnz,onz); 2114 for (i=xs; i<xs+nx; i++) { 2115 istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i)); 2116 iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,m-i-1)); 2117 for (j=ys; j<ys+ny; j++) { 2118 jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j)); 2119 jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,n-j-1)); 2120 for (k=zs; k<zs+nz; k++) { 2121 kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k)); 2122 kend = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,p-k-1)); 2123 2124 slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs); 2125 2126 /* Find block columns in block row */ 2127 cnt = 0; 2128 for (ii=istart; ii<iend+1; ii++) { 2129 for (jj=jstart; jj<jend+1; jj++) { 2130 for (kk=kstart; kk<kend+1; kk++) { 2131 if ((st == DMDA_STENCIL_BOX) || (!ii && !jj) || (!jj && !kk) || (!ii && !kk)) { 2132 cols[cnt++] = slot + ii + gnx*jj + gnx*gny*kk; 2133 } 2134 } 2135 } 2136 } 2137 PetscCall(L2GFilterUpperTriangular(ltog,&slot,&cnt,cols)); 2138 PetscCall(MatPreallocateSymmetricSetBlock(slot,cnt,cols,dnz,onz)); 2139 } 2140 } 2141 } 2142 PetscCall(MatSeqSBAIJSetPreallocation(J,nc,0,dnz)); 2143 PetscCall(MatMPISBAIJSetPreallocation(J,nc,0,dnz,0,onz)); 2144 MatPreallocateEnd(dnz,onz); 2145 2146 PetscCall(MatSetLocalToGlobalMapping(J,ltog,ltog)); 2147 2148 /* 2149 For each node in the grid: we get the neighbors in the local (on processor ordering 2150 that includes the ghost points) then MatSetValuesLocal() maps those indices to the global 2151 PETSc ordering. 2152 */ 2153 if (!da->prealloc_only) { 2154 PetscCall(PetscCalloc1(col*col*col*nc*nc,&values)); 2155 for (i=xs; i<xs+nx; i++) { 2156 istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i)); 2157 iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,m-i-1)); 2158 for (j=ys; j<ys+ny; j++) { 2159 jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j)); 2160 jend = (by == DM_BOUNDARY_PERIODIC) ? 
s : (PetscMin(s,n-j-1)); 2161 for (k=zs; k<zs+nz; k++) { 2162 kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k)); 2163 kend = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,p-k-1)); 2164 2165 slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs); 2166 2167 cnt = 0; 2168 for (ii=istart; ii<iend+1; ii++) { 2169 for (jj=jstart; jj<jend+1; jj++) { 2170 for (kk=kstart; kk<kend+1; kk++) { 2171 if ((st == DMDA_STENCIL_BOX) || (!ii && !jj) || (!jj && !kk) || (!ii && !kk)) { 2172 cols[cnt++] = slot + ii + gnx*jj + gnx*gny*kk; 2173 } 2174 } 2175 } 2176 } 2177 PetscCall(L2GFilterUpperTriangular(ltog,&slot,&cnt,cols)); 2178 PetscCall(MatSetValuesBlocked(J,1,&slot,cnt,cols,values,INSERT_VALUES)); 2179 } 2180 } 2181 } 2182 PetscCall(PetscFree(values)); 2183 /* do not copy values to GPU since they are all zero and not yet needed there */ 2184 PetscCall(MatBindToCPU(J,PETSC_TRUE)); 2185 PetscCall(MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY)); 2186 PetscCall(MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY)); 2187 PetscCall(MatBindToCPU(J,PETSC_FALSE)); 2188 PetscCall(MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE)); 2189 } 2190 PetscCall(PetscFree(cols)); 2191 PetscFunctionReturn(0); 2192 } 2193 2194 /* ---------------------------------------------------------------------------------*/ 2195 2196 PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ_Fill(DM da,Mat J) 2197 { 2198 PetscInt xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny; 2199 PetscInt m,n,dim,s,*cols,k,nc,row,col,cnt, maxcnt = 0,l,p,*dnz,*onz; 2200 PetscInt istart,iend,jstart,jend,kstart,kend,zs,nz,gzs,gnz,ii,jj,kk,M,N,P; 2201 DM_DA *dd = (DM_DA*)da->data; 2202 PetscInt ifill_col,*dfill = dd->dfill,*ofill = dd->ofill; 2203 MPI_Comm comm; 2204 PetscScalar *values; 2205 DMBoundaryType bx,by,bz; 2206 ISLocalToGlobalMapping ltog; 2207 DMDAStencilType st; 2208 PetscBool removedups = PETSC_FALSE; 2209 2210 PetscFunctionBegin; 2211 /* 2212 nc - number of components per grid point 2213 col - number of colors needed in one direction for single 
component problem 2214 2215 */ 2216 PetscCall(DMDAGetInfo(da,&dim,&m,&n,&p,&M,&N,&P,&nc,&s,&bx,&by,&bz,&st)); 2217 col = 2*s + 1; 2218 PetscCheck(bx != DM_BOUNDARY_PERIODIC || (m % col) == 0,PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in X is divisible\n\ 2219 by 2*stencil_width + 1\n"); 2220 PetscCheck(by != DM_BOUNDARY_PERIODIC || (n % col) == 0,PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in Y is divisible\n\ 2221 by 2*stencil_width + 1\n"); 2222 PetscCheck(bz != DM_BOUNDARY_PERIODIC || (p % col) == 0,PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in Z is divisible\n\ 2223 by 2*stencil_width + 1\n"); 2224 2225 /* 2226 With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times 2227 because of "wrapping" around the end of the domain hitting an entry already counted in the other direction. 2228 */ 2229 if (M == 1 && 2*s >= m) removedups = PETSC_TRUE; 2230 if (N == 1 && 2*s >= n) removedups = PETSC_TRUE; 2231 if (P == 1 && 2*s >= p) removedups = PETSC_TRUE; 2232 2233 PetscCall(DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz)); 2234 PetscCall(DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz)); 2235 PetscCall(PetscObjectGetComm((PetscObject)da,&comm)); 2236 2237 PetscCall(PetscMalloc1(col*col*col*nc,&cols)); 2238 PetscCall(DMGetLocalToGlobalMapping(da,<og)); 2239 2240 /* determine the matrix preallocation information */ 2241 MatPreallocateBegin(comm,nc*nx*ny*nz,nc*nx*ny*nz,dnz,onz); 2242 2243 PetscCall(MatSetBlockSize(J,nc)); 2244 for (i=xs; i<xs+nx; i++) { 2245 istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i)); 2246 iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,m-i-1)); 2247 for (j=ys; j<ys+ny; j++) { 2248 jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j)); 2249 jend = (by == DM_BOUNDARY_PERIODIC) ? 
s : (PetscMin(s,n-j-1));
      for (k=zs; k<zs+nz; k++) {
        kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
        kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,p-k-1));

        /* local (ghosted) index of grid point (i,j,k) */
        slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);

        /* one matrix row per component l of this grid point */
        for (l=0; l<nc; l++) {
          cnt = 0;
          for (ii=istart; ii<iend+1; ii++) {
            for (jj=jstart; jj<jend+1; jj++) {
              for (kk=kstart; kk<kend+1; kk++) {
                if (ii || jj || kk) {
                  if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
                    /* off-diagonal block: couple only the components listed in the ofill pattern */
                    for (ifill_col=ofill[l]; ifill_col<ofill[l+1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc*(slot + ii + gnx*jj + gnx*gny*kk);
                  }
                } else {
                  if (dfill) {
                    /* diagonal block: couple only the components listed in the dfill pattern */
                    for (ifill_col=dfill[l]; ifill_col<dfill[l+1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc*(slot + ii + gnx*jj + gnx*gny*kk);
                  } else {
                    /* no dfill pattern given: dense coupling within the diagonal block */
                    for (ifill_col=0; ifill_col<nc; ifill_col++) cols[cnt++] = ifill_col + nc*(slot + ii + gnx*jj + gnx*gny*kk);
                  }
                }
              }
            }
          }
          row    = l + nc*(slot);
          /* remember the widest row so the zero-fill buffer below can be sized once */
          maxcnt = PetscMax(maxcnt,cnt);
          if (removedups) PetscCall(MatPreallocateSetLocalRemoveDups(ltog,1,&row,ltog,cnt,cols,dnz,onz));
          else PetscCall(MatPreallocateSetLocal(ltog,1,&row,ltog,cnt,cols,dnz,onz));
        }
      }
    }
  }
  PetscCall(MatSeqAIJSetPreallocation(J,0,dnz));
  PetscCall(MatMPIAIJSetPreallocation(J,0,dnz,0,onz));
  MatPreallocateEnd(dnz,onz);
  PetscCall(MatSetLocalToGlobalMapping(J,ltog,ltog));

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    /* values is calloc'ed: only zeros are inserted, establishing the nonzero pattern */
    PetscCall(PetscCalloc1(maxcnt,&values));
    for (i=xs; i<xs+nx; i++) {
      istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
      iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,m-i-1));
      for (j=ys; j<ys+ny; j++) {
        jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
        jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,n-j-1));
        for (k=zs; k<zs+nz; k++) {
          kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
          kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s,p-k-1));

          slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);

          /* same column generation as the preallocation pass above */
          for (l=0; l<nc; l++) {
            cnt = 0;
            for (ii=istart; ii<iend+1; ii++) {
              for (jj=jstart; jj<jend+1; jj++) {
                for (kk=kstart; kk<kend+1; kk++) {
                  if (ii || jj || kk) {
                    if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
                      for (ifill_col=ofill[l]; ifill_col<ofill[l+1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc*(slot + ii + gnx*jj + gnx*gny*kk);
                    }
                  } else {
                    if (dfill) {
                      for (ifill_col=dfill[l]; ifill_col<dfill[l+1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc*(slot + ii + gnx*jj + gnx*gny*kk);
                    } else {
                      for (ifill_col=0; ifill_col<nc; ifill_col++) cols[cnt++] = ifill_col + nc*(slot + ii + gnx*jj + gnx*gny*kk);
                    }
                  }
                }
              }
            }
            row = l + nc*(slot);
            PetscCall(MatSetValuesLocal(J,1,&row,cnt,cols,values,INSERT_VALUES));
          }
        }
      }
    }
    PetscCall(PetscFree(values));
    /* do not copy values to GPU since they are all zero and not yet needed there */
    PetscCall(MatBindToCPU(J,PETSC_TRUE));
    PetscCall(MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY));
    PetscCall(MatBindToCPU(J,PETSC_FALSE));
    PetscCall(MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE));
  }
  PetscCall(PetscFree(cols));
  PetscFunctionReturn(0);
}