1 2 #include <petsc/private/dmdaimpl.h> /*I "petscdmda.h" I*/ 3 #include <petscmat.h> 4 #include <petscbt.h> 5 6 extern PetscErrorCode DMCreateColoring_DA_1d_MPIAIJ(DM, ISColoringType, ISColoring *); 7 extern PetscErrorCode DMCreateColoring_DA_2d_MPIAIJ(DM, ISColoringType, ISColoring *); 8 extern PetscErrorCode DMCreateColoring_DA_2d_5pt_MPIAIJ(DM, ISColoringType, ISColoring *); 9 extern PetscErrorCode DMCreateColoring_DA_3d_MPIAIJ(DM, ISColoringType, ISColoring *); 10 11 /* 12 For ghost i that may be negative or greater than the upper bound this 13 maps it into the 0:m-1 range using periodicity 14 */ 15 #define SetInRange(i, m) ((i < 0) ? m + i : ((i >= m) ? i - m : i)) 16 17 static PetscErrorCode DMDASetBlockFills_Private(const PetscInt *dfill, PetscInt w, PetscInt **rfill) { 18 PetscInt i, j, nz, *fill; 19 20 PetscFunctionBegin; 21 if (!dfill) PetscFunctionReturn(0); 22 23 /* count number nonzeros */ 24 nz = 0; 25 for (i = 0; i < w; i++) { 26 for (j = 0; j < w; j++) { 27 if (dfill[w * i + j]) nz++; 28 } 29 } 30 PetscCall(PetscMalloc1(nz + w + 1, &fill)); 31 /* construct modified CSR storage of nonzero structure */ 32 /* fill[0 -- w] marks starts of each row of column indices (and end of last row) 33 so fill[1] - fill[0] gives number of nonzeros in first row etc */ 34 nz = w + 1; 35 for (i = 0; i < w; i++) { 36 fill[i] = nz; 37 for (j = 0; j < w; j++) { 38 if (dfill[w * i + j]) { 39 fill[nz] = j; 40 nz++; 41 } 42 } 43 } 44 fill[w] = nz; 45 46 *rfill = fill; 47 PetscFunctionReturn(0); 48 } 49 50 static PetscErrorCode DMDASetBlockFillsSparse_Private(const PetscInt *dfillsparse, PetscInt w, PetscInt **rfill) { 51 PetscInt nz; 52 53 PetscFunctionBegin; 54 if (!dfillsparse) PetscFunctionReturn(0); 55 56 /* Determine number of non-zeros */ 57 nz = (dfillsparse[w] - w - 1); 58 59 /* Allocate space for our copy of the given sparse matrix representation. 
*/ 60 PetscCall(PetscMalloc1(nz + w + 1, rfill)); 61 PetscCall(PetscArraycpy(*rfill, dfillsparse, nz + w + 1)); 62 PetscFunctionReturn(0); 63 } 64 65 static PetscErrorCode DMDASetBlockFills_Private2(DM_DA *dd) { 66 PetscInt i, k, cnt = 1; 67 68 PetscFunctionBegin; 69 70 /* ofillcount tracks the columns of ofill that have any nonzero in thems; the value in each location is the number of 71 columns to the left with any nonzeros in them plus 1 */ 72 PetscCall(PetscCalloc1(dd->w, &dd->ofillcols)); 73 for (i = 0; i < dd->w; i++) { 74 for (k = dd->ofill[i]; k < dd->ofill[i + 1]; k++) dd->ofillcols[dd->ofill[k]] = 1; 75 } 76 for (i = 0; i < dd->w; i++) { 77 if (dd->ofillcols[i]) dd->ofillcols[i] = cnt++; 78 } 79 PetscFunctionReturn(0); 80 } 81 82 /*@ 83 DMDASetBlockFills - Sets the fill pattern in each block for a multi-component problem 84 of the matrix returned by DMCreateMatrix(). 85 86 Logically Collective on da 87 88 Input Parameters: 89 + da - the distributed array 90 . dfill - the fill pattern in the diagonal block (may be NULL, means use dense block) 91 - ofill - the fill pattern in the off-diagonal blocks 92 93 Level: developer 94 95 Notes: 96 This only makes sense when you are doing multicomponent problems but using the 97 MPIAIJ matrix format 98 99 The format for dfill and ofill is a 2 dimensional dof by dof matrix with 1 entries 100 representing coupling and 0 entries for missing coupling. For example 101 $ dfill[9] = {1, 0, 0, 102 $ 1, 1, 0, 103 $ 0, 1, 1} 104 means that row 0 is coupled with only itself in the diagonal block, row 1 is coupled with 105 itself and row 0 (in the diagonal block) and row 2 is coupled with itself and row 1 (in the 106 diagonal block). 
107 108 DMDASetGetMatrix() allows you to provide general code for those more complicated nonzero patterns then 109 can be represented in the dfill, ofill format 110 111 Contributed by Glenn Hammond 112 113 .seealso `DMCreateMatrix()`, `DMDASetGetMatrix()`, `DMSetMatrixPreallocateOnly()` 114 115 @*/ 116 PetscErrorCode DMDASetBlockFills(DM da, const PetscInt *dfill, const PetscInt *ofill) { 117 DM_DA *dd = (DM_DA *)da->data; 118 119 PetscFunctionBegin; 120 /* save the given dfill and ofill information */ 121 PetscCall(DMDASetBlockFills_Private(dfill, dd->w, &dd->dfill)); 122 PetscCall(DMDASetBlockFills_Private(ofill, dd->w, &dd->ofill)); 123 124 /* count nonzeros in ofill columns */ 125 PetscCall(DMDASetBlockFills_Private2(dd)); 126 PetscFunctionReturn(0); 127 } 128 129 /*@ 130 DMDASetBlockFillsSparse - Sets the fill pattern in each block for a multi-component problem 131 of the matrix returned by DMCreateMatrix(), using sparse representations 132 of fill patterns. 133 134 Logically Collective on da 135 136 Input Parameters: 137 + da - the distributed array 138 . dfill - the sparse fill pattern in the diagonal block (may be NULL, means use dense block) 139 - ofill - the sparse fill pattern in the off-diagonal blocks 140 141 Level: developer 142 143 Notes: This only makes sense when you are doing multicomponent problems but using the 144 MPIAIJ matrix format 145 146 The format for dfill and ofill is a sparse representation of a 147 dof-by-dof matrix with 1 entries representing coupling and 0 entries 148 for missing coupling. The sparse representation is a 1 dimensional 149 array of length nz + dof + 1, where nz is the number of non-zeros in 150 the matrix. The first dof entries in the array give the 151 starting array indices of each row's items in the rest of the array, 152 the dof+1st item contains the value nz + dof + 1 (i.e. the entire length of the array) 153 and the remaining nz items give the column indices of each of 154 the 1s within the logical 2D matrix. 
Each row's items within 155 the array are the column indices of the 1s within that row 156 of the 2D matrix. PETSc developers may recognize that this is the 157 same format as that computed by the DMDASetBlockFills_Private() 158 function from a dense 2D matrix representation. 159 160 DMDASetGetMatrix() allows you to provide general code for those more complicated nonzero patterns then 161 can be represented in the dfill, ofill format 162 163 Contributed by Philip C. Roth 164 165 .seealso `DMDASetBlockFills()`, `DMCreateMatrix()`, `DMDASetGetMatrix()`, `DMSetMatrixPreallocateOnly()` 166 167 @*/ 168 PetscErrorCode DMDASetBlockFillsSparse(DM da, const PetscInt *dfillsparse, const PetscInt *ofillsparse) { 169 DM_DA *dd = (DM_DA *)da->data; 170 171 PetscFunctionBegin; 172 /* save the given dfill and ofill information */ 173 PetscCall(DMDASetBlockFillsSparse_Private(dfillsparse, dd->w, &dd->dfill)); 174 PetscCall(DMDASetBlockFillsSparse_Private(ofillsparse, dd->w, &dd->ofill)); 175 176 /* count nonzeros in ofill columns */ 177 PetscCall(DMDASetBlockFills_Private2(dd)); 178 PetscFunctionReturn(0); 179 } 180 181 PetscErrorCode DMCreateColoring_DA(DM da, ISColoringType ctype, ISColoring *coloring) { 182 PetscInt dim, m, n, p, nc; 183 DMBoundaryType bx, by, bz; 184 MPI_Comm comm; 185 PetscMPIInt size; 186 PetscBool isBAIJ; 187 DM_DA *dd = (DM_DA *)da->data; 188 189 PetscFunctionBegin; 190 /* 191 m 192 ------------------------------------------------------ 193 | | 194 | | 195 | ---------------------- | 196 | | | | 197 n | yn | | | 198 | | | | 199 | .--------------------- | 200 | (xs,ys) xn | 201 | . 
| 202 | (gxs,gys) | 203 | | 204 ----------------------------------------------------- 205 */ 206 207 /* 208 nc - number of components per grid point 209 col - number of colors needed in one direction for single component problem 210 211 */ 212 PetscCall(DMDAGetInfo(da, &dim, NULL, NULL, NULL, &m, &n, &p, &nc, NULL, &bx, &by, &bz, NULL)); 213 214 PetscCall(PetscObjectGetComm((PetscObject)da, &comm)); 215 PetscCallMPI(MPI_Comm_size(comm, &size)); 216 if (ctype == IS_COLORING_LOCAL) { 217 if (size == 1) { 218 ctype = IS_COLORING_GLOBAL; 219 } else if (dim > 1) { 220 if ((m == 1 && bx == DM_BOUNDARY_PERIODIC) || (n == 1 && by == DM_BOUNDARY_PERIODIC) || (p == 1 && bz == DM_BOUNDARY_PERIODIC)) { 221 SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "IS_COLORING_LOCAL cannot be used for periodic boundary condition having both ends of the domain on the same process"); 222 } 223 } 224 } 225 226 /* Tell the DMDA it has 1 degree of freedom per grid point so that the coloring for BAIJ 227 matrices is for the blocks, not the individual matrix elements */ 228 PetscCall(PetscStrbeginswith(da->mattype, MATBAIJ, &isBAIJ)); 229 if (!isBAIJ) PetscCall(PetscStrbeginswith(da->mattype, MATMPIBAIJ, &isBAIJ)); 230 if (!isBAIJ) PetscCall(PetscStrbeginswith(da->mattype, MATSEQBAIJ, &isBAIJ)); 231 if (isBAIJ) { 232 dd->w = 1; 233 dd->xs = dd->xs / nc; 234 dd->xe = dd->xe / nc; 235 dd->Xs = dd->Xs / nc; 236 dd->Xe = dd->Xe / nc; 237 } 238 239 /* 240 We do not provide a getcoloring function in the DMDA operations because 241 the basic DMDA does not know about matrices. We think of DMDA as being 242 more low-level then matrices. 
243 */ 244 if (dim == 1) PetscCall(DMCreateColoring_DA_1d_MPIAIJ(da, ctype, coloring)); 245 else if (dim == 2) PetscCall(DMCreateColoring_DA_2d_MPIAIJ(da, ctype, coloring)); 246 else if (dim == 3) PetscCall(DMCreateColoring_DA_3d_MPIAIJ(da, ctype, coloring)); 247 else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "Not done for %" PetscInt_FMT " dimension, send us mail petsc-maint@mcs.anl.gov for code", dim); 248 if (isBAIJ) { 249 dd->w = nc; 250 dd->xs = dd->xs * nc; 251 dd->xe = dd->xe * nc; 252 dd->Xs = dd->Xs * nc; 253 dd->Xe = dd->Xe * nc; 254 } 255 PetscFunctionReturn(0); 256 } 257 258 /* ---------------------------------------------------------------------------------*/ 259 260 PetscErrorCode DMCreateColoring_DA_2d_MPIAIJ(DM da, ISColoringType ctype, ISColoring *coloring) { 261 PetscInt xs, ys, nx, ny, i, j, ii, gxs, gys, gnx, gny, m, n, M, N, dim, s, k, nc, col; 262 PetscInt ncolors; 263 MPI_Comm comm; 264 DMBoundaryType bx, by; 265 DMDAStencilType st; 266 ISColoringValue *colors; 267 DM_DA *dd = (DM_DA *)da->data; 268 269 PetscFunctionBegin; 270 /* 271 nc - number of components per grid point 272 col - number of colors needed in one direction for single component problem 273 274 */ 275 PetscCall(DMDAGetInfo(da, &dim, &m, &n, NULL, &M, &N, NULL, &nc, &s, &bx, &by, NULL, &st)); 276 col = 2 * s + 1; 277 PetscCall(DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL)); 278 PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL)); 279 PetscCall(PetscObjectGetComm((PetscObject)da, &comm)); 280 281 /* special case as taught to us by Paul Hovland */ 282 if (st == DMDA_STENCIL_STAR && s == 1) { 283 PetscCall(DMCreateColoring_DA_2d_5pt_MPIAIJ(da, ctype, coloring)); 284 } else { 285 if (ctype == IS_COLORING_GLOBAL) { 286 if (!dd->localcoloring) { 287 PetscCall(PetscMalloc1(nc * nx * ny, &colors)); 288 ii = 0; 289 for (j = ys; j < ys + ny; j++) { 290 for (i = xs; i < xs + nx; i++) { 291 for (k = 0; k < nc; k++) colors[ii++] = k + nc * ((i % col) + 
col * (j % col)); 292 } 293 } 294 ncolors = nc + nc * (col - 1 + col * (col - 1)); 295 PetscCall(ISColoringCreate(comm, ncolors, nc * nx * ny, colors, PETSC_OWN_POINTER, &dd->localcoloring)); 296 } 297 *coloring = dd->localcoloring; 298 } else if (ctype == IS_COLORING_LOCAL) { 299 if (!dd->ghostedcoloring) { 300 PetscCall(PetscMalloc1(nc * gnx * gny, &colors)); 301 ii = 0; 302 for (j = gys; j < gys + gny; j++) { 303 for (i = gxs; i < gxs + gnx; i++) { 304 for (k = 0; k < nc; k++) { 305 /* the complicated stuff is to handle periodic boundaries */ 306 colors[ii++] = k + nc * ((SetInRange(i, m) % col) + col * (SetInRange(j, n) % col)); 307 } 308 } 309 } 310 ncolors = nc + nc * (col - 1 + col * (col - 1)); 311 PetscCall(ISColoringCreate(comm, ncolors, nc * gnx * gny, colors, PETSC_OWN_POINTER, &dd->ghostedcoloring)); 312 /* PetscIntView(ncolors,(PetscInt*)colors,0); */ 313 314 PetscCall(ISColoringSetType(dd->ghostedcoloring, IS_COLORING_LOCAL)); 315 } 316 *coloring = dd->ghostedcoloring; 317 } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_ARG_WRONG, "Unknown ISColoringType %d", (int)ctype); 318 } 319 PetscCall(ISColoringReference(*coloring)); 320 PetscFunctionReturn(0); 321 } 322 323 /* ---------------------------------------------------------------------------------*/ 324 325 PetscErrorCode DMCreateColoring_DA_3d_MPIAIJ(DM da, ISColoringType ctype, ISColoring *coloring) { 326 PetscInt xs, ys, nx, ny, i, j, gxs, gys, gnx, gny, m, n, p, dim, s, k, nc, col, zs, gzs, ii, l, nz, gnz, M, N, P; 327 PetscInt ncolors; 328 MPI_Comm comm; 329 DMBoundaryType bx, by, bz; 330 DMDAStencilType st; 331 ISColoringValue *colors; 332 DM_DA *dd = (DM_DA *)da->data; 333 334 PetscFunctionBegin; 335 /* 336 nc - number of components per grid point 337 col - number of colors needed in one direction for single component problem 338 339 */ 340 PetscCall(DMDAGetInfo(da, &dim, &m, &n, &p, &M, &N, &P, &nc, &s, &bx, &by, &bz, &st)); 341 col = 2 * s + 1; 342 PetscCall(DMDAGetCorners(da, 
&xs, &ys, &zs, &nx, &ny, &nz)); 343 PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, &gzs, &gnx, &gny, &gnz)); 344 PetscCall(PetscObjectGetComm((PetscObject)da, &comm)); 345 346 /* create the coloring */ 347 if (ctype == IS_COLORING_GLOBAL) { 348 if (!dd->localcoloring) { 349 PetscCall(PetscMalloc1(nc * nx * ny * nz, &colors)); 350 ii = 0; 351 for (k = zs; k < zs + nz; k++) { 352 for (j = ys; j < ys + ny; j++) { 353 for (i = xs; i < xs + nx; i++) { 354 for (l = 0; l < nc; l++) colors[ii++] = l + nc * ((i % col) + col * (j % col) + col * col * (k % col)); 355 } 356 } 357 } 358 ncolors = nc + nc * (col - 1 + col * (col - 1) + col * col * (col - 1)); 359 PetscCall(ISColoringCreate(comm, ncolors, nc * nx * ny * nz, colors, PETSC_OWN_POINTER, &dd->localcoloring)); 360 } 361 *coloring = dd->localcoloring; 362 } else if (ctype == IS_COLORING_LOCAL) { 363 if (!dd->ghostedcoloring) { 364 PetscCall(PetscMalloc1(nc * gnx * gny * gnz, &colors)); 365 ii = 0; 366 for (k = gzs; k < gzs + gnz; k++) { 367 for (j = gys; j < gys + gny; j++) { 368 for (i = gxs; i < gxs + gnx; i++) { 369 for (l = 0; l < nc; l++) { 370 /* the complicated stuff is to handle periodic boundaries */ 371 colors[ii++] = l + nc * ((SetInRange(i, m) % col) + col * (SetInRange(j, n) % col) + col * col * (SetInRange(k, p) % col)); 372 } 373 } 374 } 375 } 376 ncolors = nc + nc * (col - 1 + col * (col - 1) + col * col * (col - 1)); 377 PetscCall(ISColoringCreate(comm, ncolors, nc * gnx * gny * gnz, colors, PETSC_OWN_POINTER, &dd->ghostedcoloring)); 378 PetscCall(ISColoringSetType(dd->ghostedcoloring, IS_COLORING_LOCAL)); 379 } 380 *coloring = dd->ghostedcoloring; 381 } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_ARG_WRONG, "Unknown ISColoringType %d", (int)ctype); 382 PetscCall(ISColoringReference(*coloring)); 383 PetscFunctionReturn(0); 384 } 385 386 /* ---------------------------------------------------------------------------------*/ 387 388 PetscErrorCode DMCreateColoring_DA_1d_MPIAIJ(DM da, 
ISColoringType ctype, ISColoring *coloring) { 389 PetscInt xs, nx, i, i1, gxs, gnx, l, m, M, dim, s, nc, col; 390 PetscInt ncolors; 391 MPI_Comm comm; 392 DMBoundaryType bx; 393 ISColoringValue *colors; 394 DM_DA *dd = (DM_DA *)da->data; 395 396 PetscFunctionBegin; 397 /* 398 nc - number of components per grid point 399 col - number of colors needed in one direction for single component problem 400 401 */ 402 PetscCall(DMDAGetInfo(da, &dim, &m, NULL, NULL, &M, NULL, NULL, &nc, &s, &bx, NULL, NULL, NULL)); 403 col = 2 * s + 1; 404 PetscCall(DMDAGetCorners(da, &xs, NULL, NULL, &nx, NULL, NULL)); 405 PetscCall(DMDAGetGhostCorners(da, &gxs, NULL, NULL, &gnx, NULL, NULL)); 406 PetscCall(PetscObjectGetComm((PetscObject)da, &comm)); 407 408 /* create the coloring */ 409 if (ctype == IS_COLORING_GLOBAL) { 410 if (!dd->localcoloring) { 411 PetscCall(PetscMalloc1(nc * nx, &colors)); 412 if (dd->ofillcols) { 413 PetscInt tc = 0; 414 for (i = 0; i < nc; i++) tc += (PetscInt)(dd->ofillcols[i] > 0); 415 i1 = 0; 416 for (i = xs; i < xs + nx; i++) { 417 for (l = 0; l < nc; l++) { 418 if (dd->ofillcols[l] && (i % col)) { 419 colors[i1++] = nc - 1 + tc * ((i % col) - 1) + dd->ofillcols[l]; 420 } else { 421 colors[i1++] = l; 422 } 423 } 424 } 425 ncolors = nc + 2 * s * tc; 426 } else { 427 i1 = 0; 428 for (i = xs; i < xs + nx; i++) { 429 for (l = 0; l < nc; l++) colors[i1++] = l + nc * (i % col); 430 } 431 ncolors = nc + nc * (col - 1); 432 } 433 PetscCall(ISColoringCreate(comm, ncolors, nc * nx, colors, PETSC_OWN_POINTER, &dd->localcoloring)); 434 } 435 *coloring = dd->localcoloring; 436 } else if (ctype == IS_COLORING_LOCAL) { 437 if (!dd->ghostedcoloring) { 438 PetscCall(PetscMalloc1(nc * gnx, &colors)); 439 i1 = 0; 440 for (i = gxs; i < gxs + gnx; i++) { 441 for (l = 0; l < nc; l++) { 442 /* the complicated stuff is to handle periodic boundaries */ 443 colors[i1++] = l + nc * (SetInRange(i, m) % col); 444 } 445 } 446 ncolors = nc + nc * (col - 1); 447 
PetscCall(ISColoringCreate(comm, ncolors, nc * gnx, colors, PETSC_OWN_POINTER, &dd->ghostedcoloring)); 448 PetscCall(ISColoringSetType(dd->ghostedcoloring, IS_COLORING_LOCAL)); 449 } 450 *coloring = dd->ghostedcoloring; 451 } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_ARG_WRONG, "Unknown ISColoringType %d", (int)ctype); 452 PetscCall(ISColoringReference(*coloring)); 453 PetscFunctionReturn(0); 454 } 455 456 PetscErrorCode DMCreateColoring_DA_2d_5pt_MPIAIJ(DM da, ISColoringType ctype, ISColoring *coloring) { 457 PetscInt xs, ys, nx, ny, i, j, ii, gxs, gys, gnx, gny, m, n, dim, s, k, nc; 458 PetscInt ncolors; 459 MPI_Comm comm; 460 DMBoundaryType bx, by; 461 ISColoringValue *colors; 462 DM_DA *dd = (DM_DA *)da->data; 463 464 PetscFunctionBegin; 465 /* 466 nc - number of components per grid point 467 col - number of colors needed in one direction for single component problem 468 469 */ 470 PetscCall(DMDAGetInfo(da, &dim, &m, &n, NULL, NULL, NULL, NULL, &nc, &s, &bx, &by, NULL, NULL)); 471 PetscCall(DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL)); 472 PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL)); 473 PetscCall(PetscObjectGetComm((PetscObject)da, &comm)); 474 /* create the coloring */ 475 if (ctype == IS_COLORING_GLOBAL) { 476 if (!dd->localcoloring) { 477 PetscCall(PetscMalloc1(nc * nx * ny, &colors)); 478 ii = 0; 479 for (j = ys; j < ys + ny; j++) { 480 for (i = xs; i < xs + nx; i++) { 481 for (k = 0; k < nc; k++) colors[ii++] = k + nc * ((3 * j + i) % 5); 482 } 483 } 484 ncolors = 5 * nc; 485 PetscCall(ISColoringCreate(comm, ncolors, nc * nx * ny, colors, PETSC_OWN_POINTER, &dd->localcoloring)); 486 } 487 *coloring = dd->localcoloring; 488 } else if (ctype == IS_COLORING_LOCAL) { 489 if (!dd->ghostedcoloring) { 490 PetscCall(PetscMalloc1(nc * gnx * gny, &colors)); 491 ii = 0; 492 for (j = gys; j < gys + gny; j++) { 493 for (i = gxs; i < gxs + gnx; i++) { 494 for (k = 0; k < nc; k++) colors[ii++] = k + nc * ((3 * 
SetInRange(j, n) + SetInRange(i, m)) % 5); 495 } 496 } 497 ncolors = 5 * nc; 498 PetscCall(ISColoringCreate(comm, ncolors, nc * gnx * gny, colors, PETSC_OWN_POINTER, &dd->ghostedcoloring)); 499 PetscCall(ISColoringSetType(dd->ghostedcoloring, IS_COLORING_LOCAL)); 500 } 501 *coloring = dd->ghostedcoloring; 502 } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_ARG_WRONG, "Unknown ISColoringType %d", (int)ctype); 503 PetscFunctionReturn(0); 504 } 505 506 /* =========================================================================== */ 507 extern PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ(DM, Mat); 508 extern PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ_Fill(DM, Mat); 509 extern PetscErrorCode DMCreateMatrix_DA_1d_SeqAIJ_NoPreallocation(DM, Mat); 510 extern PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ(DM, Mat); 511 extern PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ_Fill(DM, Mat); 512 extern PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ(DM, Mat); 513 extern PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ_Fill(DM, Mat); 514 extern PetscErrorCode DMCreateMatrix_DA_2d_MPIBAIJ(DM, Mat); 515 extern PetscErrorCode DMCreateMatrix_DA_3d_MPIBAIJ(DM, Mat); 516 extern PetscErrorCode DMCreateMatrix_DA_2d_MPISBAIJ(DM, Mat); 517 extern PetscErrorCode DMCreateMatrix_DA_3d_MPISBAIJ(DM, Mat); 518 extern PetscErrorCode DMCreateMatrix_DA_2d_MPISELL(DM, Mat); 519 extern PetscErrorCode DMCreateMatrix_DA_3d_MPISELL(DM, Mat); 520 extern PetscErrorCode DMCreateMatrix_DA_IS(DM, Mat); 521 522 /*@C 523 MatSetupDM - Sets the DMDA that is to be used by the HYPRE_StructMatrix PETSc matrix 524 525 Logically Collective on mat 526 527 Input Parameters: 528 + mat - the matrix 529 - da - the da 530 531 Level: intermediate 532 533 @*/ 534 PetscErrorCode MatSetupDM(Mat mat, DM da) { 535 PetscFunctionBegin; 536 PetscValidHeaderSpecific(mat, MAT_CLASSID, 1); 537 PetscValidHeaderSpecificType(da, DM_CLASSID, 2, DMDA); 538 PetscTryMethod(mat, "MatSetupDM_C", (Mat, DM), (mat, da)); 539 PetscFunctionReturn(0); 540 } 
541 542 PetscErrorCode MatView_MPI_DA(Mat A, PetscViewer viewer) { 543 DM da; 544 const char *prefix; 545 Mat Anatural; 546 AO ao; 547 PetscInt rstart, rend, *petsc, i; 548 IS is; 549 MPI_Comm comm; 550 PetscViewerFormat format; 551 552 PetscFunctionBegin; 553 /* Check whether we are just printing info, in which case MatView() already viewed everything we wanted to view */ 554 PetscCall(PetscViewerGetFormat(viewer, &format)); 555 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) PetscFunctionReturn(0); 556 557 PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 558 PetscCall(MatGetDM(A, &da)); 559 PetscCheck(da, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Matrix not generated from a DMDA"); 560 561 PetscCall(DMDAGetAO(da, &ao)); 562 PetscCall(MatGetOwnershipRange(A, &rstart, &rend)); 563 PetscCall(PetscMalloc1(rend - rstart, &petsc)); 564 for (i = rstart; i < rend; i++) petsc[i - rstart] = i; 565 PetscCall(AOApplicationToPetsc(ao, rend - rstart, petsc)); 566 PetscCall(ISCreateGeneral(comm, rend - rstart, petsc, PETSC_OWN_POINTER, &is)); 567 568 /* call viewer on natural ordering */ 569 PetscCall(MatCreateSubMatrix(A, is, is, MAT_INITIAL_MATRIX, &Anatural)); 570 PetscCall(ISDestroy(&is)); 571 PetscCall(PetscObjectGetOptionsPrefix((PetscObject)A, &prefix)); 572 PetscCall(PetscObjectSetOptionsPrefix((PetscObject)Anatural, prefix)); 573 PetscCall(PetscObjectSetName((PetscObject)Anatural, ((PetscObject)A)->name)); 574 ((PetscObject)Anatural)->donotPetscObjectPrintClassNamePrefixType = PETSC_TRUE; 575 PetscCall(MatView(Anatural, viewer)); 576 ((PetscObject)Anatural)->donotPetscObjectPrintClassNamePrefixType = PETSC_FALSE; 577 PetscCall(MatDestroy(&Anatural)); 578 PetscFunctionReturn(0); 579 } 580 581 PetscErrorCode MatLoad_MPI_DA(Mat A, PetscViewer viewer) { 582 DM da; 583 Mat Anatural, Aapp; 584 AO ao; 585 PetscInt rstart, rend, *app, i, m, n, M, N; 586 IS is; 587 MPI_Comm comm; 588 589 PetscFunctionBegin; 590 
PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 591 PetscCall(MatGetDM(A, &da)); 592 PetscCheck(da, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Matrix not generated from a DMDA"); 593 594 /* Load the matrix in natural ordering */ 595 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &Anatural)); 596 PetscCall(MatSetType(Anatural, ((PetscObject)A)->type_name)); 597 PetscCall(MatGetSize(A, &M, &N)); 598 PetscCall(MatGetLocalSize(A, &m, &n)); 599 PetscCall(MatSetSizes(Anatural, m, n, M, N)); 600 PetscCall(MatLoad(Anatural, viewer)); 601 602 /* Map natural ordering to application ordering and create IS */ 603 PetscCall(DMDAGetAO(da, &ao)); 604 PetscCall(MatGetOwnershipRange(Anatural, &rstart, &rend)); 605 PetscCall(PetscMalloc1(rend - rstart, &app)); 606 for (i = rstart; i < rend; i++) app[i - rstart] = i; 607 PetscCall(AOPetscToApplication(ao, rend - rstart, app)); 608 PetscCall(ISCreateGeneral(comm, rend - rstart, app, PETSC_OWN_POINTER, &is)); 609 610 /* Do permutation and replace header */ 611 PetscCall(MatCreateSubMatrix(Anatural, is, is, MAT_INITIAL_MATRIX, &Aapp)); 612 PetscCall(MatHeaderReplace(A, &Aapp)); 613 PetscCall(ISDestroy(&is)); 614 PetscCall(MatDestroy(&Anatural)); 615 PetscFunctionReturn(0); 616 } 617 618 PetscErrorCode DMCreateMatrix_DA(DM da, Mat *J) { 619 PetscInt dim, dof, nx, ny, nz, dims[3], starts[3], M, N, P; 620 Mat A; 621 MPI_Comm comm; 622 MatType Atype; 623 void (*aij)(void) = NULL, (*baij)(void) = NULL, (*sbaij)(void) = NULL, (*sell)(void) = NULL, (*is)(void) = NULL; 624 MatType mtype; 625 PetscMPIInt size; 626 DM_DA *dd = (DM_DA *)da->data; 627 628 PetscFunctionBegin; 629 PetscCall(MatInitializePackage()); 630 mtype = da->mattype; 631 632 /* 633 m 634 ------------------------------------------------------ 635 | | 636 | | 637 | ---------------------- | 638 | | | | 639 n | ny | | | 640 | | | | 641 | .--------------------- | 642 | (xs,ys) nx | 643 | . 
| 644 | (gxs,gys) | 645 | | 646 ----------------------------------------------------- 647 */ 648 649 /* 650 nc - number of components per grid point 651 col - number of colors needed in one direction for single component problem 652 653 */ 654 M = dd->M; 655 N = dd->N; 656 P = dd->P; 657 dim = da->dim; 658 dof = dd->w; 659 /* PetscCall(DMDAGetInfo(da,&dim,&M,&N,&P,NULL,NULL,NULL,&dof,NULL,NULL,NULL,NULL,NULL)); */ 660 PetscCall(DMDAGetCorners(da, NULL, NULL, NULL, &nx, &ny, &nz)); 661 PetscCall(PetscObjectGetComm((PetscObject)da, &comm)); 662 PetscCall(MatCreate(comm, &A)); 663 PetscCall(MatSetSizes(A, dof * nx * ny * nz, dof * nx * ny * nz, dof * M * N * P, dof * M * N * P)); 664 PetscCall(MatSetType(A, mtype)); 665 PetscCall(MatSetFromOptions(A)); 666 if (dof * nx * ny * nz < da->bind_below) { 667 PetscCall(MatSetBindingPropagates(A, PETSC_TRUE)); 668 PetscCall(MatBindToCPU(A, PETSC_TRUE)); 669 } 670 PetscCall(MatSetDM(A, da)); 671 if (da->structure_only) PetscCall(MatSetOption(A, MAT_STRUCTURE_ONLY, PETSC_TRUE)); 672 PetscCall(MatGetType(A, &Atype)); 673 /* 674 We do not provide a getmatrix function in the DMDA operations because 675 the basic DMDA does not know about matrices. We think of DMDA as being more 676 more low-level than matrices. This is kind of cheating but, cause sometimes 677 we think of DMDA has higher level than matrices. 678 679 We could switch based on Atype (or mtype), but we do not since the 680 specialized setting routines depend only on the particular preallocation 681 details of the matrix, not the type itself. 
682 */ 683 PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatMPIAIJSetPreallocation_C", &aij)); 684 if (!aij) PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatSeqAIJSetPreallocation_C", &aij)); 685 if (!aij) { 686 PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatMPIBAIJSetPreallocation_C", &baij)); 687 if (!baij) PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatSeqBAIJSetPreallocation_C", &baij)); 688 if (!baij) { 689 PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatMPISBAIJSetPreallocation_C", &sbaij)); 690 if (!sbaij) PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatSeqSBAIJSetPreallocation_C", &sbaij)); 691 if (!sbaij) { 692 PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatMPISELLSetPreallocation_C", &sell)); 693 if (!sell) PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatSeqSELLSetPreallocation_C", &sell)); 694 } 695 if (!sell) PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatISSetPreallocation_C", &is)); 696 } 697 } 698 if (aij) { 699 if (dim == 1) { 700 if (dd->ofill) { 701 PetscCall(DMCreateMatrix_DA_1d_MPIAIJ_Fill(da, A)); 702 } else { 703 DMBoundaryType bx; 704 PetscMPIInt size; 705 PetscCall(DMDAGetInfo(da, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, &bx, NULL, NULL, NULL)); 706 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)da), &size)); 707 if (size == 1 && bx == DM_BOUNDARY_NONE) { 708 PetscCall(DMCreateMatrix_DA_1d_SeqAIJ_NoPreallocation(da, A)); 709 } else { 710 PetscCall(DMCreateMatrix_DA_1d_MPIAIJ(da, A)); 711 } 712 } 713 } else if (dim == 2) { 714 if (dd->ofill) { 715 PetscCall(DMCreateMatrix_DA_2d_MPIAIJ_Fill(da, A)); 716 } else { 717 PetscCall(DMCreateMatrix_DA_2d_MPIAIJ(da, A)); 718 } 719 } else if (dim == 3) { 720 if (dd->ofill) { 721 PetscCall(DMCreateMatrix_DA_3d_MPIAIJ_Fill(da, A)); 722 } else { 723 PetscCall(DMCreateMatrix_DA_3d_MPIAIJ(da, A)); 724 } 725 } 726 } else if (baij) { 727 if (dim == 2) { 728 PetscCall(DMCreateMatrix_DA_2d_MPIBAIJ(da, A)); 729 } else if (dim 
== 3) { 730 PetscCall(DMCreateMatrix_DA_3d_MPIBAIJ(da, A)); 731 } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "Not implemented for %" PetscInt_FMT " dimension and Matrix Type: %s in %" PetscInt_FMT " dimension! Send mail to petsc-maint@mcs.anl.gov for code", dim, Atype, dim); 732 } else if (sbaij) { 733 if (dim == 2) { 734 PetscCall(DMCreateMatrix_DA_2d_MPISBAIJ(da, A)); 735 } else if (dim == 3) { 736 PetscCall(DMCreateMatrix_DA_3d_MPISBAIJ(da, A)); 737 } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "Not implemented for %" PetscInt_FMT " dimension and Matrix Type: %s in %" PetscInt_FMT " dimension! Send mail to petsc-maint@mcs.anl.gov for code", dim, Atype, dim); 738 } else if (sell) { 739 if (dim == 2) { 740 PetscCall(DMCreateMatrix_DA_2d_MPISELL(da, A)); 741 } else if (dim == 3) { 742 PetscCall(DMCreateMatrix_DA_3d_MPISELL(da, A)); 743 } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "Not implemented for %" PetscInt_FMT " dimension and Matrix Type: %s in %" PetscInt_FMT " dimension! 
Send mail to petsc-maint@mcs.anl.gov for code", dim, Atype, dim);
  } else if (is) {
    PetscCall(DMCreateMatrix_DA_IS(da, A));
  } else {
    /* default path: no format-specific preallocation routine; just set up the matrix
       and attach the DMDA's local-to-global mapping so MatSetValuesLocal() works */
    ISLocalToGlobalMapping ltog;

    PetscCall(MatSetBlockSize(A, dof));
    PetscCall(MatSetUp(A));
    PetscCall(DMGetLocalToGlobalMapping(da, &ltog));
    PetscCall(MatSetLocalToGlobalMapping(A, ltog, ltog));
  }
  /* attach the (ghosted) grid dimensions and starts so stencil-based insertion can be used */
  PetscCall(DMDAGetGhostCorners(da, &starts[0], &starts[1], &starts[2], &dims[0], &dims[1], &dims[2]));
  PetscCall(MatSetStencil(A, dim, dims, starts, dof));
  PetscCall(MatSetDM(A, da));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  if (size > 1) {
    /* change viewer to display matrix in natural ordering */
    PetscCall(MatSetOperation(A, MATOP_VIEW, (void (*)(void))MatView_MPI_DA));
    PetscCall(MatSetOperation(A, MATOP_LOAD, (void (*)(void))MatLoad_MPI_DA));
  }
  *J = A;
  PetscFunctionReturn(0);
}

/* ---------------------------------------------------------------------------------*/
PETSC_EXTERN PetscErrorCode MatISSetPreallocation_IS(Mat, PetscInt, const PetscInt[], PetscInt, const PetscInt[]);

/*
   DMCreateMatrix_DA_IS - Builds the block local-to-global mapping and the preallocation
   for a MATIS matrix J defined on the DMDA dm.

   The DMDA's own l2g map covers every ghosted node; here it is restricted to the nodes
   actually touched by the local elements returned by DMDAGetElements() (unused nodes get
   global index -1).  Preallocation is then computed by inserting each element's dense
   block into a MATPREALLOCATOR matrix and transferring the result to the local matrix.
*/
PetscErrorCode DMCreateMatrix_DA_IS(DM dm, Mat J) {
  DM_DA                 *da = (DM_DA *)dm->data;
  Mat                    lJ, P; /* lJ: local matrix of the MATIS; P: preallocator helper */
  ISLocalToGlobalMapping ltog;
  IS                     is;
  PetscBT                bt; /* one bit per local block (node): touched by a local element? */
  const PetscInt        *e_loc, *idx;
  PetscInt               i, nel, nen, nv, dof, *gidx, n, N;

  /* The l2g map of DMDA has all ghosted nodes, and e_loc is a subset of all the local nodes (including the ghosted)
     We need to filter out the local indices that are not represented through the DMDAGetElements decomposition */
  PetscFunctionBegin;
  dof = da->w;
  PetscCall(MatSetBlockSize(J, dof));
  PetscCall(DMGetLocalToGlobalMapping(dm, &ltog));

  /* flag local elements indices in local DMDA numbering */
  PetscCall(ISLocalToGlobalMappingGetSize(ltog, &nv));
  PetscCall(PetscBTCreate(nv / dof, &bt));
  PetscCall(DMDAGetElements(dm, &nel, &nen, &e_loc)); /* this will throw an error if the stencil
type is not DMDA_STENCIL_BOX */
  for (i = 0; i < nel * nen; i++) PetscCall(PetscBTSet(bt, e_loc[i]));

  /* filter out (set to -1) the global indices not used by the local elements */
  PetscCall(PetscMalloc1(nv / dof, &gidx));
  PetscCall(ISLocalToGlobalMappingGetBlockIndices(ltog, &idx));
  PetscCall(PetscArraycpy(gidx, idx, nv / dof));
  PetscCall(ISLocalToGlobalMappingRestoreBlockIndices(ltog, &idx));
  for (i = 0; i < nv / dof; i++)
    if (!PetscBTLookup(bt, i)) gidx[i] = -1;
  PetscCall(PetscBTDestroy(&bt));
  /* gidx ownership transfers to the IS (PETSC_OWN_POINTER) */
  PetscCall(ISCreateBlock(PetscObjectComm((PetscObject)dm), dof, nv / dof, gidx, PETSC_OWN_POINTER, &is));
  PetscCall(ISLocalToGlobalMappingCreateIS(is, &ltog));
  PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));
  PetscCall(ISLocalToGlobalMappingDestroy(&ltog));
  PetscCall(ISDestroy(&is));

  /* Preallocation */
  if (dm->prealloc_skip) {
    PetscCall(MatSetUp(J));
  } else {
    /* mirror the local matrix's size/mapping in a MATPREALLOCATOR, insert the element
       connectivity blocks, then use it to preallocate (and optionally fill) lJ */
    PetscCall(MatISGetLocalMat(J, &lJ));
    PetscCall(MatGetLocalToGlobalMapping(lJ, &ltog, NULL));
    PetscCall(MatCreate(PetscObjectComm((PetscObject)lJ), &P));
    PetscCall(MatSetType(P, MATPREALLOCATOR));
    PetscCall(MatSetLocalToGlobalMapping(P, ltog, ltog));
    PetscCall(MatGetSize(lJ, &N, NULL));
    PetscCall(MatGetLocalSize(lJ, &n, NULL));
    PetscCall(MatSetSizes(P, n, n, N, N));
    PetscCall(MatSetBlockSize(P, dof));
    PetscCall(MatSetUp(P));
    for (i = 0; i < nel; i++) PetscCall(MatSetValuesBlockedLocal(P, nen, e_loc + i * nen, nen, e_loc + i * nen, NULL, INSERT_VALUES));
    /* also insert the (zero) values into lJ unless the caller asked for preallocation only */
    PetscCall(MatPreallocatorPreallocate(P, (PetscBool)!da->prealloc_only, lJ));
    PetscCall(MatISRestoreLocalMat(J, &lJ));
    PetscCall(DMDARestoreElements(dm, &nel, &nen, &e_loc));
    PetscCall(MatDestroy(&P));

    PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
  }
  PetscFunctionReturn(0);
}

/*
   DMCreateMatrix_DA_2d_MPISELL - Preallocates a SELL-format matrix for a 2d DMDA and,
   unless da->prealloc_only is set, inserts explicit zeros to assemble the nonzero structure.
*/
PetscErrorCode DMCreateMatrix_DA_2d_MPISELL(DM da, Mat J) {
  PetscInt
xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny, m, n, dim, s, *cols = NULL, k, nc, *rows = NULL, col, cnt, l, p;
  PetscInt               lstart, lend, pstart, pend, *dnz, *onz;
  MPI_Comm               comm;
  PetscScalar           *values;
  DMBoundaryType         bx, by;
  ISLocalToGlobalMapping ltog;
  DMDAStencilType        st;

  PetscFunctionBegin;
  /*
         nc - number of components per grid point
         col - number of colors needed in one direction for single component problem
  */
  PetscCall(DMDAGetInfo(da, &dim, &m, &n, NULL, NULL, NULL, NULL, &nc, &s, &bx, &by, NULL, &st));
  col = 2 * s + 1; /* number of grid points spanned by the stencil in one direction */
  PetscCall(DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL));
  PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL));
  PetscCall(PetscObjectGetComm((PetscObject)da, &comm));

  /* rows: the nc row indices for one grid point; cols: worst-case column list (full box stencil) */
  PetscCall(PetscMalloc2(nc, &rows, col * col * nc * nc, &cols));
  PetscCall(DMGetLocalToGlobalMapping(da, &ltog));

  PetscCall(MatSetBlockSize(J, nc));
  /* determine the matrix preallocation information */
  MatPreallocateBegin(comm, nc * nx * ny, nc * nx * ny, dnz, onz);
  for (i = xs; i < xs + nx; i++) {
    /* stencil extent in x, clipped at the physical boundary unless periodic */
    pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
    pend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));

    for (j = ys; j < ys + ny; j++) {
      slot = i - gxs + gnx * (j - gys); /* local (ghosted) linear index of grid point (i,j) */

      /* stencil extent in y, clipped at the physical boundary unless periodic */
      lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
      lend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));

      cnt = 0;
      for (k = 0; k < nc; k++) {
        for (l = lstart; l < lend + 1; l++) {
          for (p = pstart; p < pend + 1; p++) {
            if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star have either l = 0 or p = 0 */
              cols[cnt++] = k + nc * (slot + gnx * l + p);
            }
          }
        }
        rows[k] = k + nc * (slot);
      }
      PetscCall(MatPreallocateSetLocal(ltog, nc, rows, ltog, cnt, cols, dnz, onz));
    }
  }
  PetscCall(MatSetBlockSize(J, nc));
  PetscCall(MatSeqSELLSetPreallocation(J, 0, dnz));
  PetscCall(MatMPISELLSetPreallocation(J, 0, dnz, 0, onz));
  MatPreallocateEnd(dnz, onz);

  PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    /* second pass: insert explicit zeros (values is calloc'ed) to lock in the nonzero structure */
    PetscCall(PetscCalloc1(col * col * nc * nc, &values));
    for (i = xs; i < xs + nx; i++) {
      pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
      pend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));

      for (j = ys; j < ys + ny; j++) {
        slot = i - gxs + gnx * (j - gys);

        lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
        lend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));

        cnt = 0;
        for (k = 0; k < nc; k++) {
          for (l = lstart; l < lend + 1; l++) {
            for (p = pstart; p < pend + 1; p++) {
              if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star have either l = 0 or p = 0 */
                cols[cnt++] = k + nc * (slot + gnx * l + p);
              }
            }
          }
          rows[k] = k + nc * (slot);
        }
        PetscCall(MatSetValuesLocal(J, nc, rows, cnt, cols, values, INSERT_VALUES));
      }
    }
    PetscCall(PetscFree(values));
    /* do not copy values to GPU since they are all zero and not yet needed there */
    PetscCall(MatBindToCPU(J, PETSC_TRUE));
    PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatBindToCPU(J, PETSC_FALSE));
    PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  }
  PetscCall(PetscFree2(rows, cols));
  PetscFunctionReturn(0);
}

/*
   DMCreateMatrix_DA_3d_MPISELL - 3d analogue of DMCreateMatrix_DA_2d_MPISELL: preallocates
   a SELL-format matrix for a 3d DMDA and optionally inserts explicit zeros.
*/
PetscErrorCode DMCreateMatrix_DA_3d_MPISELL(DM da, Mat J) {
  PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
  PetscInt               m, n, dim, s, *cols = NULL, k, nc, *rows = NULL, col, cnt, l, p, *dnz = NULL, *onz = NULL;
  PetscInt               istart, iend, jstart, jend, kstart, kend, zs, nz, gzs, gnz, ii, jj, kk, M, N, P;
  MPI_Comm               comm;
  PetscScalar           *values;
  DMBoundaryType         bx, by, bz;
  ISLocalToGlobalMapping ltog;
  DMDAStencilType        st;

  PetscFunctionBegin;
  /*
         nc - number of components per grid point
         col - number of colors needed in one direction for single component problem
  */
  PetscCall(DMDAGetInfo(da, &dim, &m, &n, &p, &M, &N, &P, &nc, &s, &bx, &by, &bz, &st));
  col = 2 * s + 1;
  PetscCall(DMDAGetCorners(da, &xs, &ys, &zs, &nx, &ny, &nz));
  PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, &gzs, &gnx, &gny, &gnz));
  PetscCall(PetscObjectGetComm((PetscObject)da, &comm));

  PetscCall(PetscMalloc2(nc, &rows, col * col * col * nc * nc, &cols));
  PetscCall(DMGetLocalToGlobalMapping(da, &ltog));
  PetscCall(MatSetBlockSize(J, nc));
  /* determine the matrix preallocation information */
  MatPreallocateBegin(comm, nc * nx * ny * nz, nc * nx * ny * nz, dnz, onz);
  for (i = xs; i < xs + nx; i++) {
    /* stencil extents in each direction, clipped at the physical boundary unless periodic */
    istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
    iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
    for (j = ys; j < ys + ny; j++) {
      jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
      jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
      for (k = zs; k < zs + nz; k++) {
        kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
        kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));

        slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs); /* local (ghosted) linear index of (i,j,k) */

        cnt = 0;
        for (l = 0; l < nc; l++) {
          for (ii = istart; ii < iend + 1; ii++) {
            for (jj = jstart; jj < jend + 1; jj++) {
              for (kk = kstart; kk < kend + 1; kk++) {
                if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/
                  cols[cnt++] = l + nc * (slot + ii + gnx * jj + gnx * gny * kk);
                }
              }
            }
          }
          rows[l] = l + nc * (slot);
        }
        PetscCall(MatPreallocateSetLocal(ltog, nc, rows, ltog, cnt, cols, dnz, onz));
      }
    }
  }
  PetscCall(MatSetBlockSize(J, nc));
  PetscCall(MatSeqSELLSetPreallocation(J, 0, dnz));
  PetscCall(MatMPISELLSetPreallocation(J, 0, dnz, 0, onz));
  MatPreallocateEnd(dnz, onz);
  PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    /* second pass: insert explicit zeros (values is calloc'ed) to assemble the nonzero structure */
    PetscCall(PetscCalloc1(col * col * col * nc * nc * nc, &values));
    for (i = xs; i < xs + nx; i++) {
      istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
      iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
      for (j = ys; j < ys + ny; j++) {
        jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
        jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
        for (k = zs; k < zs + nz; k++) {
          kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
          kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));

          slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);

          cnt = 0;
          for (l = 0; l < nc; l++) {
            for (ii = istart; ii < iend + 1; ii++) {
              for (jj = jstart; jj < jend + 1; jj++) {
                for (kk = kstart; kk < kend + 1; kk++) {
                  if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/
                    cols[cnt++] = l + nc * (slot + ii + gnx * jj + gnx * gny * kk);
                  }
                }
              }
            }
            rows[l] = l + nc * (slot);
          }
          PetscCall(MatSetValuesLocal(J, nc, rows, cnt, cols, values, INSERT_VALUES));
        }
      }
    }
    PetscCall(PetscFree(values));
    /* do not copy values to GPU since they are all zero and not yet needed there */
    PetscCall(MatBindToCPU(J, PETSC_TRUE));
    PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatBindToCPU(J, PETSC_FALSE));
    PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  }
  PetscCall(PetscFree2(rows, cols));
  PetscFunctionReturn(0);
}

/*
   DMCreateMatrix_DA_2d_MPIAIJ - Preallocates an AIJ matrix for a 2d DMDA assuming full
   coupling among the nc components at each grid point; optionally inserts explicit zeros
   to assemble the nonzero structure.
*/
PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ(DM da, Mat J) {
  PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny, m, n, dim, s, *cols = NULL, k, nc, *rows = NULL, col, cnt, l, p, M, N;
  PetscInt               lstart, lend, pstart, pend, *dnz, *onz;
  MPI_Comm               comm;
  DMBoundaryType         bx, by;
  ISLocalToGlobalMapping ltog, mltog;
  DMDAStencilType        st;
  PetscBool              removedups = PETSC_FALSE, alreadyboundtocpu = PETSC_TRUE;
  PetscFunctionBegin;
  /*
         nc - number of components per grid point
         col - number of colors needed in one direction for single component problem
  */
  PetscCall(DMDAGetInfo(da, &dim, &m, &n, NULL, &M, &N, NULL, &nc, &s, &bx, &by, NULL, &st));
  if (bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE) PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_TRUE));
  col = 2 * s + 1;
  /*
       With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times
       because of "wrapping" around the end of the domain hitting an entry already counted in the other direction.
  */
  if (M == 1 && 2 * s >= m) removedups = PETSC_TRUE;
  if (N == 1 && 2 * s >= n) removedups = PETSC_TRUE;
  PetscCall(DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL));
  PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL));
  PetscCall(PetscObjectGetComm((PetscObject)da, &comm));

  PetscCall(PetscMalloc2(nc, &rows, col * col * nc * nc, &cols));
  PetscCall(DMGetLocalToGlobalMapping(da, &ltog));

  PetscCall(MatSetBlockSize(J, nc));
  /* determine the matrix preallocation information */
  MatPreallocateBegin(comm, nc * nx * ny, nc * nx * ny, dnz, onz);
  for (i = xs; i < xs + nx; i++) {
    /* stencil extent in x, clipped at the physical boundary unless periodic */
    pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
    pend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));

    for (j = ys; j < ys + ny; j++) {
      slot = i - gxs + gnx * (j - gys); /* local (ghosted) linear index of grid point (i,j) */

      lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
      lend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));

      cnt = 0;
      for (k = 0; k < nc; k++) {
        for (l = lstart; l < lend + 1; l++) {
          for (p = pstart; p < pend + 1; p++) {
            if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star have either l = 0 or p = 0 */
              cols[cnt++] = k + nc * (slot + gnx * l + p);
            }
          }
        }
        rows[k] = k + nc * (slot);
      }
      if (removedups) PetscCall(MatPreallocateSetLocalRemoveDups(ltog, nc, rows, ltog, cnt, cols, dnz, onz));
      else PetscCall(MatPreallocateSetLocal(ltog, nc, rows, ltog, cnt, cols, dnz, onz));
    }
  }
  PetscCall(MatSetBlockSize(J, nc));
  PetscCall(MatSeqAIJSetPreallocation(J, 0, dnz));
  PetscCall(MatMPIAIJSetPreallocation(J, 0, dnz, 0, onz));
  MatPreallocateEnd(dnz, onz);
  /* keep a mapping already set on J (if any); otherwise install the DMDA's */
  PetscCall(MatGetLocalToGlobalMapping(J, &mltog, NULL));
  if (!mltog) PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    for (i = xs; i < xs + nx; i++) {
      pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
      pend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));

      for (j = ys; j < ys + ny; j++) {
        slot = i - gxs + gnx * (j - gys);

        lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
        lend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));

        cnt = 0;
        for (l = lstart; l < lend + 1; l++) {
          for (p = pstart; p < pend + 1; p++) {
            if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star have either l = 0 or p = 0 */
              /* the nc component columns for one stencil neighbor are consecutive */
              cols[cnt++] = nc * (slot + gnx * l + p);
              for (k = 1; k < nc; k++) {
                cols[cnt] = 1 + cols[cnt - 1];
                cnt++;
              }
            }
          }
        }
        for (k = 0; k < nc; k++) rows[k] = k + nc * (slot);
        /* NULL values: inserts zeros just to establish the nonzero structure */
        PetscCall(MatSetValuesLocal(J, nc, rows, cnt, cols, NULL, INSERT_VALUES));
      }
    }
    /* do not copy values to GPU since they are all zero and not yet needed there */
    PetscCall(MatBoundToCPU(J, &alreadyboundtocpu));
    PetscCall(MatBindToCPU(J, PETSC_TRUE));
    PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
    if (!alreadyboundtocpu) PetscCall(MatBindToCPU(J, PETSC_FALSE));
    PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
    if (bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE) PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_FALSE));
  }
  PetscCall(PetscFree2(rows, cols));
  PetscFunctionReturn(0);
}

/*
   DMCreateMatrix_DA_2d_MPIAIJ_Fill - Like DMCreateMatrix_DA_2d_MPIAIJ but honors the
   per-block fill patterns (dd->dfill for the diagonal block, dd->ofill for off-diagonal
   blocks) set with DMDASetBlockFills(), so only the coupled components are preallocated.
*/
PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ_Fill(DM da, Mat J) {
  PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
  PetscInt               m, n, dim, s, *cols, k, nc, row, col, cnt, maxcnt = 0, l, p, M, N;
  PetscInt               lstart, lend, pstart, pend, *dnz, *onz;
  DM_DA                 *dd = (DM_DA *)da->data;
  PetscInt               ifill_col, *ofill = dd->ofill, *dfill = dd->dfill;
  MPI_Comm               comm;
  DMBoundaryType         bx, by;
  ISLocalToGlobalMapping ltog;
  DMDAStencilType        st;
  PetscBool              removedups = PETSC_FALSE;

  PetscFunctionBegin;
  /*
         nc - number of components per grid point
         col - number of colors needed in one direction for single component problem
  */
  PetscCall(DMDAGetInfo(da, &dim, &m, &n, NULL, &M, &N, NULL, &nc, &s, &bx, &by, NULL, &st));
  col = 2 * s + 1;
  /*
       With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times
       because of "wrapping" around the end of the domain hitting an entry already counted in the other direction.
  */
  if (M == 1 && 2 * s >= m) removedups = PETSC_TRUE;
  if (N == 1 && 2 * s >= n) removedups = PETSC_TRUE;
  PetscCall(DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL));
  PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL));
  PetscCall(PetscObjectGetComm((PetscObject)da, &comm));

  PetscCall(PetscMalloc1(col * col * nc, &cols));
  PetscCall(DMGetLocalToGlobalMapping(da, &ltog));

  PetscCall(MatSetBlockSize(J, nc));
  /* determine the matrix preallocation information */
  MatPreallocateBegin(comm, nc * nx * ny, nc * nx * ny, dnz, onz);
  for (i = xs; i < xs + nx; i++) {
    /* stencil extent in x, clipped at the physical boundary unless periodic */
    pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
    pend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));

    for (j = ys; j < ys + ny; j++) {
      slot = i - gxs + gnx * (j - gys); /* local (ghosted) linear index of grid point (i,j) */

      lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
      lend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));

      /* one row at a time: component k of grid point (i,j) */
      for (k = 0; k < nc; k++) {
        cnt = 0;
        for (l = lstart; l < lend + 1; l++) {
          for (p = pstart; p < pend + 1; p++) {
            if (l || p) {
              /* off-diagonal grid point: couple only the components listed in ofill's row k */
              if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star */
                for (ifill_col = ofill[k]; ifill_col < ofill[k + 1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc * (slot + gnx * l + p);
              }
            } else {
              /* diagonal grid point: use dfill's row k, or full coupling when dfill is NULL */
              if (dfill) {
                for (ifill_col = dfill[k]; ifill_col < dfill[k + 1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc * (slot + gnx * l + p);
              } else {
                for (ifill_col = 0; ifill_col < nc; ifill_col++) cols[cnt++] = ifill_col + nc * (slot + gnx * l + p);
              }
            }
          }
        }
        row    = k + nc * (slot);
        maxcnt = PetscMax(maxcnt, cnt); /* remember the longest row for the insertion pass below */
        if (removedups) PetscCall(MatPreallocateSetLocalRemoveDups(ltog, 1, &row, ltog, cnt, cols, dnz, onz));
        else PetscCall(MatPreallocateSetLocal(ltog, 1, &row, ltog, cnt, cols, dnz, onz));
      }
    }
  }
  PetscCall(MatSeqAIJSetPreallocation(J, 0, dnz));
  PetscCall(MatMPIAIJSetPreallocation(J, 0, dnz, 0, onz));
  MatPreallocateEnd(dnz, onz);
  PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    for (i = xs; i < xs + nx; i++) {
      pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
      pend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));

      for (j = ys; j < ys + ny; j++) {
        slot = i - gxs + gnx * (j - gys);

        lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
        lend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));

        for (k = 0; k < nc; k++) {
          cnt = 0;
          for (l = lstart; l < lend + 1; l++) {
            for (p = pstart; p < pend + 1; p++) {
              if (l || p) {
                if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star */
                  for (ifill_col = ofill[k]; ifill_col < ofill[k + 1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc * (slot + gnx * l + p);
                }
              } else {
                if (dfill) {
                  for (ifill_col = dfill[k]; ifill_col < dfill[k + 1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc * (slot + gnx * l + p);
                } else {
                  for (ifill_col = 0; ifill_col < nc; ifill_col++) cols[cnt++] = ifill_col + nc * (slot + gnx * l + p);
                }
              }
            }
          }
          row = k + nc * (slot);
          /* NULL values: inserts zeros just to establish the nonzero structure */
          PetscCall(MatSetValuesLocal(J, 1, &row, cnt, cols, NULL, INSERT_VALUES));
        }
      }
    }
    /* do not copy values to GPU since they are all zero and not yet needed there */
    PetscCall(MatBindToCPU(J, PETSC_TRUE));
    PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatBindToCPU(J, PETSC_FALSE));
    PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  }
  PetscCall(PetscFree(cols));
  PetscFunctionReturn(0);
}

/* ---------------------------------------------------------------------------------*/

/*
   DMCreateMatrix_DA_3d_MPIAIJ - Preallocates an AIJ matrix for a 3d DMDA assuming full
   coupling among the nc components at each grid point; optionally inserts explicit zeros
   to assemble the nonzero structure.
*/
PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ(DM da, Mat J) {
  PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
  PetscInt               m, n, dim, s, *cols = NULL, k, nc, *rows = NULL, col, cnt, l, p, *dnz = NULL, *onz = NULL;
  PetscInt               istart, iend, jstart, jend, kstart, kend, zs, nz, gzs, gnz, ii, jj, kk, M, N, P;
  MPI_Comm               comm;
  DMBoundaryType         bx, by, bz;
  ISLocalToGlobalMapping ltog, mltog;
  DMDAStencilType        st;
  PetscBool              removedups = PETSC_FALSE;

  PetscFunctionBegin;
  /*
         nc - number of components per grid point
         col - number of colors needed in one direction
for single component problem

  */
  PetscCall(DMDAGetInfo(da, &dim, &m, &n, &p, &M, &N, &P, &nc, &s, &bx, &by, &bz, &st));
  if (bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE && bz == DM_BOUNDARY_NONE) PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_TRUE));
  col = 2 * s + 1;

  /*
       With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times
       because of "wrapping" around the end of the domain hitting an entry already counted in the other direction.
  */
  if (M == 1 && 2 * s >= m) removedups = PETSC_TRUE;
  if (N == 1 && 2 * s >= n) removedups = PETSC_TRUE;
  if (P == 1 && 2 * s >= p) removedups = PETSC_TRUE;

  PetscCall(DMDAGetCorners(da, &xs, &ys, &zs, &nx, &ny, &nz));
  PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, &gzs, &gnx, &gny, &gnz));
  PetscCall(PetscObjectGetComm((PetscObject)da, &comm));

  PetscCall(PetscMalloc2(nc, &rows, col * col * col * nc * nc, &cols));
  PetscCall(DMGetLocalToGlobalMapping(da, &ltog));

  PetscCall(MatSetBlockSize(J, nc));
  /* determine the matrix preallocation information */
  MatPreallocateBegin(comm, nc * nx * ny * nz, nc * nx * ny * nz, dnz, onz);
  for (i = xs; i < xs + nx; i++) {
    /* stencil extents in each direction, clipped at the physical boundary unless periodic */
    istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
    iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
    for (j = ys; j < ys + ny; j++) {
      jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
      jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
      for (k = zs; k < zs + nz; k++) {
        kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
        kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));

        slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs); /* local (ghosted) linear index of (i,j,k) */

        cnt = 0;
        for (l = 0; l < nc; l++) {
          for (ii = istart; ii < iend + 1; ii++) {
            for (jj = jstart; jj < jend + 1; jj++) {
              for (kk = kstart; kk < kend + 1; kk++) {
                if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/
                  cols[cnt++] = l + nc * (slot + ii + gnx * jj + gnx * gny * kk);
                }
              }
            }
          }
          rows[l] = l + nc * (slot);
        }
        if (removedups) PetscCall(MatPreallocateSetLocalRemoveDups(ltog, nc, rows, ltog, cnt, cols, dnz, onz));
        else PetscCall(MatPreallocateSetLocal(ltog, nc, rows, ltog, cnt, cols, dnz, onz));
      }
    }
  }
  PetscCall(MatSetBlockSize(J, nc));
  PetscCall(MatSeqAIJSetPreallocation(J, 0, dnz));
  PetscCall(MatMPIAIJSetPreallocation(J, 0, dnz, 0, onz));
  MatPreallocateEnd(dnz, onz);
  /* keep a mapping already set on J (if any); otherwise install the DMDA's */
  PetscCall(MatGetLocalToGlobalMapping(J, &mltog, NULL));
  if (!mltog) PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    for (i = xs; i < xs + nx; i++) {
      istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
      iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
      for (j = ys; j < ys + ny; j++) {
        jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
        jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
        for (k = zs; k < zs + nz; k++) {
          kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
          kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));

          slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);

          /* note kk/jj/ii loop order here (vs l/ii/jj/kk above): columns are generated in
             increasing local index order, with the nc components of each neighbor consecutive */
          cnt = 0;
          for (kk = kstart; kk < kend + 1; kk++) {
            for (jj = jstart; jj < jend + 1; jj++) {
              for (ii = istart; ii < iend + 1; ii++) {
                if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/
                  cols[cnt++] = nc * (slot + ii + gnx * jj + gnx * gny * kk);
                  for (l = 1; l < nc; l++) {
                    cols[cnt] = 1 + cols[cnt - 1];
                    cnt++;
                  }
                }
              }
            }
          }
          rows[0] = nc * (slot);
          for (l = 1; l < nc; l++) rows[l] = 1 + rows[l - 1];
          /* NULL values: inserts zeros just to establish the nonzero structure */
          PetscCall(MatSetValuesLocal(J, nc, rows, cnt, cols, NULL, INSERT_VALUES));
        }
      }
    }
    /* do not copy values to GPU since they are all zero and not yet needed there */
    PetscCall(MatBindToCPU(J, PETSC_TRUE));
    PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
    if (bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE && bz == DM_BOUNDARY_NONE) PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_FALSE));
    PetscCall(MatBindToCPU(J, PETSC_FALSE));
    PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  }
  PetscCall(PetscFree2(rows, cols));
  PetscFunctionReturn(0);
}

/* ---------------------------------------------------------------------------------*/

/*
   DMCreateMatrix_DA_1d_MPIAIJ_Fill - Preallocates (and optionally fills with zeros) an AIJ
   matrix for a 1d DMDA honoring the dfill/ofill block fill patterns; only supports stencil
   width s <= 1 (checked below).  The insertion pass uses global indices (MatSetValues).
*/
PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ_Fill(DM da, Mat J) {
  DM_DA                 *dd = (DM_DA *)da->data;
  PetscInt               xs, nx, i, j, gxs, gnx, row, k, l;
  PetscInt               m, dim, s, *cols = NULL, nc, cnt, maxcnt = 0, *ocols;
  PetscInt              *ofill = dd->ofill, *dfill = dd->dfill;
  DMBoundaryType         bx;
  ISLocalToGlobalMapping ltog;
  PetscMPIInt            rank, size;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)da), &rank));
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)da), &size));

  /*
         nc - number of
components per grid point

  */
  PetscCall(DMDAGetInfo(da, &dim, &m, NULL, NULL, NULL, NULL, NULL, &nc, &s, &bx, NULL, NULL, NULL));
  PetscCheck(s <= 1, PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "Matrix creation for 1d not implemented correctly for stencil width larger than 1");
  PetscCall(DMDAGetCorners(da, &xs, NULL, NULL, &nx, NULL, NULL));
  PetscCall(DMDAGetGhostCorners(da, &gxs, NULL, NULL, &gnx, NULL, NULL));

  PetscCall(MatSetBlockSize(J, nc));
  /* cols: per-row on-process (diagonal) nonzero counts; ocols: off-process counts */
  PetscCall(PetscCalloc2(nx * nc, &cols, nx * nc, &ocols));

  /*
        note should be smaller for first and last process with no periodic
        does not handle dfill
  */
  cnt = 0;
  /* coupling with process to the left */
  for (i = 0; i < s; i++) {
    for (j = 0; j < nc; j++) {
      /* (s - i) ghost neighbors lie off-process to the left (none on rank 0 unless periodic) */
      ocols[cnt] = ((rank == 0) ? 0 : (s - i) * (ofill[j + 1] - ofill[j]));
      cols[cnt]  = dfill[j + 1] - dfill[j] + (s + i) * (ofill[j + 1] - ofill[j]);
      if (rank == 0 && (dd->bx == DM_BOUNDARY_PERIODIC)) {
        /* periodic wrap: the left neighbors come from the last process (or ourselves if serial) */
        if (size > 1) ocols[cnt] += (s - i) * (ofill[j + 1] - ofill[j]);
        else cols[cnt] += (s - i) * (ofill[j + 1] - ofill[j]);
      }
      maxcnt = PetscMax(maxcnt, ocols[cnt] + cols[cnt]);
      cnt++;
    }
  }
  /* interior points: full stencil is on-process */
  for (i = s; i < nx - s; i++) {
    for (j = 0; j < nc; j++) {
      cols[cnt] = dfill[j + 1] - dfill[j] + 2 * s * (ofill[j + 1] - ofill[j]);
      maxcnt    = PetscMax(maxcnt, ocols[cnt] + cols[cnt]);
      cnt++;
    }
  }
  /* coupling with process to the right */
  for (i = nx - s; i < nx; i++) {
    for (j = 0; j < nc; j++) {
      ocols[cnt] = ((rank == (size - 1)) ? 0 : (i - nx + s + 1) * (ofill[j + 1] - ofill[j]));
      cols[cnt]  = dfill[j + 1] - dfill[j] + (s + nx - i - 1) * (ofill[j + 1] - ofill[j]);
      if ((rank == size - 1) && (dd->bx == DM_BOUNDARY_PERIODIC)) {
        if (size > 1) ocols[cnt] += (i - nx + s + 1) * (ofill[j + 1] - ofill[j]);
        else cols[cnt] += (i - nx + s + 1) * (ofill[j + 1] - ofill[j]);
      }
      maxcnt = PetscMax(maxcnt, ocols[cnt] + cols[cnt]);
      cnt++;
    }
  }

  PetscCall(MatSeqAIJSetPreallocation(J, 0, cols));
  PetscCall(MatMPIAIJSetPreallocation(J, 0, cols, 0, ocols));
  PetscCall(PetscFree2(cols, ocols));

  PetscCall(DMGetLocalToGlobalMapping(da, &ltog));
  PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    PetscCall(PetscMalloc1(maxcnt, &cols));
    row = xs * nc; /* global row index; insertion below uses MatSetValues with global columns */
    /* coupling with process to the left */
    for (i = xs; i < xs + s; i++) {
      for (j = 0; j < nc; j++) {
        cnt = 0;
        if (rank) {
          for (l = 0; l < s; l++) {
            for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i - s + l) * nc + ofill[k];
          }
        }
        if (rank == 0 && (dd->bx == DM_BOUNDARY_PERIODIC)) {
          /* NOTE(review): (m + i - s - l) is the wrapped left neighbor; correct for the
             s <= 1 case enforced above — confirm before generalizing to s > 1 */
          for (l = 0; l < s; l++) {
            for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (m + i - s - l) * nc + ofill[k];
          }
        }
        if (dfill) {
          for (k = dfill[j]; k < dfill[j + 1]; k++) cols[cnt++] = i * nc + dfill[k];
        } else {
          for (k = 0; k < nc; k++) cols[cnt++] = i * nc + k;
        }
        for (l = 0; l < s; l++) {
          for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i + s - l) * nc + ofill[k];
        }
        PetscCall(MatSetValues(J, 1, &row, cnt, cols, NULL, INSERT_VALUES));
        row++;
      }
    }
    /* interior points */
    for (i = xs + s; i < xs + nx - s; i++) {
      for (j = 0; j < nc; j++) {
        cnt = 0;
        for (l = 0; l < s; l++) {
          for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i - s + l) * nc + ofill[k];
        }
        if (dfill) {
          for (k = dfill[j]; k < dfill[j + 1]; k++) cols[cnt++] = i * nc + dfill[k];
        } else {
          for (k = 0; k < nc; k++) cols[cnt++] = i * nc + k;
        }
        for (l = 0; l < s; l++) {
          for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i + s - l) * nc + ofill[k];
        }
        PetscCall(MatSetValues(J, 1, &row, cnt, cols, NULL, INSERT_VALUES));
        row++;
      }
    }
    /* coupling with process to the right */
    for (i = xs + nx - s; i < xs + nx; i++) {
      for (j = 0; j < nc; j++) {
        cnt = 0;
        for (l = 0; l < s; l++) {
          for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i - s + l) * nc + ofill[k];
        }
        if (dfill) {
          for (k = dfill[j]; k < dfill[j + 1]; k++) cols[cnt++] = i * nc + dfill[k];
        } else {
          for (k = 0; k < nc; k++) cols[cnt++] = i * nc + k;
        }
        if (rank < size - 1) {
          for (l = 0; l < s; l++) {
            for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i + s - l) * nc + ofill[k];
          }
        }
        if ((rank == size - 1) && (dd->bx == DM_BOUNDARY_PERIODIC)) {
          /* NOTE(review): (i - s - l - m + 2) equals the wrapped right neighbor (i + 1 - m)
             only for s == 1, l == 0 — guarded by the s <= 1 check; confirm before extending */
          for (l = 0; l < s; l++) {
            for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i - s - l - m + 2) * nc + ofill[k];
          }
        }
        PetscCall(MatSetValues(J, 1, &row, cnt, cols, NULL, INSERT_VALUES));
        row++;
      }
    }
    PetscCall(PetscFree(cols));
    /* do not copy values to GPU since they are all zero and not yet needed there */
    PetscCall(MatBindToCPU(J, PETSC_TRUE));
    PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatBindToCPU(J, PETSC_FALSE));
    PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  }
  PetscFunctionReturn(0);
}

/*
---------------------------------------------------------------------------------*/

/*
  DMCreateMatrix_DA_1d_MPIAIJ - Builds the AIJ matrix for a 1d DMDA assuming full
  coupling between all nc components of neighboring grid points within the stencil
  width (no dfill/ofill sparsification).

  Preallocates with the dense upper bound of col*nc nonzeros per row for both the
  diagonal and off-diagonal parts, then (unless da->prealloc_only) inserts the
  actual nonzero pattern with zero values so later assemblies cannot introduce
  new nonzero locations.
*/
PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ(DM da, Mat J) {
  PetscInt               xs, nx, i, i1, slot, gxs, gnx;
  PetscInt               m, dim, s, *cols = NULL, nc, *rows = NULL, col, cnt, l;
  PetscInt               istart, iend;
  DMBoundaryType         bx;
  ISLocalToGlobalMapping ltog, mltog;

  PetscFunctionBegin;
  /*
         nc - number of components per grid point
         col - number of colors needed in one direction for single component problem
  */
  PetscCall(DMDAGetInfo(da, &dim, &m, NULL, NULL, NULL, NULL, NULL, &nc, &s, &bx, NULL, NULL, NULL));
  /* without wrap-around the column indices below are generated in sorted order,
     so allow the matrix to use its fast full-row insertion path */
  if (bx == DM_BOUNDARY_NONE) PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_TRUE));
  col = 2 * s + 1;

  PetscCall(DMDAGetCorners(da, &xs, NULL, NULL, &nx, NULL, NULL));
  PetscCall(DMDAGetGhostCorners(da, &gxs, NULL, NULL, &gnx, NULL, NULL));

  PetscCall(MatSetBlockSize(J, nc));
  PetscCall(MatSeqAIJSetPreallocation(J, col * nc, NULL));
  PetscCall(MatMPIAIJSetPreallocation(J, col * nc, NULL, col * nc, NULL));

  PetscCall(DMGetLocalToGlobalMapping(da, &ltog));
  PetscCall(MatGetLocalToGlobalMapping(J, &mltog, NULL));
  if (!mltog) PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    PetscCall(PetscMalloc2(nc, &rows, col * nc * nc, &cols));
    for (i = xs; i < xs + nx; i++) {
      /* clip the stencil to the locally available ghost region */
      istart = PetscMax(-s, gxs - i);
      iend   = PetscMin(s, gxs + gnx - i - 1);
      slot   = i - gxs; /* local (ghosted) index of this grid point */

      cnt = 0;
      for (i1 = istart; i1 < iend + 1; i1++) {
        cols[cnt++] = nc * (slot + i1);
        /* remaining components of the same neighbor are consecutive indices */
        for (l = 1; l < nc; l++) {
          cols[cnt] = 1 + cols[cnt - 1];
          cnt++;
        }
      }
      rows[0] = nc * (slot);
      for (l = 1; l < nc; l++) rows[l] = 1 + rows[l - 1];
      PetscCall(MatSetValuesLocal(J, nc, rows, cnt, cols, NULL, INSERT_VALUES));
    }
    /* do not copy values to GPU since they are all zero and not yet needed there */
    PetscCall(MatBindToCPU(J, PETSC_TRUE));
    PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
    if (bx == DM_BOUNDARY_NONE) PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_FALSE));
    PetscCall(MatBindToCPU(J, PETSC_FALSE));
    PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
    PetscCall(PetscFree2(rows, cols));
  }
  PetscFunctionReturn(0);
}

/* ---------------------------------------------------------------------------------*/

/*
  DMCreateMatrix_DA_1d_SeqAIJ_NoPreallocation - Same fill pattern as
  DMCreateMatrix_DA_1d_MPIAIJ() but for a sequential matrix preallocated with a
  single total nonzero count (MatSeqAIJSetTotalPreallocation) instead of per-row
  counts.
*/
PetscErrorCode DMCreateMatrix_DA_1d_SeqAIJ_NoPreallocation(DM da, Mat J) {
  PetscInt               xs, nx, i, i1, slot, gxs, gnx;
  PetscInt               m, dim, s, *cols = NULL, nc, *rows = NULL, col, cnt, l;
  PetscInt               istart, iend;
  DMBoundaryType         bx;
  ISLocalToGlobalMapping ltog, mltog;

  PetscFunctionBegin;
  /*
         nc - number of components per grid point
         col - number of colors needed in one direction for single component problem
  */
  PetscCall(DMDAGetInfo(da, &dim, &m, NULL, NULL, NULL, NULL, NULL, &nc, &s, &bx, NULL, NULL, NULL));
  col = 2 * s + 1;

  PetscCall(DMDAGetCorners(da, &xs, NULL, NULL, &nx, NULL, NULL));
  PetscCall(DMDAGetGhostCorners(da, &gxs, NULL, NULL, &gnx, NULL, NULL));

  PetscCall(MatSetBlockSize(J, nc));
  /* one total count for all local rows: nx*nc rows with at most col*nc entries each */
  PetscCall(MatSeqAIJSetTotalPreallocation(J, nx * nc * col * nc));

  PetscCall(DMGetLocalToGlobalMapping(da, &ltog));
  PetscCall(MatGetLocalToGlobalMapping(J, &mltog, NULL));
  if (!mltog) PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    PetscCall(PetscMalloc2(nc, &rows, col * nc * nc, &cols));
    for (i = xs; i < xs + nx; i++) {
      /* clip the stencil to the locally available ghost region */
      istart = PetscMax(-s, gxs - i);
      iend   = PetscMin(s, gxs + gnx - i - 1);
      slot   = i - gxs;

      cnt = 0;
      for (i1 = istart; i1 < iend + 1; i1++) {
        cols[cnt++] = nc * (slot + i1);
        for (l = 1; l < nc; l++) {
          cols[cnt] = 1 + cols[cnt - 1];
          cnt++;
        }
      }
      rows[0] = nc * (slot);
      for (l = 1; l < nc; l++) rows[l] = 1 + rows[l - 1];
      PetscCall(MatSetValuesLocal(J, nc, rows, cnt, cols, NULL, INSERT_VALUES));
    }
    /* do not copy values to GPU since they are all zero and not yet needed there */
    PetscCall(MatBindToCPU(J, PETSC_TRUE));
    PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
    if (bx == DM_BOUNDARY_NONE) PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_FALSE));
    PetscCall(MatBindToCPU(J, PETSC_FALSE));
    PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
    PetscCall(PetscFree2(rows, cols));
  }
  /* clear the sorted-insertion hint even in the prealloc-only path */
  PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_FALSE));
  PetscFunctionReturn(0);
}

/*
  DMCreateMatrix_DA_2d_MPIBAIJ - Builds the BAIJ matrix for a 2d DMDA; the block
  size is nc, so the stencil is expressed in block rows/columns.
*/
PetscErrorCode DMCreateMatrix_DA_2d_MPIBAIJ(DM da, Mat J) {
  PetscInt xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
  PetscInt m, n, dim, s, *cols, nc, col, cnt, *dnz, *onz;
  PetscInt istart, iend, jstart, jend, ii, jj;
  MPI_Comm comm;
  PetscScalar           *values;
  DMBoundaryType         bx, by;
  DMDAStencilType        st;
  ISLocalToGlobalMapping ltog;

  PetscFunctionBegin;
  /*
         nc - number of components per grid point
         col - number of colors needed in one direction for single component problem
  */
  PetscCall(DMDAGetInfo(da, &dim, &m, &n, NULL, NULL, NULL, NULL, &nc, &s, &bx, &by, NULL, &st));
  col = 2 * s + 1;

  PetscCall(DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL));
  PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL));
  PetscCall(PetscObjectGetComm((PetscObject)da, &comm));

  PetscCall(PetscMalloc1(col * col * nc * nc, &cols));

  PetscCall(DMGetLocalToGlobalMapping(da, &ltog));

  /* determine the matrix preallocation information */
  MatPreallocateBegin(comm, nx * ny, nx * ny, dnz, onz);
  for (i = xs; i < xs + nx; i++) {
    /* periodic boundaries keep the full stencil; otherwise clip at the domain edge */
    istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
    iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
    for (j = ys; j < ys + ny; j++) {
      jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
      jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
      slot   = i - gxs + gnx * (j - gys); /* local (ghosted) block row */

      /* Find block columns in block row */
      cnt = 0;
      for (ii = istart; ii < iend + 1; ii++) {
        for (jj = jstart; jj < jend + 1; jj++) {
          if (st == DMDA_STENCIL_BOX || !ii || !jj) { /* BOX or on the STAR */
            cols[cnt++] = slot + ii + gnx * jj;
          }
        }
      }
      PetscCall(MatPreallocateSetLocalBlock(ltog, 1, &slot, ltog, cnt, cols, dnz, onz));
    }
  }
  PetscCall(MatSeqBAIJSetPreallocation(J, nc, 0, dnz));
  PetscCall(MatMPIBAIJSetPreallocation(J, nc, 0, dnz, 0, onz));
  MatPreallocateEnd(dnz, onz);

  PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    /* insert explicit zero blocks to lock in the nonzero structure */
    PetscCall(PetscCalloc1(col * col * nc * nc, &values));
    for (i = xs; i < xs + nx; i++) {
      istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
      iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
      for (j = ys; j < ys + ny; j++) {
        jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
        jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
        slot   = i - gxs + gnx * (j - gys);
        cnt    = 0;
        for (ii = istart; ii < iend + 1; ii++) {
          for (jj = jstart; jj < jend + 1; jj++) {
            if (st == DMDA_STENCIL_BOX || !ii || !jj) { /* BOX or on the STAR */
              cols[cnt++] = slot + ii + gnx * jj;
            }
          }
        }
        PetscCall(MatSetValuesBlockedLocal(J, 1, &slot, cnt, cols, values, INSERT_VALUES));
      }
    }
    PetscCall(PetscFree(values));
    /* do not copy values to GPU since they are all zero and not yet needed there */
    PetscCall(MatBindToCPU(J, PETSC_TRUE));
    PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatBindToCPU(J, PETSC_FALSE));
    PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  }
  PetscCall(PetscFree(cols));
  PetscFunctionReturn(0);
}

/*
  DMCreateMatrix_DA_3d_MPIBAIJ - Builds the BAIJ matrix for a 3d DMDA; the block
  size is nc, so the stencil is expressed in block rows/columns.
*/
PetscErrorCode DMCreateMatrix_DA_3d_MPIBAIJ(DM da, Mat J) {
  PetscInt xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
  PetscInt m, n, dim, s, *cols, k, nc, col, cnt, p, *dnz, *onz;
  PetscInt istart, iend, jstart, jend, kstart, kend, zs, nz, gzs, gnz, ii, jj, kk;
  MPI_Comm comm;
  PetscScalar *values;
  DMBoundaryType bx, by, bz;
  DMDAStencilType st;
  ISLocalToGlobalMapping ltog;

  PetscFunctionBegin;
  /*
         nc - number of components per grid point
         col - number of colors needed in one direction for single component problem
  */
  PetscCall(DMDAGetInfo(da, &dim, &m, &n, &p, NULL, NULL, NULL, &nc, &s, &bx, &by, &bz, &st));
  col = 2 * s + 1;

  PetscCall(DMDAGetCorners(da, &xs, &ys, &zs, &nx, &ny, &nz));
  PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, &gzs, &gnx, &gny, &gnz));
  PetscCall(PetscObjectGetComm((PetscObject)da, &comm));

  PetscCall(PetscMalloc1(col * col * col, &cols));

  PetscCall(DMGetLocalToGlobalMapping(da, &ltog));

  /* determine the matrix preallocation information */
  MatPreallocateBegin(comm, nx * ny * nz, nx * ny * nz, dnz, onz);
  for (i = xs; i < xs + nx; i++) {
    istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
    iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
    for (j = ys; j < ys + ny; j++) {
      jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
      jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
      for (k = zs; k < zs + nz; k++) {
        kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
        kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));

        slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);

        /* Find block columns in block row */
        cnt = 0;
        for (ii = istart; ii < iend + 1; ii++) {
          for (jj = jstart; jj < jend + 1; jj++) {
            for (kk = kstart; kk < kend + 1; kk++) {
              if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/
                cols[cnt++] = slot + ii + gnx * jj + gnx * gny * kk;
              }
            }
          }
        }
        PetscCall(MatPreallocateSetLocalBlock(ltog, 1, &slot, ltog, cnt, cols, dnz, onz));
      }
    }
  }
  PetscCall(MatSeqBAIJSetPreallocation(J, nc, 0, dnz));
  PetscCall(MatMPIBAIJSetPreallocation(J, nc, 0, dnz, 0, onz));
  MatPreallocateEnd(dnz, onz);

  PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    /* insert explicit zero blocks to lock in the nonzero structure */
    PetscCall(PetscCalloc1(col * col * col * nc * nc, &values));
    for (i = xs; i < xs + nx; i++) {
      istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
      iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
      for (j = ys; j < ys + ny; j++) {
        jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
        jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
        for (k = zs; k < zs + nz; k++) {
          kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
          kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));

          slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);

          cnt = 0;
          for (ii = istart; ii < iend + 1; ii++) {
            for (jj = jstart; jj < jend + 1; jj++) {
              for (kk = kstart; kk < kend + 1; kk++) {
                if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/
                  cols[cnt++] = slot + ii + gnx * jj + gnx * gny * kk;
                }
              }
            }
          }
          PetscCall(MatSetValuesBlockedLocal(J, 1, &slot, cnt, cols, values, INSERT_VALUES));
        }
      }
    }
    PetscCall(PetscFree(values));
    /* do not copy values to GPU since they are all zero and not yet needed there */
    PetscCall(MatBindToCPU(J, PETSC_TRUE));
    PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatBindToCPU(J, PETSC_FALSE));
    PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  }
  PetscCall(PetscFree(cols));
  PetscFunctionReturn(0);
}

/*
  This helper is for SBAIJ preallocation, to discard the lower-triangular values which are difficult to
  identify in the local ordering with periodic domain.
*/
static PetscErrorCode L2GFilterUpperTriangular(ISLocalToGlobalMapping ltog, PetscInt *row, PetscInt *cnt, PetscInt col[]) {
  PetscInt i, n;

  PetscFunctionBegin;
  /* map the block row and the candidate block columns to global block indices */
  PetscCall(ISLocalToGlobalMappingApplyBlock(ltog, 1, row, row));
  PetscCall(ISLocalToGlobalMappingApplyBlock(ltog, *cnt, col, col));
  /* compact col[] in place, keeping only entries on or above the diagonal */
  for (i = 0, n = 0; i < *cnt; i++) {
    if (col[i] >= *row) col[n++] = col[i];
  }
  *cnt = n;
  PetscFunctionReturn(0);
}

/*
  DMCreateMatrix_DA_2d_MPISBAIJ - Builds the symmetric (SBAIJ) matrix for a 2d DMDA;
  only the upper triangular part of each block row is preallocated and inserted
  (L2GFilterUpperTriangular discards the lower-triangular columns after mapping
  to the global block ordering).
*/
PetscErrorCode DMCreateMatrix_DA_2d_MPISBAIJ(DM da, Mat J) {
  PetscInt xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
  PetscInt m, n, dim, s, *cols, nc, col, cnt, *dnz, *onz;
  PetscInt istart, iend, jstart, jend, ii, jj;
  MPI_Comm comm;
  PetscScalar *values;
  DMBoundaryType bx, by;
  DMDAStencilType st;
  ISLocalToGlobalMapping ltog;

  PetscFunctionBegin;
  /*
         nc - number of components per grid point
         col - number of colors needed in one direction for single component problem
  */
  PetscCall(DMDAGetInfo(da, &dim, &m, &n, NULL, NULL, NULL, NULL, &nc, &s, &bx, &by, NULL, &st));
  col = 2 * s + 1;

  PetscCall(DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL));
  PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL));
  PetscCall(PetscObjectGetComm((PetscObject)da, &comm));

  PetscCall(PetscMalloc1(col * col * nc * nc, &cols));

  PetscCall(DMGetLocalToGlobalMapping(da, &ltog));

  /* determine the matrix preallocation information */
  MatPreallocateBegin(comm, nx * ny, nx * ny, dnz, onz);
  for (i = xs; i < xs + nx; i++) {
    /* periodic boundaries keep the full stencil; otherwise clip at the domain edge */
    istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
    iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
    for (j = ys; j < ys + ny; j++) {
      jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
      jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
      slot   = i - gxs + gnx * (j - gys); /* local (ghosted) block row */

      /* Find block columns in block row */
      cnt = 0;
      for (ii = istart; ii < iend + 1; ii++) {
        for (jj = jstart; jj < jend + 1; jj++) {
          if (st == DMDA_STENCIL_BOX || !ii || !jj) cols[cnt++] = slot + ii + gnx * jj;
        }
      }
      /* slot and cols[] are global block indices after this call */
      PetscCall(L2GFilterUpperTriangular(ltog, &slot, &cnt, cols));
      PetscCall(MatPreallocateSymmetricSetBlock(slot, cnt, cols, dnz, onz));
    }
  }
  PetscCall(MatSeqSBAIJSetPreallocation(J, nc, 0, dnz));
  PetscCall(MatMPISBAIJSetPreallocation(J, nc, 0, dnz, 0, onz));
  MatPreallocateEnd(dnz, onz);

  PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    /* insert explicit zero blocks to lock in the nonzero structure */
    PetscCall(PetscCalloc1(col * col * nc * nc, &values));
    for (i = xs; i < xs + nx; i++) {
      istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
      iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
      for (j = ys; j < ys + ny; j++) {
        jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
        jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
        slot   = i - gxs + gnx * (j - gys);

        /* Find block columns in block row */
        cnt = 0;
        for (ii = istart; ii < iend + 1; ii++) {
          for (jj = jstart; jj < jend + 1; jj++) {
            if (st == DMDA_STENCIL_BOX || !ii || !jj) cols[cnt++] = slot + ii + gnx * jj;
          }
        }
        /* indices are global after filtering, hence MatSetValuesBlocked (not ...Local) */
        PetscCall(L2GFilterUpperTriangular(ltog, &slot, &cnt, cols));
        PetscCall(MatSetValuesBlocked(J, 1, &slot, cnt, cols, values, INSERT_VALUES));
      }
    }
    PetscCall(PetscFree(values));
    /* do not copy values to GPU since they are all zero and not yet needed there */
    PetscCall(MatBindToCPU(J, PETSC_TRUE));
    PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatBindToCPU(J, PETSC_FALSE));
    PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  }
  PetscCall(PetscFree(cols));
  PetscFunctionReturn(0);
}

/*
  DMCreateMatrix_DA_3d_MPISBAIJ - 3d analogue of DMCreateMatrix_DA_2d_MPISBAIJ();
  only the upper triangular part of each block row is kept.
*/
PetscErrorCode DMCreateMatrix_DA_3d_MPISBAIJ(DM da, Mat J) {
  PetscInt xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
  PetscInt m, n, dim, s, *cols, k, nc, col, cnt, p, *dnz, *onz;
  PetscInt istart, iend, jstart, jend, kstart, kend, zs, nz, gzs, gnz, ii, jj, kk;
  MPI_Comm comm;
  PetscScalar *values;
  DMBoundaryType bx, by, bz;
  DMDAStencilType st;
  ISLocalToGlobalMapping ltog;

  PetscFunctionBegin;
  /*
         nc - number of components per grid point
         col - number of colors needed in one direction for single component problem
  */
  PetscCall(DMDAGetInfo(da, &dim, &m, &n, &p, NULL, NULL, NULL, &nc, &s, &bx, &by, &bz, &st));
  col = 2 * s + 1;

  PetscCall(DMDAGetCorners(da, &xs, &ys, &zs, &nx, &ny, &nz));
  PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, &gzs, &gnx, &gny, &gnz));
  PetscCall(PetscObjectGetComm((PetscObject)da, &comm));

  /* create the matrix */
  PetscCall(PetscMalloc1(col * col * col, &cols));
  PetscCall(DMGetLocalToGlobalMapping(da, &ltog));

  /* determine the matrix preallocation information */
  MatPreallocateBegin(comm, nx * ny * nz, nx * ny * nz, dnz, onz);
  for (i = xs; i < xs + nx; i++) {
    /* periodic boundaries keep the full stencil; otherwise clip at the domain edge */
    istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
    iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
    for (j = ys; j < ys + ny; j++) {
      jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
      jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
      for (k = zs; k < zs + nz; k++) {
        kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
        kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));

        slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);

        /* Find block columns in block row */
        cnt = 0;
        for (ii = istart; ii < iend + 1; ii++) {
          for (jj = jstart; jj < jend + 1; jj++) {
            for (kk = kstart; kk < kend + 1; kk++) {
              if ((st == DMDA_STENCIL_BOX) || (!ii && !jj) || (!jj && !kk) || (!ii && !kk)) cols[cnt++] = slot + ii + gnx * jj + gnx * gny * kk;
            }
          }
        }
        /* slot and cols[] are global block indices after this call */
        PetscCall(L2GFilterUpperTriangular(ltog, &slot, &cnt, cols));
        PetscCall(MatPreallocateSymmetricSetBlock(slot, cnt, cols, dnz, onz));
      }
    }
  }
  PetscCall(MatSeqSBAIJSetPreallocation(J, nc, 0, dnz));
  PetscCall(MatMPISBAIJSetPreallocation(J, nc, 0, dnz, 0, onz));
  MatPreallocateEnd(dnz, onz);

  PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    /* insert explicit zero blocks to lock in the nonzero structure */
    PetscCall(PetscCalloc1(col * col * col * nc * nc, &values));
    for (i = xs; i < xs + nx; i++) {
      istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
      iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
      for (j = ys; j < ys + ny; j++) {
        jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
        jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
        for (k = zs; k < zs + nz; k++) {
          kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
          kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));

          slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);

          cnt = 0;
          for (ii = istart; ii < iend + 1; ii++) {
            for (jj = jstart; jj < jend + 1; jj++) {
              for (kk = kstart; kk < kend + 1; kk++) {
                if ((st == DMDA_STENCIL_BOX) || (!ii && !jj) || (!jj && !kk) || (!ii && !kk)) cols[cnt++] = slot + ii + gnx * jj + gnx * gny * kk;
              }
            }
          }
          /* indices are global after filtering, hence MatSetValuesBlocked (not ...Local) */
          PetscCall(L2GFilterUpperTriangular(ltog, &slot, &cnt, cols));
          PetscCall(MatSetValuesBlocked(J, 1, &slot, cnt, cols, values, INSERT_VALUES));
        }
      }
    }
    PetscCall(PetscFree(values));
    /* do not copy values to GPU since they are all zero and not yet needed there */
    PetscCall(MatBindToCPU(J, PETSC_TRUE));
    PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatBindToCPU(J, PETSC_FALSE));
    PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  }
  PetscCall(PetscFree(cols));
  PetscFunctionReturn(0);
}

/* ---------------------------------------------------------------------------------*/

/*
  DMCreateMatrix_DA_3d_MPIAIJ_Fill - Builds the AIJ matrix for a 3d DMDA honoring the
  per-block fill patterns set with DMDASetBlockFills(): dfill sparsifies the coupling
  within a grid point's nc components, ofill the coupling to neighboring grid points.
*/
PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ_Fill(DM da, Mat J) {
  PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
  PetscInt               m, n, dim, s, *cols, k, nc, row, col, cnt, maxcnt = 0, l, p, *dnz, *onz;
  PetscInt               istart, iend, jstart, jend, kstart, kend, zs, nz, gzs, gnz, ii, jj, kk, M, N, P;
  DM_DA                 *dd = (DM_DA *)da->data;
  PetscInt               ifill_col, *dfill = dd->dfill, *ofill = dd->ofill;
  MPI_Comm               comm;
  PetscScalar           *values;
  DMBoundaryType         bx, by, bz;
  ISLocalToGlobalMapping ltog;
  DMDAStencilType        st;
  PetscBool              removedups = PETSC_FALSE;

  PetscFunctionBegin;
  /*
         nc - number of components per grid point
         col - number of colors needed in one direction for single component problem
  */
  PetscCall(DMDAGetInfo(da, &dim, &m, &n, &p, &M, &N, &P, &nc, &s, &bx, &by, &bz, &st));
  col = 2 * s + 1;
  PetscCheck(bx != DM_BOUNDARY_PERIODIC || (m % col) == 0, PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "For coloring efficiency ensure number of grid points in X is divisible\n\
by 2*stencil_width + 1\n");
  PetscCheck(by != DM_BOUNDARY_PERIODIC || (n % col) == 0, PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "For coloring efficiency ensure number of grid points in Y is divisible\n\
by 2*stencil_width + 1\n");
  PetscCheck(bz != DM_BOUNDARY_PERIODIC || (p % col) == 0, PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "For coloring efficiency ensure number of grid points in Z is divisible\n\
by 2*stencil_width + 1\n");

  /*
       With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times
       because of "wrapping" around the end of the domain hitting an entry already counted in the other direction.
  */
  if (M == 1 && 2 * s >= m) removedups = PETSC_TRUE;
  if (N == 1 && 2 * s >= n) removedups = PETSC_TRUE;
  if (P == 1 && 2 * s >= p) removedups = PETSC_TRUE;

  PetscCall(DMDAGetCorners(da, &xs, &ys, &zs, &nx, &ny, &nz));
  PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, &gzs, &gnx, &gny, &gnz));
  PetscCall(PetscObjectGetComm((PetscObject)da, &comm));

  PetscCall(PetscMalloc1(col * col * col * nc, &cols));
  PetscCall(DMGetLocalToGlobalMapping(da, &ltog));

  /* determine the matrix preallocation information */
  MatPreallocateBegin(comm, nc * nx * ny * nz, nc * nx * ny * nz, dnz, onz);

  PetscCall(MatSetBlockSize(J, nc));
  for (i = xs; i < xs + nx; i++) {
    istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
    iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
    for (j = ys; j < ys + ny; j++) {
      jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
      jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
      for (k = zs; k < zs + nz; k++) {
        kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
        kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));

        slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);

        /* one matrix row per component l of this grid point */
        for (l = 0; l < nc; l++) {
          cnt = 0;
          for (ii = istart; ii < iend + 1; ii++) {
            for (jj = jstart; jj < jend + 1; jj++) {
              for (kk = kstart; kk < kend + 1; kk++) {
                if (ii || jj || kk) {
                  /* off-diagonal grid point: couple through the ofill pattern */
                  if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/
                    for (ifill_col = ofill[l]; ifill_col < ofill[l + 1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc * (slot + ii + gnx * jj + gnx * gny * kk);
                  }
                } else {
                  /* same grid point: couple through dfill, or densely if none given */
                  if (dfill) {
                    for (ifill_col = dfill[l]; ifill_col < dfill[l + 1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc * (slot + ii + gnx * jj + gnx * gny * kk);
                  } else {
                    for (ifill_col = 0; ifill_col < nc; ifill_col++) cols[cnt++] = ifill_col + nc * (slot + ii + gnx * jj + gnx * gny * kk);
                  }
                }
              }
            }
          }
          row    = l + nc * (slot);
          maxcnt = PetscMax(maxcnt, cnt);
          if (removedups) PetscCall(MatPreallocateSetLocalRemoveDups(ltog, 1, &row, ltog, cnt, cols, dnz, onz));
          else PetscCall(MatPreallocateSetLocal(ltog, 1, &row, ltog, cnt, cols, dnz, onz));
        }
      }
    }
  }
  PetscCall(MatSeqAIJSetPreallocation(J, 0, dnz));
  PetscCall(MatMPIAIJSetPreallocation(J, 0, dnz, 0, onz));
  MatPreallocateEnd(dnz, onz);
  PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    /* insert explicit zeros to lock in the nonzero structure */
    PetscCall(PetscCalloc1(maxcnt, &values));
    for (i = xs; i < xs + nx; i++) {
      istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
      iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
      for (j = ys; j < ys + ny; j++) {
        jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
        jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
        for (k = zs; k < zs + nz; k++) {
          kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
          kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));

          slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);

          for (l = 0; l < nc; l++) {
            cnt = 0;
            for (ii = istart; ii < iend + 1; ii++) {
              for (jj = jstart; jj < jend + 1; jj++) {
                for (kk = kstart; kk < kend + 1; kk++) {
                  if (ii || jj || kk) {
                    if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/
                      for (ifill_col = ofill[l]; ifill_col < ofill[l + 1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc * (slot + ii + gnx * jj + gnx * gny * kk);
                    }
                  } else {
                    if (dfill) {
                      for (ifill_col = dfill[l]; ifill_col < dfill[l + 1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc * (slot + ii + gnx * jj + gnx * gny * kk);
                    } else {
                      for (ifill_col = 0; ifill_col < nc; ifill_col++) cols[cnt++] = ifill_col + nc * (slot + ii + gnx * jj + gnx * gny * kk);
                    }
                  }
                }
              }
            }
            row = l + nc * (slot);
            PetscCall(MatSetValuesLocal(J, 1, &row, cnt, cols, values, INSERT_VALUES));
          }
        }
      }
    }
    PetscCall(PetscFree(values));
    /* do not copy values to GPU since they are all zero and not yet needed there */
    PetscCall(MatBindToCPU(J, PETSC_TRUE));
    PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatBindToCPU(J, PETSC_FALSE));
    PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  }
  PetscCall(PetscFree(cols));
  PetscFunctionReturn(0);
}