1 2 #include <petsc/private/dmdaimpl.h> /*I "petscdmda.h" I*/ 3 #include <petscmat.h> 4 #include <petscbt.h> 5 6 extern PetscErrorCode DMCreateColoring_DA_1d_MPIAIJ(DM, ISColoringType, ISColoring *); 7 extern PetscErrorCode DMCreateColoring_DA_2d_MPIAIJ(DM, ISColoringType, ISColoring *); 8 extern PetscErrorCode DMCreateColoring_DA_2d_5pt_MPIAIJ(DM, ISColoringType, ISColoring *); 9 extern PetscErrorCode DMCreateColoring_DA_3d_MPIAIJ(DM, ISColoringType, ISColoring *); 10 11 /* 12 For ghost i that may be negative or greater than the upper bound this 13 maps it into the 0:m-1 range using periodicity 14 */ 15 #define SetInRange(i, m) ((i < 0) ? m + i : ((i >= m) ? i - m : i)) 16 17 static PetscErrorCode DMDASetBlockFills_Private(const PetscInt *dfill, PetscInt w, PetscInt **rfill) 18 { 19 PetscInt i, j, nz, *fill; 20 21 PetscFunctionBegin; 22 if (!dfill) PetscFunctionReturn(PETSC_SUCCESS); 23 24 /* count number nonzeros */ 25 nz = 0; 26 for (i = 0; i < w; i++) { 27 for (j = 0; j < w; j++) { 28 if (dfill[w * i + j]) nz++; 29 } 30 } 31 PetscCall(PetscMalloc1(nz + w + 1, &fill)); 32 /* construct modified CSR storage of nonzero structure */ 33 /* fill[0 -- w] marks starts of each row of column indices (and end of last row) 34 so fill[1] - fill[0] gives number of nonzeros in first row etc */ 35 nz = w + 1; 36 for (i = 0; i < w; i++) { 37 fill[i] = nz; 38 for (j = 0; j < w; j++) { 39 if (dfill[w * i + j]) { 40 fill[nz] = j; 41 nz++; 42 } 43 } 44 } 45 fill[w] = nz; 46 47 *rfill = fill; 48 PetscFunctionReturn(PETSC_SUCCESS); 49 } 50 51 static PetscErrorCode DMDASetBlockFillsSparse_Private(const PetscInt *dfillsparse, PetscInt w, PetscInt **rfill) 52 { 53 PetscInt nz; 54 55 PetscFunctionBegin; 56 if (!dfillsparse) PetscFunctionReturn(PETSC_SUCCESS); 57 58 /* Determine number of non-zeros */ 59 nz = (dfillsparse[w] - w - 1); 60 61 /* Allocate space for our copy of the given sparse matrix representation. 
*/ 62 PetscCall(PetscMalloc1(nz + w + 1, rfill)); 63 PetscCall(PetscArraycpy(*rfill, dfillsparse, nz + w + 1)); 64 PetscFunctionReturn(PETSC_SUCCESS); 65 } 66 67 static PetscErrorCode DMDASetBlockFills_Private2(DM_DA *dd) 68 { 69 PetscInt i, k, cnt = 1; 70 71 PetscFunctionBegin; 72 73 /* ofillcount tracks the columns of ofill that have any nonzero in thems; the value in each location is the number of 74 columns to the left with any nonzeros in them plus 1 */ 75 PetscCall(PetscCalloc1(dd->w, &dd->ofillcols)); 76 for (i = 0; i < dd->w; i++) { 77 for (k = dd->ofill[i]; k < dd->ofill[i + 1]; k++) dd->ofillcols[dd->ofill[k]] = 1; 78 } 79 for (i = 0; i < dd->w; i++) { 80 if (dd->ofillcols[i]) dd->ofillcols[i] = cnt++; 81 } 82 PetscFunctionReturn(PETSC_SUCCESS); 83 } 84 85 /*@ 86 DMDASetBlockFills - Sets the fill pattern in each block for a multi-component problem 87 of the matrix returned by `DMCreateMatrix()`. 88 89 Logically Collective 90 91 Input Parameters: 92 + da - the distributed array 93 . dfill - the fill pattern in the diagonal block (may be NULL, means use dense block) 94 - ofill - the fill pattern in the off-diagonal blocks 95 96 Level: developer 97 98 Notes: 99 This only makes sense when you are doing multicomponent problems but using the 100 `MATMPIAIJ` matrix format 101 102 The format for dfill and ofill is a 2 dimensional dof by dof matrix with 1 entries 103 representing coupling and 0 entries for missing coupling. For example 104 .vb 105 dfill[9] = {1, 0, 0, 106 1, 1, 0, 107 0, 1, 1} 108 .ve 109 means that row 0 is coupled with only itself in the diagonal block, row 1 is coupled with 110 itself and row 0 (in the diagonal block) and row 2 is coupled with itself and row 1 (in the 111 diagonal block). 
/*@
  DMDASetBlockFills - Sets the fill pattern in each block for a multi-component problem
  of the matrix returned by `DMCreateMatrix()`.

  Logically Collective

  Input Parameters:
+ da    - the distributed array
. dfill - the fill pattern in the diagonal block (may be NULL, means use dense block)
- ofill - the fill pattern in the off-diagonal blocks

  Level: developer

  Notes:
  This only makes sense when you are doing multicomponent problems but using the
  `MATMPIAIJ` matrix format

  The format for dfill and ofill is a 2 dimensional dof by dof matrix with 1 entries
  representing coupling and 0 entries for missing coupling. For example
.vb
  dfill[9] = {1, 0, 0,
              1, 1, 0,
              0, 1, 1}
.ve
  means that row 0 is coupled with only itself in the diagonal block, row 1 is coupled with
  itself and row 0 (in the diagonal block) and row 2 is coupled with itself and row 1 (in the
  diagonal block).

  `DMDASetGetMatrix()` allows you to provide general code for those more complicated nonzero patterns than
  can be represented in the dfill, ofill format

  Contributed by Glenn Hammond

.seealso: `DM`, `DMDA`, `DMCreateMatrix()`, `DMDASetGetMatrix()`, `DMSetMatrixPreallocateOnly()`
@*/
PetscErrorCode DMDASetBlockFills(DM da, const PetscInt *dfill, const PetscInt *ofill)
{
  DM_DA *dd = (DM_DA *)da->data;

  PetscFunctionBegin;
  /* save the given dfill and ofill information (compressed into modified CSR form) */
  PetscCall(DMDASetBlockFills_Private(dfill, dd->w, &dd->dfill));
  PetscCall(DMDASetBlockFills_Private(ofill, dd->w, &dd->ofill));

  /* count nonzeros in ofill columns */
  PetscCall(DMDASetBlockFills_Private2(dd));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  DMDASetBlockFillsSparse - Sets the fill pattern in each block for a multi-component problem
  of the matrix returned by `DMCreateMatrix()`, using sparse representations
  of fill patterns.

  Logically Collective

  Input Parameters:
+ da    - the distributed array
. dfill - the sparse fill pattern in the diagonal block (may be `NULL`, means use dense block)
- ofill - the sparse fill pattern in the off-diagonal blocks

  Level: developer

  Notes:
  This only makes sense when you are doing multicomponent problems but using the
  `MATMPIAIJ` matrix format

  The format for `dfill` and `ofill` is a sparse representation of a
  dof-by-dof matrix with 1 entries representing coupling and 0 entries
  for missing coupling. The sparse representation is a 1 dimensional
  array of length nz + dof + 1, where nz is the number of non-zeros in
  the matrix. The first dof entries in the array give the
  starting array indices of each row's items in the rest of the array,
  the dof+1st item contains the value nz + dof + 1 (i.e. the entire length of the array)
  and the remaining nz items give the column indices of each of
  the 1s within the logical 2D matrix. Each row's items within
  the array are the column indices of the 1s within that row
  of the 2D matrix. PETSc developers may recognize that this is the
  same format as that computed by the `DMDASetBlockFills_Private()`
  function from a dense 2D matrix representation.

  `DMDASetGetMatrix()` allows you to provide general code for those more complicated nonzero patterns than
  can be represented in the `dfill`, `ofill` format

  Contributed by Philip C. Roth

.seealso: `DM`, `DMDA`, `DMDASetBlockFills()`, `DMCreateMatrix()`, `DMDASetGetMatrix()`, `DMSetMatrixPreallocateOnly()`
@*/
PetscErrorCode DMDASetBlockFillsSparse(DM da, const PetscInt *dfillsparse, const PetscInt *ofillsparse)
{
  DM_DA *dd = (DM_DA *)da->data;

  PetscFunctionBegin;
  /* save the given dfill and ofill information (already compressed; copied verbatim) */
  PetscCall(DMDASetBlockFillsSparse_Private(dfillsparse, dd->w, &dd->dfill));
  PetscCall(DMDASetBlockFillsSparse_Private(ofillsparse, dd->w, &dd->ofill));

  /* count nonzeros in ofill columns */
  PetscCall(DMDASetBlockFills_Private2(dd));
  PetscFunctionReturn(PETSC_SUCCESS);
}
/*
   Dispatches coloring creation to the dimension-specific routines below.
   For BAIJ matrix types the DMDA is temporarily told it has one degree of
   freedom per grid point, so the coloring is produced for blocks rather
   than individual matrix entries; the original values are restored afterwards.
*/
PetscErrorCode DMCreateColoring_DA(DM da, ISColoringType ctype, ISColoring *coloring)
{
  PetscInt       dim, m, n, p, nc;
  DMBoundaryType bx, by, bz;
  MPI_Comm       comm;
  PetscMPIInt    size;
  PetscBool      isBAIJ;
  DM_DA         *dd = (DM_DA *)da->data;

  PetscFunctionBegin;
  /*
                                                  m
          ------------------------------------------------------
         |                                                      |
         |                                                      |
         |               ----------------------                |
         |               |                    |                 |
      n  |           yn  |                    |                 |
         |               |                    |                 |
         |               .---------------------                 |
         |             (xs,ys)     xn                           |
         |            .                                         |
         |         (gxs,gys)                                    |
         |                                                      |
          -----------------------------------------------------
  */

  /*
         nc - number of components per grid point
         col - number of colors needed in one direction for single component problem
  */
  PetscCall(DMDAGetInfo(da, &dim, NULL, NULL, NULL, &m, &n, &p, &nc, NULL, &bx, &by, &bz, NULL));

  PetscCall(PetscObjectGetComm((PetscObject)da, &comm));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  if (ctype == IS_COLORING_LOCAL) {
    if (size == 1) {
      /* on one process local and global colorings coincide */
      ctype = IS_COLORING_GLOBAL;
    } else {
      PetscCheck((dim == 1) || !((m == 1 && bx == DM_BOUNDARY_PERIODIC) || (n == 1 && by == DM_BOUNDARY_PERIODIC) || (p == 1 && bz == DM_BOUNDARY_PERIODIC)), PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "IS_COLORING_LOCAL cannot be used for periodic boundary condition having both ends of the domain on the same process");
    }
  }

  /* Tell the DMDA it has 1 degree of freedom per grid point so that the coloring for BAIJ
     matrices is for the blocks, not the individual matrix elements */
  PetscCall(PetscStrbeginswith(da->mattype, MATBAIJ, &isBAIJ));
  if (!isBAIJ) PetscCall(PetscStrbeginswith(da->mattype, MATMPIBAIJ, &isBAIJ));
  if (!isBAIJ) PetscCall(PetscStrbeginswith(da->mattype, MATSEQBAIJ, &isBAIJ));
  if (isBAIJ) {
    /* temporarily scale the ownership ranges down to block units */
    dd->w  = 1;
    dd->xs = dd->xs / nc;
    dd->xe = dd->xe / nc;
    dd->Xs = dd->Xs / nc;
    dd->Xe = dd->Xe / nc;
  }

  /*
     We do not provide a getcoloring function in the DMDA operations because
     the basic DMDA does not know about matrices. We think of DMDA as being
     more low-level than matrices.
  */
  if (dim == 1) PetscCall(DMCreateColoring_DA_1d_MPIAIJ(da, ctype, coloring));
  else if (dim == 2) PetscCall(DMCreateColoring_DA_2d_MPIAIJ(da, ctype, coloring));
  else if (dim == 3) PetscCall(DMCreateColoring_DA_3d_MPIAIJ(da, ctype, coloring));
  else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "Not done for %" PetscInt_FMT " dimension, send us mail petsc-maint@mcs.anl.gov for code", dim);
  if (isBAIJ) {
    /* restore the per-point degrees of freedom and ownership ranges */
    dd->w  = nc;
    dd->xs = dd->xs * nc;
    dd->xe = dd->xe * nc;
    dd->Xs = dd->Xs * nc;
    dd->Xe = dd->Xe * nc;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
/*
   Builds a coloring for a 2d DMDA suitable for MATMPIAIJ matrices: each grid
   point gets a color determined by its (i mod col, j mod col) position within a
   col-by-col tile (col = 2*s + 1), with nc colors per point. Colorings are
   cached on the DM_DA (localcoloring / ghostedcoloring) and reference-counted.
*/
PetscErrorCode DMCreateColoring_DA_2d_MPIAIJ(DM da, ISColoringType ctype, ISColoring *coloring)
{
  PetscInt         xs, ys, nx, ny, i, j, ii, gxs, gys, gnx, gny, m, n, M, N, dim, s, k, nc, col;
  PetscInt         ncolors = 0;
  MPI_Comm         comm;
  DMBoundaryType   bx, by;
  DMDAStencilType  st;
  ISColoringValue *colors;
  DM_DA           *dd = (DM_DA *)da->data;

  PetscFunctionBegin;
  /*
         nc - number of components per grid point
         col - number of colors needed in one direction for single component problem
  */
  PetscCall(DMDAGetInfo(da, &dim, &m, &n, NULL, &M, &N, NULL, &nc, &s, &bx, &by, NULL, &st));
  col = 2 * s + 1;
  PetscCall(DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL));
  PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL));
  PetscCall(PetscObjectGetComm((PetscObject)da, &comm));

  /* special case as taught to us by Paul Hovland */
  if (st == DMDA_STENCIL_STAR && s == 1) {
    /* a 5-point star stencil needs only 5 colors per component, not col*col */
    PetscCall(DMCreateColoring_DA_2d_5pt_MPIAIJ(da, ctype, coloring));
  } else {
    if (ctype == IS_COLORING_GLOBAL) {
      if (!dd->localcoloring) {
        PetscCall(PetscMalloc1(nc * nx * ny, &colors));
        ii = 0;
        for (j = ys; j < ys + ny; j++) {
          for (i = xs; i < xs + nx; i++) {
            for (k = 0; k < nc; k++) colors[ii++] = k + nc * ((i % col) + col * (j % col));
          }
        }
        ncolors = nc + nc * (col - 1 + col * (col - 1));
        PetscCall(ISColoringCreate(comm, ncolors, nc * nx * ny, colors, PETSC_OWN_POINTER, &dd->localcoloring));
      }
      *coloring = dd->localcoloring;
    } else if (ctype == IS_COLORING_LOCAL) {
      if (!dd->ghostedcoloring) {
        PetscCall(PetscMalloc1(nc * gnx * gny, &colors));
        ii = 0;
        for (j = gys; j < gys + gny; j++) {
          for (i = gxs; i < gxs + gnx; i++) {
            for (k = 0; k < nc; k++) {
              /* the complicated stuff is to handle periodic boundaries */
              colors[ii++] = k + nc * ((SetInRange(i, m) % col) + col * (SetInRange(j, n) % col));
            }
          }
        }
        ncolors = nc + nc * (col - 1 + col * (col - 1));
        PetscCall(ISColoringCreate(comm, ncolors, nc * gnx * gny, colors, PETSC_OWN_POINTER, &dd->ghostedcoloring));
        /* PetscIntView(ncolors,(PetscInt*)colors,0); */

        PetscCall(ISColoringSetType(dd->ghostedcoloring, IS_COLORING_LOCAL));
      }
      *coloring = dd->ghostedcoloring;
    } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_ARG_WRONG, "Unknown ISColoringType %d", (int)ctype);
  }
  /* caller receives its own reference to the cached coloring */
  PetscCall(ISColoringReference(*coloring));
  PetscFunctionReturn(PETSC_SUCCESS);
}
/*
   Builds a coloring for a 3d DMDA suitable for MATMPIAIJ matrices: each grid
   point gets a color from its position within a col^3 tile (col = 2*s + 1),
   with nc colors per point. Periodic directions must have a grid size divisible
   by col so that the tiling wraps consistently. Colorings are cached on the
   DM_DA and reference-counted.
*/
PetscErrorCode DMCreateColoring_DA_3d_MPIAIJ(DM da, ISColoringType ctype, ISColoring *coloring)
{
  PetscInt         xs, ys, nx, ny, i, j, gxs, gys, gnx, gny, m, n, p, dim, s, k, nc, col, zs, gzs, ii, l, nz, gnz, M, N, P;
  PetscInt         ncolors;
  MPI_Comm         comm;
  DMBoundaryType   bx, by, bz;
  DMDAStencilType  st;
  ISColoringValue *colors;
  DM_DA           *dd = (DM_DA *)da->data;

  PetscFunctionBegin;
  /*
         nc - number of components per grid point
         col - number of colors needed in one direction for single component problem
  */
  PetscCall(DMDAGetInfo(da, &dim, &m, &n, &p, &M, &N, &P, &nc, &s, &bx, &by, &bz, &st));
  col = 2 * s + 1;
  PetscCheck(bx != DM_BOUNDARY_PERIODIC || (m % col) == 0, PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "For coloring efficiency ensure number of grid points in X is divisible\n\
by 2*stencil_width + 1\n");
  PetscCheck(by != DM_BOUNDARY_PERIODIC || (n % col) == 0, PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "For coloring efficiency ensure number of grid points in Y is divisible\n\
by 2*stencil_width + 1\n");
  PetscCheck(bz != DM_BOUNDARY_PERIODIC || (p % col) == 0, PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "For coloring efficiency ensure number of grid points in Z is divisible\n\
by 2*stencil_width + 1\n");

  PetscCall(DMDAGetCorners(da, &xs, &ys, &zs, &nx, &ny, &nz));
  PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, &gzs, &gnx, &gny, &gnz));
  PetscCall(PetscObjectGetComm((PetscObject)da, &comm));

  /* create the coloring */
  if (ctype == IS_COLORING_GLOBAL) {
    if (!dd->localcoloring) {
      PetscCall(PetscMalloc1(nc * nx * ny * nz, &colors));
      ii = 0;
      for (k = zs; k < zs + nz; k++) {
        for (j = ys; j < ys + ny; j++) {
          for (i = xs; i < xs + nx; i++) {
            for (l = 0; l < nc; l++) colors[ii++] = l + nc * ((i % col) + col * (j % col) + col * col * (k % col));
          }
        }
      }
      ncolors = nc + nc * (col - 1 + col * (col - 1) + col * col * (col - 1));
      PetscCall(ISColoringCreate(comm, ncolors, nc * nx * ny * nz, colors, PETSC_OWN_POINTER, &dd->localcoloring));
    }
    *coloring = dd->localcoloring;
  } else if (ctype == IS_COLORING_LOCAL) {
    if (!dd->ghostedcoloring) {
      PetscCall(PetscMalloc1(nc * gnx * gny * gnz, &colors));
      ii = 0;
      for (k = gzs; k < gzs + gnz; k++) {
        for (j = gys; j < gys + gny; j++) {
          for (i = gxs; i < gxs + gnx; i++) {
            for (l = 0; l < nc; l++) {
              /* the complicated stuff is to handle periodic boundaries */
              colors[ii++] = l + nc * ((SetInRange(i, m) % col) + col * (SetInRange(j, n) % col) + col * col * (SetInRange(k, p) % col));
            }
          }
        }
      }
      ncolors = nc + nc * (col - 1 + col * (col - 1) + col * col * (col - 1));
      PetscCall(ISColoringCreate(comm, ncolors, nc * gnx * gny * gnz, colors, PETSC_OWN_POINTER, &dd->ghostedcoloring));
      PetscCall(ISColoringSetType(dd->ghostedcoloring, IS_COLORING_LOCAL));
    }
    *coloring = dd->ghostedcoloring;
  } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_ARG_WRONG, "Unknown ISColoringType %d", (int)ctype);
  /* caller receives its own reference to the cached coloring */
  PetscCall(ISColoringReference(*coloring));
  PetscFunctionReturn(PETSC_SUCCESS);
}
/*
   Builds a coloring for a 1d DMDA suitable for MATMPIAIJ matrices. When block
   fill information is available (dd->ofillcols, set via DMDASetBlockFills()),
   the global coloring exploits it: components whose off-diagonal column is
   empty can share colors, reducing the total from nc*col to nc + 2*s*tc where
   tc is the number of off-diagonally coupled components. Colorings are cached
   on the DM_DA and reference-counted.
*/
PetscErrorCode DMCreateColoring_DA_1d_MPIAIJ(DM da, ISColoringType ctype, ISColoring *coloring)
{
  PetscInt         xs, nx, i, i1, gxs, gnx, l, m, M, dim, s, nc, col;
  PetscInt         ncolors;
  MPI_Comm         comm;
  DMBoundaryType   bx;
  ISColoringValue *colors;
  DM_DA           *dd = (DM_DA *)da->data;

  PetscFunctionBegin;
  /*
         nc - number of components per grid point
         col - number of colors needed in one direction for single component problem
  */
  PetscCall(DMDAGetInfo(da, &dim, &m, NULL, NULL, &M, NULL, NULL, &nc, &s, &bx, NULL, NULL, NULL));
  col = 2 * s + 1;
  PetscCall(DMDAGetCorners(da, &xs, NULL, NULL, &nx, NULL, NULL));
  PetscCall(DMDAGetGhostCorners(da, &gxs, NULL, NULL, &gnx, NULL, NULL));
  PetscCall(PetscObjectGetComm((PetscObject)da, &comm));

  /* create the coloring */
  if (ctype == IS_COLORING_GLOBAL) {
    if (!dd->localcoloring) {
      PetscCall(PetscMalloc1(nc * nx, &colors));
      if (dd->ofillcols) {
        /* tc = number of components coupled through the off-diagonal blocks */
        PetscInt tc = 0;
        for (i = 0; i < nc; i++) tc += (PetscInt)(dd->ofillcols[i] > 0);
        i1 = 0;
        for (i = xs; i < xs + nx; i++) {
          for (l = 0; l < nc; l++) {
            if (dd->ofillcols[l] && (i % col)) {
              colors[i1++] = nc - 1 + tc * ((i % col) - 1) + dd->ofillcols[l];
            } else {
              /* uncoupled components (or tile origin) reuse the base colors 0..nc-1 */
              colors[i1++] = l;
            }
          }
        }
        ncolors = nc + 2 * s * tc;
      } else {
        i1 = 0;
        for (i = xs; i < xs + nx; i++) {
          for (l = 0; l < nc; l++) colors[i1++] = l + nc * (i % col);
        }
        ncolors = nc + nc * (col - 1);
      }
      PetscCall(ISColoringCreate(comm, ncolors, nc * nx, colors, PETSC_OWN_POINTER, &dd->localcoloring));
    }
    *coloring = dd->localcoloring;
  } else if (ctype == IS_COLORING_LOCAL) {
    if (!dd->ghostedcoloring) {
      PetscCall(PetscMalloc1(nc * gnx, &colors));
      i1 = 0;
      for (i = gxs; i < gxs + gnx; i++) {
        for (l = 0; l < nc; l++) {
          /* the complicated stuff is to handle periodic boundaries */
          colors[i1++] = l + nc * (SetInRange(i, m) % col);
        }
      }
      ncolors = nc + nc * (col - 1);
      PetscCall(ISColoringCreate(comm, ncolors, nc * gnx, colors, PETSC_OWN_POINTER, &dd->ghostedcoloring));
      PetscCall(ISColoringSetType(dd->ghostedcoloring, IS_COLORING_LOCAL));
    }
    *coloring = dd->ghostedcoloring;
  } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_ARG_WRONG, "Unknown ISColoringType %d", (int)ctype);
  /* caller receives its own reference to the cached coloring */
  PetscCall(ISColoringReference(*coloring));
  PetscFunctionReturn(PETSC_SUCCESS);
}
/*
   Special 2d coloring for the 5-point star stencil (stencil width 1): the
   diagonal tiling (3*j + i) mod 5 needs only 5 colors per component instead of
   the 9 a box stencil would require. Note this routine does NOT take an extra
   reference on the cached coloring; its caller DMCreateColoring_DA_2d_MPIAIJ()
   does that after it returns.
*/
PetscErrorCode DMCreateColoring_DA_2d_5pt_MPIAIJ(DM da, ISColoringType ctype, ISColoring *coloring)
{
  PetscInt         xs, ys, nx, ny, i, j, ii, gxs, gys, gnx, gny, m, n, dim, s, k, nc;
  PetscInt         ncolors;
  MPI_Comm         comm;
  DMBoundaryType   bx, by;
  ISColoringValue *colors;
  DM_DA           *dd = (DM_DA *)da->data;

  PetscFunctionBegin;
  /*
         nc - number of components per grid point
         col - number of colors needed in one direction for single component problem
  */
  PetscCall(DMDAGetInfo(da, &dim, &m, &n, NULL, NULL, NULL, NULL, &nc, &s, &bx, &by, NULL, NULL));
  PetscCall(DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL));
  PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL));
  PetscCall(PetscObjectGetComm((PetscObject)da, &comm));
  /* create the coloring */
  if (ctype == IS_COLORING_GLOBAL) {
    if (!dd->localcoloring) {
      PetscCall(PetscMalloc1(nc * nx * ny, &colors));
      ii = 0;
      for (j = ys; j < ys + ny; j++) {
        for (i = xs; i < xs + nx; i++) {
          for (k = 0; k < nc; k++) colors[ii++] = k + nc * ((3 * j + i) % 5);
        }
      }
      ncolors = 5 * nc;
      PetscCall(ISColoringCreate(comm, ncolors, nc * nx * ny, colors, PETSC_OWN_POINTER, &dd->localcoloring));
    }
    *coloring = dd->localcoloring;
  } else if (ctype == IS_COLORING_LOCAL) {
    if (!dd->ghostedcoloring) {
      PetscCall(PetscMalloc1(nc * gnx * gny, &colors));
      ii = 0;
      for (j = gys; j < gys + gny; j++) {
        for (i = gxs; i < gxs + gnx; i++) {
          /* SetInRange() wraps ghost indices for periodic boundaries */
          for (k = 0; k < nc; k++) colors[ii++] = k + nc * ((3 * SetInRange(j, n) + SetInRange(i, m)) % 5);
        }
      }
      ncolors = 5 * nc;
      PetscCall(ISColoringCreate(comm, ncolors, nc * gnx * gny, colors, PETSC_OWN_POINTER, &dd->ghostedcoloring));
      PetscCall(ISColoringSetType(dd->ghostedcoloring, IS_COLORING_LOCAL));
    }
    *coloring = dd->ghostedcoloring;
  } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_ARG_WRONG, "Unknown ISColoringType %d", (int)ctype);
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* =========================================================================== */
extern PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ(DM, Mat);
extern PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ_Fill(DM, Mat);
extern PetscErrorCode DMCreateMatrix_DA_1d_SeqAIJ_NoPreallocation(DM, Mat);
extern PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ(DM, Mat);
extern PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ_Fill(DM, Mat);
extern PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ(DM, Mat);
extern PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ_Fill(DM, Mat);
extern PetscErrorCode DMCreateMatrix_DA_2d_MPIBAIJ(DM, Mat);
extern PetscErrorCode DMCreateMatrix_DA_3d_MPIBAIJ(DM, Mat);
extern PetscErrorCode DMCreateMatrix_DA_2d_MPISBAIJ(DM, Mat);
extern PetscErrorCode DMCreateMatrix_DA_3d_MPISBAIJ(DM, Mat);
extern PetscErrorCode DMCreateMatrix_DA_2d_MPISELL(DM, Mat);
extern PetscErrorCode DMCreateMatrix_DA_3d_MPISELL(DM, Mat);
extern PetscErrorCode DMCreateMatrix_DA_IS(DM, Mat);
HYPRE_StructMatrix PETSc matrix 531 532 Logically Collective 533 534 Input Parameters: 535 + mat - the matrix 536 - da - the da 537 538 Level: intermediate 539 540 .seealso: `DMDA`, `Mat`, `MatSetUp()` 541 @*/ 542 PetscErrorCode MatSetupDM(Mat mat, DM da) 543 { 544 PetscFunctionBegin; 545 PetscValidHeaderSpecific(mat, MAT_CLASSID, 1); 546 PetscValidHeaderSpecificType(da, DM_CLASSID, 2, DMDA); 547 PetscTryMethod(mat, "MatSetupDM_C", (Mat, DM), (mat, da)); 548 PetscFunctionReturn(PETSC_SUCCESS); 549 } 550 551 PetscErrorCode MatView_MPI_DA(Mat A, PetscViewer viewer) 552 { 553 DM da; 554 const char *prefix; 555 Mat Anatural; 556 AO ao; 557 PetscInt rstart, rend, *petsc, i; 558 IS is; 559 MPI_Comm comm; 560 PetscViewerFormat format; 561 562 PetscFunctionBegin; 563 /* Check whether we are just printing info, in which case MatView() already viewed everything we wanted to view */ 564 PetscCall(PetscViewerGetFormat(viewer, &format)); 565 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) PetscFunctionReturn(PETSC_SUCCESS); 566 567 PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 568 PetscCall(MatGetDM(A, &da)); 569 PetscCheck(da, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Matrix not generated from a DMDA"); 570 571 PetscCall(DMDAGetAO(da, &ao)); 572 PetscCall(MatGetOwnershipRange(A, &rstart, &rend)); 573 PetscCall(PetscMalloc1(rend - rstart, &petsc)); 574 for (i = rstart; i < rend; i++) petsc[i - rstart] = i; 575 PetscCall(AOApplicationToPetsc(ao, rend - rstart, petsc)); 576 PetscCall(ISCreateGeneral(comm, rend - rstart, petsc, PETSC_OWN_POINTER, &is)); 577 578 /* call viewer on natural ordering */ 579 PetscCall(MatCreateSubMatrix(A, is, is, MAT_INITIAL_MATRIX, &Anatural)); 580 PetscCall(ISDestroy(&is)); 581 PetscCall(PetscObjectGetOptionsPrefix((PetscObject)A, &prefix)); 582 PetscCall(PetscObjectSetOptionsPrefix((PetscObject)Anatural, prefix)); 583 PetscCall(PetscObjectSetName((PetscObject)Anatural, ((PetscObject)A)->name)); 
584 ((PetscObject)Anatural)->donotPetscObjectPrintClassNamePrefixType = PETSC_TRUE; 585 PetscCall(MatView(Anatural, viewer)); 586 ((PetscObject)Anatural)->donotPetscObjectPrintClassNamePrefixType = PETSC_FALSE; 587 PetscCall(MatDestroy(&Anatural)); 588 PetscFunctionReturn(PETSC_SUCCESS); 589 } 590 591 PetscErrorCode MatLoad_MPI_DA(Mat A, PetscViewer viewer) 592 { 593 DM da; 594 Mat Anatural, Aapp; 595 AO ao; 596 PetscInt rstart, rend, *app, i, m, n, M, N; 597 IS is; 598 MPI_Comm comm; 599 600 PetscFunctionBegin; 601 PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 602 PetscCall(MatGetDM(A, &da)); 603 PetscCheck(da, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Matrix not generated from a DMDA"); 604 605 /* Load the matrix in natural ordering */ 606 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &Anatural)); 607 PetscCall(MatSetType(Anatural, ((PetscObject)A)->type_name)); 608 PetscCall(MatGetSize(A, &M, &N)); 609 PetscCall(MatGetLocalSize(A, &m, &n)); 610 PetscCall(MatSetSizes(Anatural, m, n, M, N)); 611 PetscCall(MatLoad(Anatural, viewer)); 612 613 /* Map natural ordering to application ordering and create IS */ 614 PetscCall(DMDAGetAO(da, &ao)); 615 PetscCall(MatGetOwnershipRange(Anatural, &rstart, &rend)); 616 PetscCall(PetscMalloc1(rend - rstart, &app)); 617 for (i = rstart; i < rend; i++) app[i - rstart] = i; 618 PetscCall(AOPetscToApplication(ao, rend - rstart, app)); 619 PetscCall(ISCreateGeneral(comm, rend - rstart, app, PETSC_OWN_POINTER, &is)); 620 621 /* Do permutation and replace header */ 622 PetscCall(MatCreateSubMatrix(Anatural, is, is, MAT_INITIAL_MATRIX, &Aapp)); 623 PetscCall(MatHeaderReplace(A, &Aapp)); 624 PetscCall(ISDestroy(&is)); 625 PetscCall(MatDestroy(&Anatural)); 626 PetscFunctionReturn(PETSC_SUCCESS); 627 } 628 629 PetscErrorCode DMCreateMatrix_DA(DM da, Mat *J) 630 { 631 PetscInt dim, dof, nx, ny, nz, dims[3], starts[3], M, N, P; 632 Mat A; 633 MPI_Comm comm; 634 MatType Atype; 635 MatType mtype; 636 PetscMPIInt 
/*
   Creates and preallocates the matrix for a DMDA, dispatching to the
   dimension- and format-specific preallocation routine chosen by which
   preallocation method the instantiated matrix type actually provides
   (AIJ, BAIJ, SBAIJ, SELL, or IS), falling back to plain MatSetUp().
*/
PetscErrorCode DMCreateMatrix_DA(DM da, Mat *J)
{
  PetscInt    dim, dof, nx, ny, nz, dims[3], starts[3], M, N, P;
  Mat         A;
  MPI_Comm    comm;
  MatType     Atype;
  MatType     mtype;
  PetscMPIInt size;
  DM_DA      *dd = (DM_DA *)da->data;
  void (*aij)(void) = NULL, (*baij)(void) = NULL, (*sbaij)(void) = NULL, (*sell)(void) = NULL, (*is)(void) = NULL;

  PetscFunctionBegin;
  PetscCall(MatInitializePackage());
  mtype = da->mattype;

  /*
                                                  m
          ------------------------------------------------------
         |                                                      |
         |                                                      |
         |               ----------------------                |
         |               |                    |                 |
      n  |           ny  |                    |                 |
         |               |                    |                 |
         |               .---------------------                 |
         |             (xs,ys)     nx                           |
         |            .                                         |
         |         (gxs,gys)                                    |
         |                                                      |
          -----------------------------------------------------
  */

  /*
         nc - number of components per grid point
         col - number of colors needed in one direction for single component problem
  */
  M   = dd->M;
  N   = dd->N;
  P   = dd->P;
  dim = da->dim;
  dof = dd->w;
  /* PetscCall(DMDAGetInfo(da,&dim,&M,&N,&P,NULL,NULL,NULL,&dof,NULL,NULL,NULL,NULL,NULL)); */
  PetscCall(DMDAGetCorners(da, NULL, NULL, NULL, &nx, &ny, &nz));
  PetscCall(PetscObjectGetComm((PetscObject)da, &comm));
  PetscCall(MatCreate(comm, &A));
  PetscCall(MatSetSizes(A, dof * nx * ny * nz, dof * nx * ny * nz, dof * M * N * P, dof * M * N * P));
  PetscCall(MatSetType(A, mtype));
  PetscCall(MatSetFromOptions(A));
  if (dof * nx * ny * nz < da->bind_below) {
    /* small local problems stay on the CPU */
    PetscCall(MatSetBindingPropagates(A, PETSC_TRUE));
    PetscCall(MatBindToCPU(A, PETSC_TRUE));
  }
  PetscCall(MatSetDM(A, da));
  if (da->structure_only) PetscCall(MatSetOption(A, MAT_STRUCTURE_ONLY, PETSC_TRUE));
  PetscCall(MatGetType(A, &Atype));
  /*
     We do not provide a getmatrix function in the DMDA operations because
     the basic DMDA does not know about matrices. We think of DMDA as being more
     low-level than matrices. This is kind of cheating but, cause sometimes
     we think of DMDA as higher level than matrices.

     We could switch based on Atype (or mtype), but we do not since the
     specialized setting routines depend only on the particular preallocation
     details of the matrix, not the type itself.
  */
  PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatMPIAIJSetPreallocation_C", &aij));
  if (!aij) PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatSeqAIJSetPreallocation_C", &aij));
  if (!aij) {
    PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatMPIBAIJSetPreallocation_C", &baij));
    if (!baij) PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatSeqBAIJSetPreallocation_C", &baij));
    if (!baij) {
      PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatMPISBAIJSetPreallocation_C", &sbaij));
      if (!sbaij) PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatSeqSBAIJSetPreallocation_C", &sbaij));
      if (!sbaij) {
        PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatMPISELLSetPreallocation_C", &sell));
        if (!sell) PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatSeqSELLSetPreallocation_C", &sell));
      }
      if (!sell) PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatISSetPreallocation_C", &is));
    }
  }
  if (aij) {
    if (dim == 1) {
      if (dd->ofill) {
        PetscCall(DMCreateMatrix_DA_1d_MPIAIJ_Fill(da, A));
      } else {
        DMBoundaryType bx;
        PetscMPIInt    size; /* NOTE(review): shadows the function-scope `size` used further below — intentional? */
        PetscCall(DMDAGetInfo(da, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, &bx, NULL, NULL, NULL));
        PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)da), &size));
        if (size == 1 && bx == DM_BOUNDARY_NONE) {
          PetscCall(DMCreateMatrix_DA_1d_SeqAIJ_NoPreallocation(da, A));
        } else {
          PetscCall(DMCreateMatrix_DA_1d_MPIAIJ(da, A));
        }
      }
    } else if (dim == 2) {
      if (dd->ofill) {
        PetscCall(DMCreateMatrix_DA_2d_MPIAIJ_Fill(da, A));
      } else {
        PetscCall(DMCreateMatrix_DA_2d_MPIAIJ(da, A));
      }
    } else if (dim == 3) {
      if (dd->ofill) {
        PetscCall(DMCreateMatrix_DA_3d_MPIAIJ_Fill(da, A));
      } else {
        PetscCall(DMCreateMatrix_DA_3d_MPIAIJ(da, A));
      }
    }
  } else if (baij) {
    if (dim == 2) {
      PetscCall(DMCreateMatrix_DA_2d_MPIBAIJ(da, A));
    } else if (dim == 3) {
      PetscCall(DMCreateMatrix_DA_3d_MPIBAIJ(da, A));
    } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "Not implemented for %" PetscInt_FMT " dimension and Matrix Type: %s in %" PetscInt_FMT " dimension! Send mail to petsc-maint@mcs.anl.gov for code", dim, Atype, dim);
  } else if (sbaij) {
    if (dim == 2) {
      PetscCall(DMCreateMatrix_DA_2d_MPISBAIJ(da, A));
    } else if (dim == 3) {
      PetscCall(DMCreateMatrix_DA_3d_MPISBAIJ(da, A));
    } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "Not implemented for %" PetscInt_FMT " dimension and Matrix Type: %s in %" PetscInt_FMT " dimension! Send mail to petsc-maint@mcs.anl.gov for code", dim, Atype, dim);
  } else if (sell) {
    if (dim == 2) {
      PetscCall(DMCreateMatrix_DA_2d_MPISELL(da, A));
    } else if (dim == 3) {
      PetscCall(DMCreateMatrix_DA_3d_MPISELL(da, A));
    } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "Not implemented for %" PetscInt_FMT " dimension and Matrix Type: %s in %" PetscInt_FMT " dimension! Send mail to petsc-maint@mcs.anl.gov for code", dim, Atype, dim);
  } else if (is) {
    PetscCall(DMCreateMatrix_DA_IS(da, A));
  } else {
    /* unknown preallocation scheme: just set up with the local-to-global mapping */
    ISLocalToGlobalMapping ltog;

    PetscCall(MatSetBlockSize(A, dof));
    PetscCall(MatSetUp(A));
    PetscCall(DMGetLocalToGlobalMapping(da, &ltog));
    PetscCall(MatSetLocalToGlobalMapping(A, ltog, ltog));
  }
  PetscCall(DMDAGetGhostCorners(da, &starts[0], &starts[1], &starts[2], &dims[0], &dims[1], &dims[2]));
  PetscCall(MatSetStencil(A, dim, dims, starts, dof));
  PetscCall(MatSetDM(A, da));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  if (size > 1) {
    /* change viewer to display matrix in natural ordering */
    PetscCall(MatSetOperation(A, MATOP_VIEW, (void (*)(void))MatView_MPI_DA));
    PetscCall(MatSetOperation(A, MATOP_LOAD, (void (*)(void))MatLoad_MPI_DA));
  }
  *J = A;
  PetscFunctionReturn(PETSC_SUCCESS);
}
PETSC_EXTERN PetscErrorCode MatISSetPreallocation_IS(Mat, PetscInt, const PetscInt[], PetscInt, const PetscInt[]);

/*
   Sets up the local-to-global mapping and preallocation for a MATIS matrix on a
   DMDA: global indices not touched by the local element decomposition are
   flagged -1 in the mapping, and the local matrix is preallocated via a
   MATPREALLOCATOR pass over the element connectivity.
*/
PetscErrorCode DMCreateMatrix_DA_IS(DM dm, Mat J)
{
  DM_DA                 *da = (DM_DA *)dm->data;
  Mat                    lJ, P;
  ISLocalToGlobalMapping ltog;
  IS                     is;
  PetscBT                bt;
  const PetscInt        *e_loc, *idx;
  PetscInt               i, nel, nen, nv, dof, *gidx, n, N;

  /* The l2g map of DMDA has all ghosted nodes, and e_loc is a subset of all the local nodes (including the ghosted)
     We need to filter out the local indices that are not represented through the DMDAGetElements decomposition */
  PetscFunctionBegin;
  dof = da->w;
  PetscCall(MatSetBlockSize(J, dof));
  PetscCall(DMGetLocalToGlobalMapping(dm, &ltog));

  /* flag local elements indices in local DMDA numbering */
  PetscCall(ISLocalToGlobalMappingGetSize(ltog, &nv));
  PetscCall(PetscBTCreate(nv / dof, &bt));
  PetscCall(DMDAGetElements(dm, &nel, &nen, &e_loc)); /* this will throw an error if the stencil type is not DMDA_STENCIL_BOX */
  for (i = 0; i < nel * nen; i++) PetscCall(PetscBTSet(bt, e_loc[i]));

  /* filter out (set to -1) the global indices not used by the local elements */
  PetscCall(PetscMalloc1(nv / dof, &gidx));
  PetscCall(ISLocalToGlobalMappingGetBlockIndices(ltog, &idx));
  PetscCall(PetscArraycpy(gidx, idx, nv / dof));
  PetscCall(ISLocalToGlobalMappingRestoreBlockIndices(ltog, &idx));
  for (i = 0; i < nv / dof; i++)
    if (!PetscBTLookup(bt, i)) gidx[i] = -1;
  PetscCall(PetscBTDestroy(&bt));
  PetscCall(ISCreateBlock(PetscObjectComm((PetscObject)dm), dof, nv / dof, gidx, PETSC_OWN_POINTER, &is));
  PetscCall(ISLocalToGlobalMappingCreateIS(is, &ltog));
  PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));
  PetscCall(ISLocalToGlobalMappingDestroy(&ltog));
  PetscCall(ISDestroy(&is));

  /* Preallocation */
  if (dm->prealloc_skip) {
    /* NOTE(review): this path never calls DMDARestoreElements() for the
       DMDAGetElements() above — confirm the get/restore pairing is not required here */
    PetscCall(MatSetUp(J));
  } else {
    /* assemble the element connectivity into a MATPREALLOCATOR, then transfer
       the resulting nonzero pattern onto the local matrix of J */
    PetscCall(MatISGetLocalMat(J, &lJ));
    PetscCall(MatGetLocalToGlobalMapping(lJ, &ltog, NULL));
    PetscCall(MatCreate(PetscObjectComm((PetscObject)lJ), &P));
    PetscCall(MatSetType(P, MATPREALLOCATOR));
    PetscCall(MatSetLocalToGlobalMapping(P, ltog, ltog));
    PetscCall(MatGetSize(lJ, &N, NULL));
    PetscCall(MatGetLocalSize(lJ, &n, NULL));
    PetscCall(MatSetSizes(P, n, n, N, N));
    PetscCall(MatSetBlockSize(P, dof));
    PetscCall(MatSetUp(P));
    for (i = 0; i < nel; i++) PetscCall(MatSetValuesBlockedLocal(P, nen, e_loc + i * nen, nen, e_loc + i * nen, NULL, INSERT_VALUES));
    PetscCall(MatPreallocatorPreallocate(P, (PetscBool)!da->prealloc_only, lJ));
    PetscCall(MatISRestoreLocalMat(J, &lJ));
    PetscCall(DMDARestoreElements(dm, &nel, &nen, &e_loc));
    PetscCall(MatDestroy(&P));

    PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
dim, s, *cols = NULL, k, nc, *rows = NULL, col, cnt, l, p; 846 PetscInt lstart, lend, pstart, pend, *dnz, *onz; 847 MPI_Comm comm; 848 PetscScalar *values; 849 DMBoundaryType bx, by; 850 ISLocalToGlobalMapping ltog; 851 DMDAStencilType st; 852 853 PetscFunctionBegin; 854 /* 855 nc - number of components per grid point 856 col - number of colors needed in one direction for single component problem 857 */ 858 PetscCall(DMDAGetInfo(da, &dim, &m, &n, NULL, NULL, NULL, NULL, &nc, &s, &bx, &by, NULL, &st)); 859 col = 2 * s + 1; 860 PetscCall(DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL)); 861 PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL)); 862 PetscCall(PetscObjectGetComm((PetscObject)da, &comm)); 863 864 PetscCall(PetscMalloc2(nc, &rows, col * col * nc * nc, &cols)); 865 PetscCall(DMGetLocalToGlobalMapping(da, <og)); 866 867 PetscCall(MatSetBlockSize(J, nc)); 868 /* determine the matrix preallocation information */ 869 MatPreallocateBegin(comm, nc * nx * ny, nc * nx * ny, dnz, onz); 870 for (i = xs; i < xs + nx; i++) { 871 pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i)); 872 pend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1)); 873 874 for (j = ys; j < ys + ny; j++) { 875 slot = i - gxs + gnx * (j - gys); 876 877 lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j)); 878 lend = (by == DM_BOUNDARY_PERIODIC) ? 
s : (PetscMin(s, n - j - 1)); 879 880 cnt = 0; 881 for (k = 0; k < nc; k++) { 882 for (l = lstart; l < lend + 1; l++) { 883 for (p = pstart; p < pend + 1; p++) { 884 if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star have either l = 0 or p = 0 */ 885 cols[cnt++] = k + nc * (slot + gnx * l + p); 886 } 887 } 888 } 889 rows[k] = k + nc * (slot); 890 } 891 PetscCall(MatPreallocateSetLocal(ltog, nc, rows, ltog, cnt, cols, dnz, onz)); 892 } 893 } 894 PetscCall(MatSetBlockSize(J, nc)); 895 PetscCall(MatSeqSELLSetPreallocation(J, 0, dnz)); 896 PetscCall(MatMPISELLSetPreallocation(J, 0, dnz, 0, onz)); 897 MatPreallocateEnd(dnz, onz); 898 899 PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog)); 900 901 /* 902 For each node in the grid: we get the neighbors in the local (on processor ordering 903 that includes the ghost points) then MatSetValuesLocal() maps those indices to the global 904 PETSc ordering. 905 */ 906 if (!da->prealloc_only) { 907 PetscCall(PetscCalloc1(col * col * nc * nc, &values)); 908 for (i = xs; i < xs + nx; i++) { 909 pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i)); 910 pend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1)); 911 912 for (j = ys; j < ys + ny; j++) { 913 slot = i - gxs + gnx * (j - gys); 914 915 lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j)); 916 lend = (by == DM_BOUNDARY_PERIODIC) ? 
s : (PetscMin(s, n - j - 1));

        cnt = 0;
        for (k = 0; k < nc; k++) {
          for (l = lstart; l < lend + 1; l++) {
            for (p = pstart; p < pend + 1; p++) {
              if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star have either l = 0 or p = 0 */
                cols[cnt++] = k + nc * (slot + gnx * l + p);
              }
            }
          }
          rows[k] = k + nc * (slot);
        }
        PetscCall(MatSetValuesLocal(J, nc, rows, cnt, cols, values, INSERT_VALUES));
      }
    }
    PetscCall(PetscFree(values));
    /* do not copy values to GPU since they are all zero and not yet needed there */
    PetscCall(MatBindToCPU(J, PETSC_TRUE));
    PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatBindToCPU(J, PETSC_FALSE));
    PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  }
  PetscCall(PetscFree2(rows, cols));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  DMCreateMatrix_DA_3d_MPISELL - 3d analogue of DMCreateMatrix_DA_2d_MPISELL:
  preallocates (and optionally zero-fills) the Jacobian for a 3d DMDA in MATMPISELL format.
*/
PetscErrorCode DMCreateMatrix_DA_3d_MPISELL(DM da, Mat J)
{
  PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
  PetscInt               m, n, dim, s, *cols = NULL, k, nc, *rows = NULL, col, cnt, l, p, *dnz = NULL, *onz = NULL;
  PetscInt               istart, iend, jstart, jend, kstart, kend, zs, nz, gzs, gnz, ii, jj, kk, M, N, P;
  MPI_Comm               comm;
  PetscScalar           *values;
  DMBoundaryType         bx, by, bz;
  ISLocalToGlobalMapping ltog;
  DMDAStencilType        st;

  PetscFunctionBegin;
  /*
         nc - number of components per grid point
         col - number of colors needed in one direction for single component problem
  */
  PetscCall(DMDAGetInfo(da, &dim, &m, &n, &p, &M, &N, &P, &nc, &s, &bx, &by, &bz, &st));
  col = 2 * s + 1;
  PetscCall(DMDAGetCorners(da, &xs, &ys, &zs, &nx, &ny, &nz));
  PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, &gzs, &gnx, &gny, &gnz));
  PetscCall(PetscObjectGetComm((PetscObject)da, &comm));

  /* rows: one index per component; cols: worst case is a full 3d box stencil of nc-wide blocks */
  PetscCall(PetscMalloc2(nc, &rows, col * col * col * nc * nc, &cols));
  PetscCall(DMGetLocalToGlobalMapping(da, &ltog));

  PetscCall(MatSetBlockSize(J, nc));
  /* determine the matrix preallocation information */
  MatPreallocateBegin(comm, nc * nx * ny * nz, nc * nx * ny * nz, dnz, onz);
  for (i = xs; i < xs + nx; i++) {
    /* clip the stencil at non-periodic physical boundaries in each direction */
    istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
    iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
    for (j = ys; j < ys + ny; j++) {
      jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
      jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
      for (k = zs; k < zs + nz; k++) {
        kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
        kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));

        slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs); /* local (ghosted) index of (i,j,k) */

        cnt = 0;
        for (l = 0; l < nc; l++) {
          for (ii = istart; ii < iend + 1; ii++) {
            for (jj = jstart; jj < jend + 1; jj++) {
              for (kk = kstart; kk < kend + 1; kk++) {
                if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/
                  cols[cnt++] = l + nc * (slot + ii + gnx * jj + gnx * gny * kk);
                }
              }
            }
          }
          rows[l] = l + nc * (slot);
        }
        PetscCall(MatPreallocateSetLocal(ltog, nc, rows, ltog, cnt, cols, dnz, onz));
      }
    }
  }
  PetscCall(MatSetBlockSize(J, nc));
  PetscCall(MatSeqSELLSetPreallocation(J, 0, dnz));
  PetscCall(MatMPISELLSetPreallocation(J, 0, dnz, 0, onz));
  MatPreallocateEnd(dnz, onz);
  PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    /* NOTE(review): nc*nc*nc over-sizes the zero buffer (nc rows x col^3*nc cols needs only
       col^3*nc^2); harmless but worth confirming against upstream intent */
    PetscCall(PetscCalloc1(col * col * col * nc * nc * nc, &values));
    for (i = xs; i < xs + nx; i++) {
      istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
      iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
      for (j = ys; j < ys + ny; j++) {
        jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
        jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
        for (k = zs; k < zs + nz; k++) {
          kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
          kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));

          slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);

          cnt = 0;
          for (l = 0; l < nc; l++) {
            for (ii = istart; ii < iend + 1; ii++) {
              for (jj = jstart; jj < jend + 1; jj++) {
                for (kk = kstart; kk < kend + 1; kk++) {
                  if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/
                    cols[cnt++] = l + nc * (slot + ii + gnx * jj + gnx * gny * kk);
                  }
                }
              }
            }
            rows[l] = l + nc * (slot);
          }
          PetscCall(MatSetValuesLocal(J, nc, rows, cnt, cols, values, INSERT_VALUES));
        }
      }
    }
    PetscCall(PetscFree(values));
    /* do not copy values to GPU since they are all zero and not yet needed there */
    PetscCall(MatBindToCPU(J, PETSC_TRUE));
    PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatBindToCPU(J, PETSC_FALSE));
    PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  }
  PetscCall(PetscFree2(rows, cols));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  DMCreateMatrix_DA_2d_MPIAIJ - Preallocates (and, unless da->prealloc_only, zero-fills)
  the Jacobian for a 2d DMDA in MATMPIAIJ format.
*/
PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ(DM da, Mat J)
{
  PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny, m, n, dim, s, *cols = NULL, k, nc, *rows = NULL, col, cnt, l, p, M, N;
  PetscInt               lstart, lend, pstart, pend, *dnz, *onz;
  MPI_Comm               comm;
  DMBoundaryType         bx, by;
  ISLocalToGlobalMapping ltog, mltog;
  DMDAStencilType        st;
  PetscBool              removedups = PETSC_FALSE, alreadyboundtocpu = PETSC_TRUE;

  PetscFunctionBegin;
  /*
         nc - number of components per grid point
         col - number of colors needed in one direction for single component problem
  */
  PetscCall(DMDAGetInfo(da, &dim, &m, &n, NULL, &M, &N, NULL, &nc, &s, &bx, &by, NULL, &st));
  if (bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE) PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_TRUE));
  col = 2 * s + 1;
  /*
       With one processor in periodic domains in a skinny dimension the code will label nonzero
       columns multiple times because of "wrapping" around the end of the domain hitting an entry
       already counted in the other direction.
  */
  if (M == 1 && 2 * s >= m) removedups = PETSC_TRUE;
  if (N == 1 && 2 * s >= n) removedups = PETSC_TRUE;
  PetscCall(DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL));
  PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL));
  PetscCall(PetscObjectGetComm((PetscObject)da, &comm));

  PetscCall(PetscMalloc2(nc, &rows, col * col * nc * nc, &cols));
  PetscCall(DMGetLocalToGlobalMapping(da, &ltog));

  PetscCall(MatSetBlockSize(J, nc));
  /* determine the matrix preallocation information */
  MatPreallocateBegin(comm, nc * nx * ny, nc * nx * ny, dnz, onz);
  for (i = xs; i < xs + nx; i++) {
    pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
    pend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));

    for (j = ys; j < ys + ny; j++) {
      slot = i - gxs + gnx * (j - gys); /* local (ghosted) index of grid point (i,j) */

      lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
      lend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));

      cnt = 0;
      for (k = 0; k < nc; k++) {
        for (l = lstart; l < lend + 1; l++) {
          for (p = pstart; p < pend + 1; p++) {
            if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star have either l = 0 or p = 0 */
              cols[cnt++] = k + nc * (slot + gnx * l + p);
            }
          }
        }
        rows[k] = k + nc * (slot);
      }
      if (removedups) PetscCall(MatPreallocateSetLocalRemoveDups(ltog, nc, rows, ltog, cnt, cols, dnz, onz));
      else PetscCall(MatPreallocateSetLocal(ltog, nc, rows, ltog, cnt, cols, dnz, onz));
    }
  }
  PetscCall(MatSetBlockSize(J, nc));
  PetscCall(MatSeqAIJSetPreallocation(J, 0, dnz));
  PetscCall(MatMPIAIJSetPreallocation(J, 0, dnz, 0, onz));
  MatPreallocateEnd(dnz, onz);
  /* respect a mapping a caller may already have installed on J */
  PetscCall(MatGetLocalToGlobalMapping(J, &mltog, NULL));
  if (!mltog) PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    for (i = xs; i < xs + nx; i++) {
      pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
      pend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));

      for (j = ys; j < ys + ny; j++) {
        slot = i - gxs + gnx * (j - gys);

        lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
        lend   = (by == DM_BOUNDARY_PERIODIC) ?
s : (PetscMin(s, n - j - 1));

        cnt = 0;
        for (l = lstart; l < lend + 1; l++) {
          for (p = pstart; p < pend + 1; p++) {
            if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star have either l = 0 or p = 0 */
              /* first component of this stencil point, then the remaining nc-1 consecutively */
              cols[cnt++] = nc * (slot + gnx * l + p);
              for (k = 1; k < nc; k++) {
                cols[cnt] = 1 + cols[cnt - 1];
                cnt++;
              }
            }
          }
        }
        for (k = 0; k < nc; k++) rows[k] = k + nc * (slot);
        PetscCall(MatSetValuesLocal(J, nc, rows, cnt, cols, NULL, INSERT_VALUES));
      }
    }
    /* do not copy values to GPU since they are all zero and not yet needed there */
    PetscCall(MatBoundToCPU(J, &alreadyboundtocpu));
    PetscCall(MatBindToCPU(J, PETSC_TRUE));
    PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
    if (!alreadyboundtocpu) PetscCall(MatBindToCPU(J, PETSC_FALSE));
    PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
    if (bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE) PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_FALSE));
  }
  PetscCall(PetscFree2(rows, cols));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  DMCreateMatrix_DA_2d_MPIAIJ_Fill - Like DMCreateMatrix_DA_2d_MPIAIJ but honors the
  per-block fill patterns set with DMDASetBlockFills(): dfill for the diagonal block,
  ofill (CSR form, see DMDASetBlockFills_Private) for off-diagonal stencil points.
*/
PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ_Fill(DM da, Mat J)
{
  PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
  PetscInt               m, n, dim, s, *cols, k, nc, row, col, cnt, maxcnt = 0, l, p, M, N;
  PetscInt               lstart, lend, pstart, pend, *dnz, *onz;
  DM_DA                 *dd = (DM_DA *)da->data;
  PetscInt               ifill_col, *ofill = dd->ofill, *dfill = dd->dfill;
  MPI_Comm               comm;
  DMBoundaryType         bx, by;
  ISLocalToGlobalMapping ltog;
  DMDAStencilType        st;
  PetscBool              removedups = PETSC_FALSE;

  PetscFunctionBegin;
  /*
         nc - number of components per grid point
         col - number of colors needed in one direction for single component problem
  */
  PetscCall(DMDAGetInfo(da, &dim, &m, &n, NULL, &M, &N, NULL, &nc, &s, &bx, &by, NULL, &st));
  col = 2 * s + 1;
  /*
       With one processor in periodic domains in a skinny dimension the code will label nonzero
       columns multiple times because of "wrapping" around the end of the domain hitting an entry
       already counted in the other direction.
  */
  if (M == 1 && 2 * s >= m) removedups = PETSC_TRUE;
  if (N == 1 && 2 * s >= n) removedups = PETSC_TRUE;
  PetscCall(DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL));
  PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL));
  PetscCall(PetscObjectGetComm((PetscObject)da, &comm));

  PetscCall(PetscMalloc1(col * col * nc, &cols));
  PetscCall(DMGetLocalToGlobalMapping(da, &ltog));

  PetscCall(MatSetBlockSize(J, nc));
  /* determine the matrix preallocation information, one row (component) at a time */
  MatPreallocateBegin(comm, nc * nx * ny, nc * nx * ny, dnz, onz);
  for (i = xs; i < xs + nx; i++) {
    pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
    pend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));

    for (j = ys; j < ys + ny; j++) {
      slot = i - gxs + gnx * (j - gys);

      lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
      lend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));

      for (k = 0; k < nc; k++) {
        cnt = 0;
        for (l = lstart; l < lend + 1; l++) {
          for (p = pstart; p < pend + 1; p++) {
            if (l || p) {
              /* off-diagonal stencil point: columns come from the ofill pattern */
              if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star */
                for (ifill_col = ofill[k]; ifill_col < ofill[k + 1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc * (slot + gnx * l + p);
              }
            } else {
              /* center point: dfill pattern if given, otherwise dense coupling */
              if (dfill) {
                for (ifill_col = dfill[k]; ifill_col < dfill[k + 1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc * (slot + gnx * l + p);
              } else {
                for (ifill_col = 0; ifill_col < nc; ifill_col++) cols[cnt++] = ifill_col + nc * (slot + gnx * l + p);
              }
            }
          }
        }
        row    = k + nc * (slot);
        maxcnt = PetscMax(maxcnt, cnt);
        if (removedups) PetscCall(MatPreallocateSetLocalRemoveDups(ltog, 1, &row, ltog, cnt, cols, dnz, onz));
        else PetscCall(MatPreallocateSetLocal(ltog, 1, &row, ltog, cnt, cols, dnz, onz));
      }
    }
  }
  PetscCall(MatSeqAIJSetPreallocation(J, 0, dnz));
  PetscCall(MatMPIAIJSetPreallocation(J, 0, dnz, 0, onz));
  MatPreallocateEnd(dnz, onz);
  PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    for (i = xs; i < xs + nx; i++) {
      pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
      pend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));

      for (j = ys; j < ys + ny; j++) {
        slot = i - gxs + gnx * (j - gys);

        lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
        lend   = (by == DM_BOUNDARY_PERIODIC) ?
s : (PetscMin(s, n - j - 1));

        for (k = 0; k < nc; k++) {
          cnt = 0;
          for (l = lstart; l < lend + 1; l++) {
            for (p = pstart; p < pend + 1; p++) {
              if (l || p) {
                if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star */
                  for (ifill_col = ofill[k]; ifill_col < ofill[k + 1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc * (slot + gnx * l + p);
                }
              } else {
                if (dfill) {
                  for (ifill_col = dfill[k]; ifill_col < dfill[k + 1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc * (slot + gnx * l + p);
                } else {
                  for (ifill_col = 0; ifill_col < nc; ifill_col++) cols[cnt++] = ifill_col + nc * (slot + gnx * l + p);
                }
              }
            }
          }
          row = k + nc * (slot);
          PetscCall(MatSetValuesLocal(J, 1, &row, cnt, cols, NULL, INSERT_VALUES));
        }
      }
    }
    /* do not copy values to GPU since they are all zero and not yet needed there */
    PetscCall(MatBindToCPU(J, PETSC_TRUE));
    PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatBindToCPU(J, PETSC_FALSE));
    PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  }
  PetscCall(PetscFree(cols));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  DMCreateMatrix_DA_3d_MPIAIJ - Preallocates (and, unless da->prealloc_only, zero-fills)
  the Jacobian for a 3d DMDA in MATMPIAIJ format.
*/
PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ(DM da, Mat J)
{
  PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
  PetscInt               m, n, dim, s, *cols = NULL, k, nc, *rows = NULL, col, cnt, l, p, *dnz = NULL, *onz = NULL;
  PetscInt               istart, iend, jstart, jend, kstart, kend, zs, nz, gzs, gnz, ii, jj, kk, M, N, P;
  MPI_Comm               comm;
  DMBoundaryType         bx, by, bz;
  ISLocalToGlobalMapping ltog, mltog;
  DMDAStencilType        st;
  PetscBool              removedups = PETSC_FALSE;

  PetscFunctionBegin;
  /*
         nc - number of components per grid point
         col - number of colors needed in one direction for single component problem
  */
  PetscCall(DMDAGetInfo(da, &dim, &m, &n, &p, &M, &N, &P, &nc, &s, &bx, &by, &bz, &st));
  if (bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE && bz == DM_BOUNDARY_NONE) PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_TRUE));
  col = 2 * s + 1;

  /*
       With one processor in periodic domains in a skinny dimension the code will label nonzero
       columns multiple times because of "wrapping" around the end of the domain hitting an entry
       already counted in the other direction.
  */
  if (M == 1 && 2 * s >= m) removedups = PETSC_TRUE;
  if (N == 1 && 2 * s >= n) removedups = PETSC_TRUE;
  if (P == 1 && 2 * s >= p) removedups = PETSC_TRUE;

  PetscCall(DMDAGetCorners(da, &xs, &ys, &zs, &nx, &ny, &nz));
  PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, &gzs, &gnx, &gny, &gnz));
  PetscCall(PetscObjectGetComm((PetscObject)da, &comm));

  PetscCall(PetscMalloc2(nc, &rows, col * col * col * nc * nc, &cols));
  PetscCall(DMGetLocalToGlobalMapping(da, &ltog));

  PetscCall(MatSetBlockSize(J, nc));
  /* determine the matrix preallocation information */
  MatPreallocateBegin(comm, nc * nx * ny * nz, nc * nx * ny * nz, dnz, onz);
  for (i = xs; i < xs + nx; i++) {
    istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
    iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
    for (j = ys; j < ys + ny; j++) {
      jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
      jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
      for (k = zs; k < zs + nz; k++) {
        kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
        kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));

        slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs); /* local (ghosted) index of (i,j,k) */

        cnt = 0;
        for (l = 0; l < nc; l++) {
          for (ii = istart; ii < iend + 1; ii++) {
            for (jj = jstart; jj < jend + 1; jj++) {
              for (kk = kstart; kk < kend + 1; kk++) {
                if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/
                  cols[cnt++] = l + nc * (slot + ii + gnx * jj + gnx * gny * kk);
                }
              }
            }
          }
          rows[l] = l + nc * (slot);
        }
        if (removedups) PetscCall(MatPreallocateSetLocalRemoveDups(ltog, nc, rows, ltog, cnt, cols, dnz, onz));
        else PetscCall(MatPreallocateSetLocal(ltog, nc, rows, ltog, cnt, cols, dnz, onz));
      }
    }
  }
  PetscCall(MatSetBlockSize(J, nc));
  PetscCall(MatSeqAIJSetPreallocation(J, 0, dnz));
  PetscCall(MatMPIAIJSetPreallocation(J, 0, dnz, 0, onz));
  MatPreallocateEnd(dnz, onz);
  /* respect a mapping a caller may already have installed on J */
  PetscCall(MatGetLocalToGlobalMapping(J, &mltog, NULL));
  if (!mltog) PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    for (i = xs; i < xs + nx; i++) {
      istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
      iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
      for (j = ys; j < ys + ny; j++) {
        jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
        jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
        for (k = zs; k < zs + nz; k++) {
          kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
          kend   = (bz == DM_BOUNDARY_PERIODIC) ?
s : (PetscMin(s, p - k - 1));

          slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);

          cnt = 0;
          /* loop order kk,jj,ii here yields columns in increasing local index order */
          for (kk = kstart; kk < kend + 1; kk++) {
            for (jj = jstart; jj < jend + 1; jj++) {
              for (ii = istart; ii < iend + 1; ii++) {
                if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/
                  cols[cnt++] = nc * (slot + ii + gnx * jj + gnx * gny * kk);
                  for (l = 1; l < nc; l++) {
                    cols[cnt] = 1 + cols[cnt - 1];
                    cnt++;
                  }
                }
              }
            }
          }
          rows[0] = nc * (slot);
          for (l = 1; l < nc; l++) rows[l] = 1 + rows[l - 1];
          PetscCall(MatSetValuesLocal(J, nc, rows, cnt, cols, NULL, INSERT_VALUES));
        }
      }
    }
    /* do not copy values to GPU since they are all zero and not yet needed there */
    PetscCall(MatBindToCPU(J, PETSC_TRUE));
    PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
    if (bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE && bz == DM_BOUNDARY_NONE) PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_FALSE));
    PetscCall(MatBindToCPU(J, PETSC_FALSE));
    PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  }
  PetscCall(PetscFree2(rows, cols));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  DMCreateMatrix_DA_1d_MPIAIJ_Fill - 1d MATMPIAIJ Jacobian creation honoring the dfill/ofill
  block-fill patterns. Counts per-row nonzeros directly (split into diagonal-block and
  off-process counts) instead of using the MatPreallocate machinery.
*/
PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ_Fill(DM da, Mat J)
{
  DM_DA                 *dd = (DM_DA *)da->data;
  PetscInt               xs, nx, i, j, gxs, gnx, row, k, l;
  PetscInt               m, dim, s, *cols = NULL, nc, cnt, maxcnt = 0, *ocols;
  PetscInt              *ofill = dd->ofill, *dfill = dd->dfill;
  DMBoundaryType         bx;
  ISLocalToGlobalMapping ltog;
  PetscMPIInt            rank, size;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)da), &rank));
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)da), &size));

  /*
         nc - number of components per grid point
  */
  PetscCall(DMDAGetInfo(da, &dim, &m, NULL, NULL, NULL, NULL, NULL, &nc, &s, &bx, NULL, NULL, NULL));
  PetscCheck(s <= 1, PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "Matrix creation for 1d not implemented correctly for stencil width larger than 1");
  PetscCall(DMDAGetCorners(da, &xs, NULL, NULL, &nx, NULL, NULL));
  PetscCall(DMDAGetGhostCorners(da, &gxs, NULL, NULL, &gnx, NULL, NULL));

  PetscCall(MatSetBlockSize(J, nc));
  /* cols: diagonal-block nonzero counts per local row; ocols: off-process counts */
  PetscCall(PetscCalloc2(nx * nc, &cols, nx * nc, &ocols));

  /*
        note should be smaller for first and last process with no periodic
        does not handle dfill
  */
  /* NOTE(review): the counting below dereferences dfill unconditionally while the insertion
     loops later guard with "if (dfill)" — confirm dfill is always non-NULL on this path */
  cnt = 0;
  /* coupling with process to the left */
  for (i = 0; i < s; i++) {
    for (j = 0; j < nc; j++) {
      ocols[cnt] = ((rank == 0) ? 0 : (s - i) * (ofill[j + 1] - ofill[j]));
      cols[cnt]  = dfill[j + 1] - dfill[j] + (s + i) * (ofill[j + 1] - ofill[j]);
      if (rank == 0 && (dd->bx == DM_BOUNDARY_PERIODIC)) {
        /* periodic wrap: the neighbor lives off-process only when there is more than one rank */
        if (size > 1) ocols[cnt] += (s - i) * (ofill[j + 1] - ofill[j]);
        else cols[cnt] += (s - i) * (ofill[j + 1] - ofill[j]);
      }
      maxcnt = PetscMax(maxcnt, ocols[cnt] + cols[cnt]);
      cnt++;
    }
  }
  /* interior rows: full stencil, entirely in the diagonal block */
  for (i = s; i < nx - s; i++) {
    for (j = 0; j < nc; j++) {
      cols[cnt] = dfill[j + 1] - dfill[j] + 2 * s * (ofill[j + 1] - ofill[j]);
      maxcnt    = PetscMax(maxcnt, ocols[cnt] + cols[cnt]);
      cnt++;
    }
  }
  /* coupling with process to the right */
  for (i = nx - s; i < nx; i++) {
    for (j = 0; j < nc; j++) {
      ocols[cnt] = ((rank == (size - 1)) ? 0 : (i - nx + s + 1) * (ofill[j + 1] - ofill[j]));
      cols[cnt]  = dfill[j + 1] - dfill[j] + (s + nx - i - 1) * (ofill[j + 1] - ofill[j]);
      if ((rank == size - 1) && (dd->bx == DM_BOUNDARY_PERIODIC)) {
        if (size > 1) ocols[cnt] += (i - nx + s + 1) * (ofill[j + 1] - ofill[j]);
        else cols[cnt] += (i - nx + s + 1) * (ofill[j + 1] - ofill[j]);
      }
      maxcnt = PetscMax(maxcnt, ocols[cnt] + cols[cnt]);
      cnt++;
    }
  }

  PetscCall(MatSeqAIJSetPreallocation(J, 0, cols));
  PetscCall(MatMPIAIJSetPreallocation(J, 0, cols, 0, ocols));
  PetscCall(PetscFree2(cols, ocols));

  PetscCall(DMGetLocalToGlobalMapping(da, &ltog));
  PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    PetscCall(PetscMalloc1(maxcnt, &cols));
    row = xs * nc; /* global row of the first owned component; insertion uses global indices */
    /* coupling with process to the left */
    for (i = xs; i < xs + s; i++) {
      for (j = 0; j < nc; j++) {
        cnt = 0;
        if (rank) {
          for (l = 0; l < s; l++) {
            for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i - s + l) * nc + ofill[k];
          }
        }
        if (rank == 0 && (dd->bx == DM_BOUNDARY_PERIODIC)) {
          /* wrap to the far end of the periodic domain */
          for (l = 0; l < s; l++) {
            for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (m + i - s - l) * nc + ofill[k];
          }
        }
        if (dfill) {
          for (k = dfill[j]; k < dfill[j + 1]; k++) cols[cnt++] = i * nc + dfill[k];
        } else {
          for (k = 0; k < nc; k++) cols[cnt++] = i * nc + k;
        }
        for (l = 0; l < s; l++) {
          for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i + s - l) * nc + ofill[k];
        }
        PetscCall(MatSetValues(J, 1, &row, cnt, cols, NULL, INSERT_VALUES));
        row++;
      }
    }
    /* interior rows */
    for (i = xs + s; i < xs + nx - s; i++) {
      for (j = 0; j < nc; j++) {
        cnt = 0;
        for (l = 0; l < s; l++) {
          for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i - s + l) * nc + ofill[k];
        }
        if (dfill) {
          for (k = dfill[j]; k < dfill[j + 1]; k++) cols[cnt++] = i * nc + dfill[k];
        } else {
          for (k = 0; k < nc; k++) cols[cnt++] = i * nc + k;
        }
        for (l = 0; l < s; l++) {
          for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i + s - l) * nc + ofill[k];
        }
        PetscCall(MatSetValues(J, 1, &row, cnt, cols, NULL, INSERT_VALUES));
        row++;
      }
    }
    /* coupling with process to the right */
    for (i = xs + nx - s; i < xs + nx; i++) {
      for (j = 0; j < nc; j++) {
        cnt = 0;
        for (l = 0; l < s; l++) {
          for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i - s + l) * nc + ofill[k];
        }
        if (dfill) {
          for (k = dfill[j]; k < dfill[j + 1]; k++) cols[cnt++] = i * nc + dfill[k];
        } else {
          for (k = 0; k < nc; k++) cols[cnt++] = i * nc + k;
        }
        if (rank < size - 1) {
          for (l = 0; l < s; l++) {
            for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i + s - l) * nc + ofill[k];
          }
        }
        if ((rank == size - 1) && (dd->bx == DM_BOUNDARY_PERIODIC)) {
          /* periodic wrap back to the start of the domain (valid for s <= 1, checked above) */
          for (l = 0; l < s; l++) {
            for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i - s - l - m + 2) * nc + ofill[k];
          }
        }
        PetscCall(MatSetValues(J, 1, &row, cnt, cols, NULL, INSERT_VALUES));
        row++;
      }
    }
    PetscCall(PetscFree(cols));
    /* do not copy values to GPU since they are all zero and not yet needed there */
    PetscCall(MatBindToCPU(J, PETSC_TRUE));
    PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatBindToCPU(J, PETSC_FALSE));
    PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  DMCreateMatrix_DA_1d_MPIAIJ - Preallocates (dense nc x nc coupling per stencil point)
  and, unless da->prealloc_only, zero-fills the Jacobian for a 1d DMDA in MATMPIAIJ format.
*/
PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ(DM da, Mat J)
{
  PetscInt               xs, nx, i, i1, slot, gxs, gnx;
  PetscInt               m, dim, s, *cols = NULL, nc, *rows = NULL, col, cnt, l;
  PetscInt               istart, iend;
  DMBoundaryType         bx;
  ISLocalToGlobalMapping ltog, mltog;

  PetscFunctionBegin;
  /*
         nc - number of components per grid point
         col - number of colors needed in one direction for single component problem
  */
  PetscCall(DMDAGetInfo(da, &dim, &m, NULL, NULL, NULL, NULL, NULL, &nc, &s, &bx, NULL, NULL, NULL));
  if (bx == DM_BOUNDARY_NONE) PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_TRUE));
  col = 2 * s + 1;

  PetscCall(DMDAGetCorners(da, &xs, NULL, NULL, &nx, NULL, NULL));
  PetscCall(DMDAGetGhostCorners(da, &gxs, NULL, NULL, &gnx, NULL, NULL));

  PetscCall(MatSetBlockSize(J, nc));
  /* uniform preallocation: col stencil points, each fully coupled across nc components */
  PetscCall(MatSeqAIJSetPreallocation(J, col * nc, NULL));
  PetscCall(MatMPIAIJSetPreallocation(J, col * nc, NULL, col * nc, NULL));

  PetscCall(DMGetLocalToGlobalMapping(da, &ltog));
  /* respect a mapping a caller may already have installed on J */
  PetscCall(MatGetLocalToGlobalMapping(J, &mltog, NULL));
  if (!mltog) PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    PetscCall(PetscMalloc2(nc, &rows, col * nc * nc, &cols));
    for (i = xs; i < xs + nx; i++) {
      /* clip the stencil to the ghosted local domain */
      istart = PetscMax(-s, gxs - i);
      iend   = PetscMin(s, gxs + gnx - i - 1);
      slot   = i - gxs;

      cnt = 0;
      for (i1 = istart; i1 < iend + 1; i1++) {
        cols[cnt++] = nc * (slot + i1);
        for (l = 1; l < nc; l++) {
          cols[cnt] = 1 + cols[cnt - 1];
          cnt++;
        }
      }
      rows[0] = nc * (slot);
      for (l = 1; l < nc; l++) rows[l] = 1 + rows[l - 1];
      PetscCall(MatSetValuesLocal(J, nc, rows, cnt, cols, NULL, INSERT_VALUES));
    }
    /* do not copy values to GPU since they are all zero and not yet needed there */
    PetscCall(MatBindToCPU(J, PETSC_TRUE));
    PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
    if (bx == DM_BOUNDARY_NONE) PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_FALSE));
    PetscCall(MatBindToCPU(J, PETSC_FALSE));
    PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
    PetscCall(PetscFree2(rows, cols));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  DMCreateMatrix_DA_1d_SeqAIJ_NoPreallocation - Sequential 1d variant that sizes the matrix
  with a single total-nonzero estimate (MatSeqAIJSetTotalPreallocation) instead of per-row
  counts, then optionally inserts explicit zeros.
*/
PetscErrorCode DMCreateMatrix_DA_1d_SeqAIJ_NoPreallocation(DM da, Mat J)
{
  PetscInt               xs, nx, i, i1, slot, gxs, gnx;
  PetscInt               m, dim, s, *cols = NULL, nc, *rows = NULL, col, cnt, l;
  PetscInt               istart, iend;
  DMBoundaryType         bx;
  ISLocalToGlobalMapping ltog, mltog;

  PetscFunctionBegin;
  /*
         nc - number of components per grid point
         col - number of colors needed in one direction for single component problem
  */
  PetscCall(DMDAGetInfo(da, &dim, &m, NULL, NULL, NULL, NULL, NULL, &nc, &s, &bx, NULL, NULL, NULL));
  col = 2 * s + 1;

  PetscCall(DMDAGetCorners(da, &xs, NULL, NULL, &nx, NULL, NULL));
  PetscCall(DMDAGetGhostCorners(da, &gxs, NULL, NULL, &gnx, NULL, NULL));

  PetscCall(MatSetBlockSize(J, nc));
  PetscCall(MatSeqAIJSetTotalPreallocation(J, nx * nc * col * nc));

  PetscCall(DMGetLocalToGlobalMapping(da, &ltog));
  PetscCall(MatGetLocalToGlobalMapping(J, &mltog, NULL));
  if (!mltog) PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    PetscCall(PetscMalloc2(nc, &rows, col * nc * nc, &cols));
    for (i = xs; i < xs + nx; i++) {
      istart = PetscMax(-s, gxs - i);
      iend   = PetscMin(s, gxs + gnx - i - 1);
      slot   = i - gxs;

      cnt = 0;
      for (i1 = istart; i1 < iend + 1; i1++) {
        cols[cnt++] = nc * (slot + i1);
        for (l = 1; l < nc; l++) {
          cols[cnt] = 1 + cols[cnt - 1];
          cnt++;
        }
      }
      rows[0] = nc * (slot);
      for (l = 1; l < nc; l++) rows[l] = 1 + rows[l - 1];
      PetscCall(MatSetValuesLocal(J, nc, rows, cnt, cols, NULL, INSERT_VALUES));
    }
    /* do not copy values to GPU since they are all zero and not yet needed there */
    PetscCall(MatBindToCPU(J, PETSC_TRUE));
    PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
    if (bx == DM_BOUNDARY_NONE) PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_FALSE));
    PetscCall(MatBindToCPU(J, PETSC_FALSE));
    PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
    PetscCall(PetscFree2(rows, cols));
  }
  /* NOTE(review): MAT_SORTED_FULL is reset unconditionally here even though this function
     never set it TRUE itself — presumably MatSeqAIJSetTotalPreallocation enables it; confirm */
  PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_FALSE));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode DMCreateMatrix_DA_2d_MPIBAIJ(DM da, Mat J)
{
  PetscInt     xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
  PetscInt     m, n, dim, s, *cols, nc, col, cnt, *dnz, *onz;
  PetscInt     istart, iend, jstart, jend, ii, jj;
  MPI_Comm     comm;
  PetscScalar *values;
/*
  DMCreateMatrix_DA_2d_MPIBAIJ - Preallocates and (unless da->prealloc_only) assembles the zero
  nonzero structure of a blocked (BAIJ) matrix for a 2d DMDA, one block per grid point.
*/
PetscErrorCode DMCreateMatrix_DA_2d_MPIBAIJ(DM da, Mat J)
{
  PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
  PetscInt               m, n, dim, s, *cols, nc, col, cnt, *dnz, *onz;
  PetscInt               istart, iend, jstart, jend, ii, jj;
  MPI_Comm               comm;
  PetscScalar           *values;
  DMBoundaryType         bx, by;
  DMDAStencilType        st;
  ISLocalToGlobalMapping ltog;

  PetscFunctionBegin;
  /*
     nc - number of components per grid point
     col - number of colors needed in one direction for single component problem
  */
  PetscCall(DMDAGetInfo(da, &dim, &m, &n, NULL, NULL, NULL, NULL, &nc, &s, &bx, &by, NULL, &st));
  col = 2 * s + 1;

  PetscCall(DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL));
  PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL));
  PetscCall(PetscObjectGetComm((PetscObject)da, &comm));

  PetscCall(PetscMalloc1(col * col * nc * nc, &cols));

  PetscCall(DMGetLocalToGlobalMapping(da, &ltog));

  /* determine the matrix preallocation information */
  MatPreallocateBegin(comm, nx * ny, nx * ny, dnz, onz);
  for (i = xs; i < xs + nx; i++) {
    /* periodic boundaries keep the full stencil; otherwise clip it at the physical domain edge */
    istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
    iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
    for (j = ys; j < ys + ny; j++) {
      jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
      jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
      slot   = i - gxs + gnx * (j - gys); /* local (ghosted) index of this grid point */

      /* Find block columns in block row */
      cnt = 0;
      for (ii = istart; ii < iend + 1; ii++) {
        for (jj = jstart; jj < jend + 1; jj++) {
          if (st == DMDA_STENCIL_BOX || !ii || !jj) { /* BOX or on the STAR */
            cols[cnt++] = slot + ii + gnx * jj;
          }
        }
      }
      PetscCall(MatPreallocateSetLocalBlock(ltog, 1, &slot, ltog, cnt, cols, dnz, onz));
    }
  }
  PetscCall(MatSeqBAIJSetPreallocation(J, nc, 0, dnz));
  PetscCall(MatMPIBAIJSetPreallocation(J, nc, 0, dnz, 0, onz));
  MatPreallocateEnd(dnz, onz);

  PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    /* insert explicit zero blocks so the nonzero structure exists before the user assembles */
    PetscCall(PetscCalloc1(col * col * nc * nc, &values));
    for (i = xs; i < xs + nx; i++) {
      istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
      iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
      for (j = ys; j < ys + ny; j++) {
        jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
        jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
        slot   = i - gxs + gnx * (j - gys);
        cnt    = 0;
        for (ii = istart; ii < iend + 1; ii++) {
          for (jj = jstart; jj < jend + 1; jj++) {
            if (st == DMDA_STENCIL_BOX || !ii || !jj) { /* BOX or on the STAR */
              cols[cnt++] = slot + ii + gnx * jj;
            }
          }
        }
        PetscCall(MatSetValuesBlockedLocal(J, 1, &slot, cnt, cols, values, INSERT_VALUES));
      }
    }
    PetscCall(PetscFree(values));
    /* do not copy values to GPU since they are all zero and not yet needed there */
    PetscCall(MatBindToCPU(J, PETSC_TRUE));
    PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatBindToCPU(J, PETSC_FALSE));
    PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  }
  PetscCall(PetscFree(cols));
  PetscFunctionReturn(PETSC_SUCCESS);
}
/*
  DMCreateMatrix_DA_3d_MPIBAIJ - Preallocates and (unless da->prealloc_only) assembles the zero
  nonzero structure of a blocked (BAIJ) matrix for a 3d DMDA, one block per grid point.
*/
PetscErrorCode DMCreateMatrix_DA_3d_MPIBAIJ(DM da, Mat J)
{
  PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
  PetscInt               m, n, dim, s, *cols, k, nc, col, cnt, p, *dnz, *onz;
  PetscInt               istart, iend, jstart, jend, kstart, kend, zs, nz, gzs, gnz, ii, jj, kk;
  MPI_Comm               comm;
  PetscScalar           *values;
  DMBoundaryType         bx, by, bz;
  DMDAStencilType        st;
  ISLocalToGlobalMapping ltog;

  PetscFunctionBegin;
  /*
     nc - number of components per grid point
     col - number of colors needed in one direction for single component problem
  */
  PetscCall(DMDAGetInfo(da, &dim, &m, &n, &p, NULL, NULL, NULL, &nc, &s, &bx, &by, &bz, &st));
  col = 2 * s + 1;

  PetscCall(DMDAGetCorners(da, &xs, &ys, &zs, &nx, &ny, &nz));
  PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, &gzs, &gnx, &gny, &gnz));
  PetscCall(PetscObjectGetComm((PetscObject)da, &comm));

  /* a block row has at most col^3 block columns */
  PetscCall(PetscMalloc1(col * col * col, &cols));

  PetscCall(DMGetLocalToGlobalMapping(da, &ltog));

  /* determine the matrix preallocation information */
  MatPreallocateBegin(comm, nx * ny * nz, nx * ny * nz, dnz, onz);
  for (i = xs; i < xs + nx; i++) {
    /* periodic boundaries keep the full stencil; otherwise clip at the physical domain edge */
    istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
    iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
    for (j = ys; j < ys + ny; j++) {
      jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
      jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
      for (k = zs; k < zs + nz; k++) {
        kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
        kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));

        slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs); /* local (ghosted) index */

        /* Find block columns in block row */
        cnt = 0;
        for (ii = istart; ii < iend + 1; ii++) {
          for (jj = jstart; jj < jend + 1; jj++) {
            for (kk = kstart; kk < kend + 1; kk++) {
              if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/
                cols[cnt++] = slot + ii + gnx * jj + gnx * gny * kk;
              }
            }
          }
        }
        PetscCall(MatPreallocateSetLocalBlock(ltog, 1, &slot, ltog, cnt, cols, dnz, onz));
      }
    }
  }
  PetscCall(MatSeqBAIJSetPreallocation(J, nc, 0, dnz));
  PetscCall(MatMPIBAIJSetPreallocation(J, nc, 0, dnz, 0, onz));
  MatPreallocateEnd(dnz, onz);

  PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    /* insert explicit zero blocks so the nonzero structure exists before the user assembles */
    PetscCall(PetscCalloc1(col * col * col * nc * nc, &values));
    for (i = xs; i < xs + nx; i++) {
      istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
      iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
      for (j = ys; j < ys + ny; j++) {
        jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
        jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
        for (k = zs; k < zs + nz; k++) {
          kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
          kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));

          slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);

          cnt = 0;
          for (ii = istart; ii < iend + 1; ii++) {
            for (jj = jstart; jj < jend + 1; jj++) {
              for (kk = kstart; kk < kend + 1; kk++) {
                if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/
                  cols[cnt++] = slot + ii + gnx * jj + gnx * gny * kk;
                }
              }
            }
          }
          PetscCall(MatSetValuesBlockedLocal(J, 1, &slot, cnt, cols, values, INSERT_VALUES));
        }
      }
    }
    PetscCall(PetscFree(values));
    /* do not copy values to GPU since they are all zero and not yet needed there */
    PetscCall(MatBindToCPU(J, PETSC_TRUE));
    PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatBindToCPU(J, PETSC_FALSE));
    PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  }
  PetscCall(PetscFree(cols));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  This helper is for SBAIJ preallocation, to discard the lower-triangular values which are difficult to
  identify in the local ordering with periodic domain.
1904 */ 1905 static PetscErrorCode L2GFilterUpperTriangular(ISLocalToGlobalMapping ltog, PetscInt *row, PetscInt *cnt, PetscInt col[]) 1906 { 1907 PetscInt i, n; 1908 1909 PetscFunctionBegin; 1910 PetscCall(ISLocalToGlobalMappingApplyBlock(ltog, 1, row, row)); 1911 PetscCall(ISLocalToGlobalMappingApplyBlock(ltog, *cnt, col, col)); 1912 for (i = 0, n = 0; i < *cnt; i++) { 1913 if (col[i] >= *row) col[n++] = col[i]; 1914 } 1915 *cnt = n; 1916 PetscFunctionReturn(PETSC_SUCCESS); 1917 } 1918 1919 PetscErrorCode DMCreateMatrix_DA_2d_MPISBAIJ(DM da, Mat J) 1920 { 1921 PetscInt xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny; 1922 PetscInt m, n, dim, s, *cols, nc, col, cnt, *dnz, *onz; 1923 PetscInt istart, iend, jstart, jend, ii, jj; 1924 MPI_Comm comm; 1925 PetscScalar *values; 1926 DMBoundaryType bx, by; 1927 DMDAStencilType st; 1928 ISLocalToGlobalMapping ltog; 1929 1930 PetscFunctionBegin; 1931 /* 1932 nc - number of components per grid point 1933 col - number of colors needed in one direction for single component problem 1934 */ 1935 PetscCall(DMDAGetInfo(da, &dim, &m, &n, NULL, NULL, NULL, NULL, &nc, &s, &bx, &by, NULL, &st)); 1936 col = 2 * s + 1; 1937 1938 PetscCall(DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL)); 1939 PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL)); 1940 PetscCall(PetscObjectGetComm((PetscObject)da, &comm)); 1941 1942 PetscCall(PetscMalloc1(col * col * nc * nc, &cols)); 1943 1944 PetscCall(DMGetLocalToGlobalMapping(da, <og)); 1945 1946 /* determine the matrix preallocation information */ 1947 MatPreallocateBegin(comm, nx * ny, nx * ny, dnz, onz); 1948 for (i = xs; i < xs + nx; i++) { 1949 istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i)); 1950 iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1)); 1951 for (j = ys; j < ys + ny; j++) { 1952 jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j)); 1953 jend = (by == DM_BOUNDARY_PERIODIC) ? 
/*
  DMCreateMatrix_DA_2d_MPISBAIJ - Preallocates and (unless da->prealloc_only) assembles the zero
  nonzero structure of a symmetric blocked (SBAIJ) matrix for a 2d DMDA; only the upper-triangular
  part of each block row is stored, so L2GFilterUpperTriangular() drops the lower-triangular columns.
*/
PetscErrorCode DMCreateMatrix_DA_2d_MPISBAIJ(DM da, Mat J)
{
  PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
  PetscInt               m, n, dim, s, *cols, nc, col, cnt, *dnz, *onz;
  PetscInt               istart, iend, jstart, jend, ii, jj;
  MPI_Comm               comm;
  PetscScalar           *values;
  DMBoundaryType         bx, by;
  DMDAStencilType        st;
  ISLocalToGlobalMapping ltog;

  PetscFunctionBegin;
  /*
     nc - number of components per grid point
     col - number of colors needed in one direction for single component problem
  */
  PetscCall(DMDAGetInfo(da, &dim, &m, &n, NULL, NULL, NULL, NULL, &nc, &s, &bx, &by, NULL, &st));
  col = 2 * s + 1;

  PetscCall(DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL));
  PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL));
  PetscCall(PetscObjectGetComm((PetscObject)da, &comm));

  PetscCall(PetscMalloc1(col * col * nc * nc, &cols));

  PetscCall(DMGetLocalToGlobalMapping(da, &ltog));

  /* determine the matrix preallocation information */
  MatPreallocateBegin(comm, nx * ny, nx * ny, dnz, onz);
  for (i = xs; i < xs + nx; i++) {
    /* periodic boundaries keep the full stencil; otherwise clip at the physical domain edge */
    istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
    iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
    for (j = ys; j < ys + ny; j++) {
      jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
      jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
      slot   = i - gxs + gnx * (j - gys); /* local (ghosted) index of this grid point */

      /* Find block columns in block row */
      cnt = 0;
      for (ii = istart; ii < iend + 1; ii++) {
        for (jj = jstart; jj < jend + 1; jj++) {
          if (st == DMDA_STENCIL_BOX || !ii || !jj) cols[cnt++] = slot + ii + gnx * jj;
        }
      }
      /* maps slot/cols to global block indices and discards the lower-triangular columns */
      PetscCall(L2GFilterUpperTriangular(ltog, &slot, &cnt, cols));
      PetscCall(MatPreallocateSymmetricSetBlock(slot, cnt, cols, dnz, onz));
    }
  }
  PetscCall(MatSeqSBAIJSetPreallocation(J, nc, 0, dnz));
  PetscCall(MatMPISBAIJSetPreallocation(J, nc, 0, dnz, 0, onz));
  MatPreallocateEnd(dnz, onz);

  PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    /* insert explicit zero blocks so the nonzero structure exists before the user assembles */
    PetscCall(PetscCalloc1(col * col * nc * nc, &values));
    for (i = xs; i < xs + nx; i++) {
      istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
      iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
      for (j = ys; j < ys + ny; j++) {
        jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
        jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
        slot   = i - gxs + gnx * (j - gys);

        /* Find block columns in block row */
        cnt = 0;
        for (ii = istart; ii < iend + 1; ii++) {
          for (jj = jstart; jj < jend + 1; jj++) {
            if (st == DMDA_STENCIL_BOX || !ii || !jj) cols[cnt++] = slot + ii + gnx * jj;
          }
        }
        PetscCall(L2GFilterUpperTriangular(ltog, &slot, &cnt, cols));
        /* indices are already global after the filter, hence the non-Local MatSetValuesBlocked() */
        PetscCall(MatSetValuesBlocked(J, 1, &slot, cnt, cols, values, INSERT_VALUES));
      }
    }
    PetscCall(PetscFree(values));
    /* do not copy values to GPU since they are all zero and not yet needed there */
    PetscCall(MatBindToCPU(J, PETSC_TRUE));
    PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatBindToCPU(J, PETSC_FALSE));
    PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  }
  PetscCall(PetscFree(cols));
  PetscFunctionReturn(PETSC_SUCCESS);
}
/*
  DMCreateMatrix_DA_3d_MPISBAIJ - Preallocates and (unless da->prealloc_only) assembles the zero
  nonzero structure of a symmetric blocked (SBAIJ) matrix for a 3d DMDA; only the upper-triangular
  part of each block row is stored, so L2GFilterUpperTriangular() drops the lower-triangular columns.
*/
PetscErrorCode DMCreateMatrix_DA_3d_MPISBAIJ(DM da, Mat J)
{
  PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
  PetscInt               m, n, dim, s, *cols, k, nc, col, cnt, p, *dnz, *onz;
  PetscInt               istart, iend, jstart, jend, kstart, kend, zs, nz, gzs, gnz, ii, jj, kk;
  MPI_Comm               comm;
  PetscScalar           *values;
  DMBoundaryType         bx, by, bz;
  DMDAStencilType        st;
  ISLocalToGlobalMapping ltog;

  PetscFunctionBegin;
  /*
     nc - number of components per grid point
     col - number of colors needed in one direction for single component problem
  */
  PetscCall(DMDAGetInfo(da, &dim, &m, &n, &p, NULL, NULL, NULL, &nc, &s, &bx, &by, &bz, &st));
  col = 2 * s + 1;

  PetscCall(DMDAGetCorners(da, &xs, &ys, &zs, &nx, &ny, &nz));
  PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, &gzs, &gnx, &gny, &gnz));
  PetscCall(PetscObjectGetComm((PetscObject)da, &comm));

  /* create the matrix */
  PetscCall(PetscMalloc1(col * col * col, &cols));

  PetscCall(DMGetLocalToGlobalMapping(da, &ltog));

  /* determine the matrix preallocation information */
  MatPreallocateBegin(comm, nx * ny * nz, nx * ny * nz, dnz, onz);
  for (i = xs; i < xs + nx; i++) {
    /* periodic boundaries keep the full stencil; otherwise clip at the physical domain edge */
    istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
    iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
    for (j = ys; j < ys + ny; j++) {
      jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
      jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
      for (k = zs; k < zs + nz; k++) {
        kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
        kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));

        slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs); /* local (ghosted) index */

        /* Find block columns in block row */
        cnt = 0;
        for (ii = istart; ii < iend + 1; ii++) {
          for (jj = jstart; jj < jend + 1; jj++) {
            for (kk = kstart; kk < kend + 1; kk++) {
              if ((st == DMDA_STENCIL_BOX) || (!ii && !jj) || (!jj && !kk) || (!ii && !kk)) cols[cnt++] = slot + ii + gnx * jj + gnx * gny * kk;
            }
          }
        }
        /* maps slot/cols to global block indices and discards the lower-triangular columns */
        PetscCall(L2GFilterUpperTriangular(ltog, &slot, &cnt, cols));
        PetscCall(MatPreallocateSymmetricSetBlock(slot, cnt, cols, dnz, onz));
      }
    }
  }
  PetscCall(MatSeqSBAIJSetPreallocation(J, nc, 0, dnz));
  PetscCall(MatMPISBAIJSetPreallocation(J, nc, 0, dnz, 0, onz));
  MatPreallocateEnd(dnz, onz);

  PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    /* insert explicit zero blocks so the nonzero structure exists before the user assembles */
    PetscCall(PetscCalloc1(col * col * col * nc * nc, &values));
    for (i = xs; i < xs + nx; i++) {
      istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
      iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
      for (j = ys; j < ys + ny; j++) {
        jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
        jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
        for (k = zs; k < zs + nz; k++) {
          kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
          kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));

          slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);

          cnt = 0;
          for (ii = istart; ii < iend + 1; ii++) {
            for (jj = jstart; jj < jend + 1; jj++) {
              for (kk = kstart; kk < kend + 1; kk++) {
                if ((st == DMDA_STENCIL_BOX) || (!ii && !jj) || (!jj && !kk) || (!ii && !kk)) cols[cnt++] = slot + ii + gnx * jj + gnx * gny * kk;
              }
            }
          }
          PetscCall(L2GFilterUpperTriangular(ltog, &slot, &cnt, cols));
          /* indices are already global after the filter, hence the non-Local MatSetValuesBlocked() */
          PetscCall(MatSetValuesBlocked(J, 1, &slot, cnt, cols, values, INSERT_VALUES));
        }
      }
    }
    PetscCall(PetscFree(values));
    /* do not copy values to GPU since they are all zero and not yet needed there */
    PetscCall(MatBindToCPU(J, PETSC_TRUE));
    PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatBindToCPU(J, PETSC_FALSE));
    PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  }
  PetscCall(PetscFree(cols));
  PetscFunctionReturn(PETSC_SUCCESS);
}
/*
  DMCreateMatrix_DA_3d_MPIAIJ_Fill - Preallocates and (unless da->prealloc_only) assembles the zero
  nonzero structure of an AIJ matrix for a 3d DMDA, honoring the per-block fill patterns set with
  DMDASetBlockFills(): dd->dfill gives the couplings inside the diagonal block (NULL means dense)
  and dd->ofill the couplings to neighboring grid points, both in the modified CSR layout built by
  DMDASetBlockFills_Private().
*/
PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ_Fill(DM da, Mat J)
{
  PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
  PetscInt               m, n, dim, s, *cols, k, nc, row, col, cnt, maxcnt = 0, l, p, *dnz, *onz;
  PetscInt               istart, iend, jstart, jend, kstart, kend, zs, nz, gzs, gnz, ii, jj, kk, M, N, P;
  DM_DA                 *dd = (DM_DA *)da->data;
  PetscInt               ifill_col, *dfill = dd->dfill, *ofill = dd->ofill;
  MPI_Comm               comm;
  PetscScalar           *values;
  DMBoundaryType         bx, by, bz;
  ISLocalToGlobalMapping ltog;
  DMDAStencilType        st;
  PetscBool              removedups = PETSC_FALSE;

  PetscFunctionBegin;
  /*
     nc - number of components per grid point
     col - number of colors needed in one direction for single component problem
  */
  PetscCall(DMDAGetInfo(da, &dim, &m, &n, &p, &M, &N, &P, &nc, &s, &bx, &by, &bz, &st));
  col = 2 * s + 1;

  /*
     With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times
     because of "wrapping" around the end of the domain hitting an entry already counted in the other direction.
  */
  if (M == 1 && 2 * s >= m) removedups = PETSC_TRUE;
  if (N == 1 && 2 * s >= n) removedups = PETSC_TRUE;
  if (P == 1 && 2 * s >= p) removedups = PETSC_TRUE;

  PetscCall(DMDAGetCorners(da, &xs, &ys, &zs, &nx, &ny, &nz));
  PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, &gzs, &gnx, &gny, &gnz));
  PetscCall(PetscObjectGetComm((PetscObject)da, &comm));

  /* worst case: col^3 neighboring points, each contributing up to nc columns to one row */
  PetscCall(PetscMalloc1(col * col * col * nc, &cols));
  PetscCall(DMGetLocalToGlobalMapping(da, &ltog));

  /* determine the matrix preallocation information */
  MatPreallocateBegin(comm, nc * nx * ny * nz, nc * nx * ny * nz, dnz, onz);

  PetscCall(MatSetBlockSize(J, nc));
  for (i = xs; i < xs + nx; i++) {
    /* periodic boundaries keep the full stencil; otherwise clip at the physical domain edge */
    istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
    iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
    for (j = ys; j < ys + ny; j++) {
      jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
      jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
      for (k = zs; k < zs + nz; k++) {
        kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
        kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));

        slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs); /* local (ghosted) index */

        /* one matrix row per component l of this grid point */
        for (l = 0; l < nc; l++) {
          cnt = 0;
          for (ii = istart; ii < iend + 1; ii++) {
            for (jj = jstart; jj < jend + 1; jj++) {
              for (kk = kstart; kk < kend + 1; kk++) {
                if (ii || jj || kk) {
                  /* off-diagonal block: use the ofill pattern for row l */
                  if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/
                    for (ifill_col = ofill[l]; ifill_col < ofill[l + 1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc * (slot + ii + gnx * jj + gnx * gny * kk);
                  }
                } else {
                  /* diagonal block: use dfill if given, otherwise fully dense coupling */
                  if (dfill) {
                    for (ifill_col = dfill[l]; ifill_col < dfill[l + 1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc * (slot + ii + gnx * jj + gnx * gny * kk);
                  } else {
                    for (ifill_col = 0; ifill_col < nc; ifill_col++) cols[cnt++] = ifill_col + nc * (slot + ii + gnx * jj + gnx * gny * kk);
                  }
                }
              }
            }
          }
          row    = l + nc * (slot);
          maxcnt = PetscMax(maxcnt, cnt); /* remember the longest row for the zero-values buffer below */
          if (removedups) PetscCall(MatPreallocateSetLocalRemoveDups(ltog, 1, &row, ltog, cnt, cols, dnz, onz));
          else PetscCall(MatPreallocateSetLocal(ltog, 1, &row, ltog, cnt, cols, dnz, onz));
        }
      }
    }
  }
  PetscCall(MatSeqAIJSetPreallocation(J, 0, dnz));
  PetscCall(MatMPIAIJSetPreallocation(J, 0, dnz, 0, onz));
  MatPreallocateEnd(dnz, onz);
  PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    /* insert explicit zeros so the nonzero structure exists before the user assembles */
    PetscCall(PetscCalloc1(maxcnt, &values));
    for (i = xs; i < xs + nx; i++) {
      istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
      iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
      for (j = ys; j < ys + ny; j++) {
        jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
        jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
        for (k = zs; k < zs + nz; k++) {
          kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
          kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));

          slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);

          for (l = 0; l < nc; l++) {
            cnt = 0;
            for (ii = istart; ii < iend + 1; ii++) {
              for (jj = jstart; jj < jend + 1; jj++) {
                for (kk = kstart; kk < kend + 1; kk++) {
                  if (ii || jj || kk) {
                    if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/
                      for (ifill_col = ofill[l]; ifill_col < ofill[l + 1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc * (slot + ii + gnx * jj + gnx * gny * kk);
                    }
                  } else {
                    if (dfill) {
                      for (ifill_col = dfill[l]; ifill_col < dfill[l + 1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc * (slot + ii + gnx * jj + gnx * gny * kk);
                    } else {
                      for (ifill_col = 0; ifill_col < nc; ifill_col++) cols[cnt++] = ifill_col + nc * (slot + ii + gnx * jj + gnx * gny * kk);
                    }
                  }
                }
              }
            }
            row = l + nc * (slot);
            PetscCall(MatSetValuesLocal(J, 1, &row, cnt, cols, values, INSERT_VALUES));
          }
        }
      }
    }
    PetscCall(PetscFree(values));
    /* do not copy values to GPU since they are all zero and not yet needed there */
    PetscCall(MatBindToCPU(J, PETSC_TRUE));
    PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatBindToCPU(J, PETSC_FALSE));
    PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  }
  PetscCall(PetscFree(cols));
  PetscFunctionReturn(PETSC_SUCCESS);
}