1 #include <petsc/private/dmdaimpl.h> /*I "petscdmda.h" I*/ 2 #include <petscmat.h> 3 #include <petscbt.h> 4 5 extern PetscErrorCode DMCreateColoring_DA_1d_MPIAIJ(DM, ISColoringType, ISColoring *); 6 extern PetscErrorCode DMCreateColoring_DA_2d_MPIAIJ(DM, ISColoringType, ISColoring *); 7 extern PetscErrorCode DMCreateColoring_DA_2d_5pt_MPIAIJ(DM, ISColoringType, ISColoring *); 8 extern PetscErrorCode DMCreateColoring_DA_3d_MPIAIJ(DM, ISColoringType, ISColoring *); 9 10 /* 11 For ghost i that may be negative or greater than the upper bound this 12 maps it into the 0:m-1 range using periodicity 13 */ 14 #define SetInRange(i, m) ((i < 0) ? m + i : ((i >= m) ? i - m : i)) 15 16 static PetscErrorCode DMDASetBlockFills_Private(const PetscInt *dfill, PetscInt w, PetscInt **rfill) 17 { 18 PetscInt i, j, nz, *fill; 19 20 PetscFunctionBegin; 21 if (!dfill) PetscFunctionReturn(PETSC_SUCCESS); 22 23 /* count number nonzeros */ 24 nz = 0; 25 for (i = 0; i < w; i++) { 26 for (j = 0; j < w; j++) { 27 if (dfill[w * i + j]) nz++; 28 } 29 } 30 PetscCall(PetscMalloc1(nz + w + 1, &fill)); 31 /* construct modified CSR storage of nonzero structure */ 32 /* fill[0 -- w] marks starts of each row of column indices (and end of last row) 33 so fill[1] - fill[0] gives number of nonzeros in first row etc */ 34 nz = w + 1; 35 for (i = 0; i < w; i++) { 36 fill[i] = nz; 37 for (j = 0; j < w; j++) { 38 if (dfill[w * i + j]) { 39 fill[nz] = j; 40 nz++; 41 } 42 } 43 } 44 fill[w] = nz; 45 46 *rfill = fill; 47 PetscFunctionReturn(PETSC_SUCCESS); 48 } 49 50 static PetscErrorCode DMDASetBlockFillsSparse_Private(const PetscInt *dfillsparse, PetscInt w, PetscInt **rfill) 51 { 52 PetscInt nz; 53 54 PetscFunctionBegin; 55 if (!dfillsparse) PetscFunctionReturn(PETSC_SUCCESS); 56 57 /* Determine number of non-zeros */ 58 nz = (dfillsparse[w] - w - 1); 59 60 /* Allocate space for our copy of the given sparse matrix representation. 
*/ 61 PetscCall(PetscMalloc1(nz + w + 1, rfill)); 62 PetscCall(PetscArraycpy(*rfill, dfillsparse, nz + w + 1)); 63 PetscFunctionReturn(PETSC_SUCCESS); 64 } 65 66 static PetscErrorCode DMDASetBlockFills_Private2(DM_DA *dd) 67 { 68 PetscInt i, k, cnt = 1; 69 70 PetscFunctionBegin; 71 72 /* ofillcount tracks the columns of ofill that have any nonzero in thems; the value in each location is the number of 73 columns to the left with any nonzeros in them plus 1 */ 74 PetscCall(PetscCalloc1(dd->w, &dd->ofillcols)); 75 for (i = 0; i < dd->w; i++) { 76 for (k = dd->ofill[i]; k < dd->ofill[i + 1]; k++) dd->ofillcols[dd->ofill[k]] = 1; 77 } 78 for (i = 0; i < dd->w; i++) { 79 if (dd->ofillcols[i]) dd->ofillcols[i] = cnt++; 80 } 81 PetscFunctionReturn(PETSC_SUCCESS); 82 } 83 84 /*@ 85 DMDASetBlockFills - Sets the fill pattern in each block for a multi-component problem 86 of the matrix returned by `DMCreateMatrix()`. 87 88 Logically Collective 89 90 Input Parameters: 91 + da - the distributed array 92 . dfill - the fill pattern in the diagonal block (may be NULL, means use dense block) 93 - ofill - the fill pattern in the off-diagonal blocks 94 95 Level: developer 96 97 Notes: 98 This only makes sense when you are doing multicomponent problems but using the 99 `MATMPIAIJ` matrix format 100 101 The format for dfill and ofill is a 2 dimensional dof by dof matrix with 1 entries 102 representing coupling and 0 entries for missing coupling. For example 103 .vb 104 dfill[9] = {1, 0, 0, 105 1, 1, 0, 106 0, 1, 1} 107 .ve 108 means that row 0 is coupled with only itself in the diagonal block, row 1 is coupled with 109 itself and row 0 (in the diagonal block) and row 2 is coupled with itself and row 1 (in the 110 diagonal block). 

  `DMDASetGetMatrix()` allows you to provide general code for those more complicated nonzero patterns than
  can be represented in the dfill, ofill format

  Contributed by Glenn Hammond

.seealso: `DM`, `DMDA`, `DMCreateMatrix()`, `DMDASetGetMatrix()`, `DMSetMatrixPreallocateOnly()`
@*/
PetscErrorCode DMDASetBlockFills(DM da, const PetscInt *dfill, const PetscInt *ofill)
{
  DM_DA *dd = (DM_DA *)da->data;

  PetscFunctionBegin;
  /* save the given dfill and ofill information */
  PetscCall(DMDASetBlockFills_Private(dfill, dd->w, &dd->dfill));
  PetscCall(DMDASetBlockFills_Private(ofill, dd->w, &dd->ofill));

  /* count nonzeros in ofill columns */
  PetscCall(DMDASetBlockFills_Private2(dd));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  DMDASetBlockFillsSparse - Sets the fill pattern in each block for a multi-component problem
  of the matrix returned by `DMCreateMatrix()`, using sparse representations
  of fill patterns.

  Logically Collective

  Input Parameters:
+ da          - the distributed array
. dfillsparse - the sparse fill pattern in the diagonal block (may be `NULL`, means use dense block)
- ofillsparse - the sparse fill pattern in the off-diagonal blocks

  Level: developer

  Notes:
  This only makes sense when you are doing multicomponent problems but using the
  `MATMPIAIJ` matrix format

  The format for `dfill` and `ofill` is a sparse representation of a
  dof-by-dof matrix with 1 entries representing coupling and 0 entries
  for missing coupling. The sparse representation is a 1 dimensional
  array of length nz + dof + 1, where nz is the number of non-zeros in
  the matrix. The first dof entries in the array give the
  starting array indices of each row's items in the rest of the array,
  the dof+1st item contains the value nz + dof + 1 (i.e. the entire length of the array)
  and the remaining nz items give the column indices of each of
  the 1s within the logical 2D matrix. Each row's items within
  the array are the column indices of the 1s within that row
  of the 2D matrix. PETSc developers may recognize that this is the
  same format as that computed by the `DMDASetBlockFills_Private()`
  function from a dense 2D matrix representation.

  `DMDASetGetMatrix()` allows you to provide general code for those more complicated nonzero patterns than
  can be represented in the `dfill`, `ofill` format

  Contributed by Philip C. Roth

.seealso: `DM`, `DMDA`, `DMDASetBlockFills()`, `DMCreateMatrix()`, `DMDASetGetMatrix()`, `DMSetMatrixPreallocateOnly()`
@*/
PetscErrorCode DMDASetBlockFillsSparse(DM da, const PetscInt *dfillsparse, const PetscInt *ofillsparse)
{
  DM_DA *dd = (DM_DA *)da->data;

  PetscFunctionBegin;
  /* save the given dfill and ofill information */
  PetscCall(DMDASetBlockFillsSparse_Private(dfillsparse, dd->w, &dd->dfill));
  PetscCall(DMDASetBlockFillsSparse_Private(ofillsparse, dd->w, &dd->ofill));

  /* count nonzeros in ofill columns */
  PetscCall(DMDASetBlockFills_Private2(dd));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Dispatches coloring creation to the dimension-specific routines below */
PetscErrorCode DMCreateColoring_DA(DM da, ISColoringType ctype, ISColoring *coloring)
{
  PetscInt       dim, m, n, p, nc;
  DMBoundaryType bx, by, bz;
  MPI_Comm       comm;
  PetscMPIInt    size;
  PetscBool      isBAIJ;
  DM_DA         *dd = (DM_DA *)da->data;

  PetscFunctionBegin;
  /*
                                                     m
          ------------------------------------------------------
         |                                                      |
         |                                                      |
         |               ----------------------                 |
         |               |                    |                 |
      n  |           yn  |                    |                 |
         |               |                    |                 |
         |               .---------------------                 |
         |             (xs,ys)     xn                           |
         |            .
                                              |
         |         (gxs,gys)                                    |
         |                                                      |
          -----------------------------------------------------
  */

  /*
     nc - number of components per grid point
     col - number of colors needed in one direction for single component problem

  */
  PetscCall(DMDAGetInfo(da, &dim, NULL, NULL, NULL, &m, &n, &p, &nc, NULL, &bx, &by, &bz, NULL));

  PetscCall(PetscObjectGetComm((PetscObject)da, &comm));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  if (ctype == IS_COLORING_LOCAL) {
    if (size == 1) {
      /* on one process a ghosted (local) coloring is the same as a global one */
      ctype = IS_COLORING_GLOBAL;
    } else {
      PetscCheck((dim == 1) || !((m == 1 && bx == DM_BOUNDARY_PERIODIC) || (n == 1 && by == DM_BOUNDARY_PERIODIC) || (p == 1 && bz == DM_BOUNDARY_PERIODIC)), PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "IS_COLORING_LOCAL cannot be used for periodic boundary condition having both ends of the domain on the same process");
    }
  }

  /* Tell the DMDA it has 1 degree of freedom per grid point so that the coloring for BAIJ
     matrices is for the blocks, not the individual matrix elements */
  PetscCall(PetscStrbeginswith(da->mattype, MATBAIJ, &isBAIJ));
  if (!isBAIJ) PetscCall(PetscStrbeginswith(da->mattype, MATMPIBAIJ, &isBAIJ));
  if (!isBAIJ) PetscCall(PetscStrbeginswith(da->mattype, MATSEQBAIJ, &isBAIJ));
  if (isBAIJ) {
    dd->w  = 1;
    dd->xs = dd->xs / nc;
    dd->xe = dd->xe / nc;
    dd->Xs = dd->Xs / nc;
    dd->Xe = dd->Xe / nc;
  }

  /*
     We do not provide a getcoloring function in the DMDA operations because
     the basic DMDA does not know about matrices. We think of DMDA as being
     more low-level than matrices.
  */
  if (dim == 1) PetscCall(DMCreateColoring_DA_1d_MPIAIJ(da, ctype, coloring));
  else if (dim == 2) PetscCall(DMCreateColoring_DA_2d_MPIAIJ(da, ctype, coloring));
  else if (dim == 3) PetscCall(DMCreateColoring_DA_3d_MPIAIJ(da, ctype, coloring));
  else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "Not done for %" PetscInt_FMT " dimension, send us mail petsc-maint@mcs.anl.gov for code", dim);
  if (isBAIJ) {
    /* restore the dof-based sizes temporarily overwritten above */
    dd->w  = nc;
    dd->xs = dd->xs * nc;
    dd->xe = dd->xe * nc;
    dd->Xs = dd->Xs * nc;
    dd->Xe = dd->Xe * nc;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* 2d coloring: color = position of the grid point within a col-by-col tile of the domain */
PetscErrorCode DMCreateColoring_DA_2d_MPIAIJ(DM da, ISColoringType ctype, ISColoring *coloring)
{
  PetscInt         xs, ys, nx, ny, i, j, ii, gxs, gys, gnx, gny, m, n, M, N, dim, s, k, nc, col;
  PetscInt         ncolors = 0;
  MPI_Comm         comm;
  DMBoundaryType   bx, by;
  DMDAStencilType  st;
  ISColoringValue *colors;
  DM_DA           *dd = (DM_DA *)da->data;

  PetscFunctionBegin;
  /*
     nc - number of components per grid point
     col - number of colors needed in one direction for single component problem

  */
  PetscCall(DMDAGetInfo(da, &dim, &m, &n, NULL, &M, &N, NULL, &nc, &s, &bx, &by, NULL, &st));
  col = 2 * s + 1;
  PetscCall(DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL));
  PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL));
  PetscCall(PetscObjectGetComm((PetscObject)da, &comm));

  /* special case as taught to us by Paul Hovland */
  if (st == DMDA_STENCIL_STAR && s == 1) {
    PetscCall(DMCreateColoring_DA_2d_5pt_MPIAIJ(da, ctype, coloring));
  } else {
    if (ctype == IS_COLORING_GLOBAL) {
      if (!dd->localcoloring) {
        PetscCall(PetscMalloc1(nc * nx * ny, &colors));
        ii = 0;
        for (j = ys; j < ys + ny; j++) {
          for (i = xs; i < xs + nx; i++) {
            for (k = 0; k < nc; k++) colors[ii++] = k + nc * ((i % col) + col * (j % col));
          }
        }
        ncolors = nc + nc * (col - 1 + col *
(col - 1));
        PetscCall(ISColoringCreate(comm, ncolors, nc * nx * ny, colors, PETSC_OWN_POINTER, &dd->localcoloring));
      }
      *coloring = dd->localcoloring;
    } else if (ctype == IS_COLORING_LOCAL) {
      if (!dd->ghostedcoloring) {
        PetscCall(PetscMalloc1(nc * gnx * gny, &colors));
        ii = 0;
        for (j = gys; j < gys + gny; j++) {
          for (i = gxs; i < gxs + gnx; i++) {
            for (k = 0; k < nc; k++) {
              /* the complicated stuff is to handle periodic boundaries */
              colors[ii++] = k + nc * ((SetInRange(i, m) % col) + col * (SetInRange(j, n) % col));
            }
          }
        }
        ncolors = nc + nc * (col - 1 + col * (col - 1));
        PetscCall(ISColoringCreate(comm, ncolors, nc * gnx * gny, colors, PETSC_OWN_POINTER, &dd->ghostedcoloring));
        /* PetscIntView(ncolors,(PetscInt*)colors,0); */

        PetscCall(ISColoringSetType(dd->ghostedcoloring, IS_COLORING_LOCAL));
      }
      *coloring = dd->ghostedcoloring;
    } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_ARG_WRONG, "Unknown ISColoringType %d", (int)ctype);
  }
  PetscCall(ISColoringReference(*coloring));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* 3d coloring: color = position of the grid point within a col^3 tile of the domain */
PetscErrorCode DMCreateColoring_DA_3d_MPIAIJ(DM da, ISColoringType ctype, ISColoring *coloring)
{
  PetscInt         xs, ys, nx, ny, i, j, gxs, gys, gnx, gny, m, n, p, dim, s, k, nc, col, zs, gzs, ii, l, nz, gnz, M, N, P;
  PetscInt         ncolors;
  MPI_Comm         comm;
  DMBoundaryType   bx, by, bz;
  DMDAStencilType  st;
  ISColoringValue *colors;
  DM_DA           *dd = (DM_DA *)da->data;

  PetscFunctionBegin;
  /*
     nc - number of components per grid point
     col - number of colors needed in one direction for single component problem
  */
  PetscCall(DMDAGetInfo(da, &dim, &m, &n, &p, &M, &N, &P, &nc, &s, &bx, &by, &bz, &st));
  col = 2 * s + 1;
  PetscCheck(bx != DM_BOUNDARY_PERIODIC || (m % col) == 0, PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "For coloring efficiency ensure number of grid points in X is divisible\n\
by 2*stencil_width + 1\n");
  PetscCheck(by != DM_BOUNDARY_PERIODIC || (n % col) == 0, PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "For coloring efficiency ensure number of grid points in Y is divisible\n\
by 2*stencil_width + 1\n");
  PetscCheck(bz != DM_BOUNDARY_PERIODIC || (p % col) == 0, PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "For coloring efficiency ensure number of grid points in Z is divisible\n\
by 2*stencil_width + 1\n");

  PetscCall(DMDAGetCorners(da, &xs, &ys, &zs, &nx, &ny, &nz));
  PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, &gzs, &gnx, &gny, &gnz));
  PetscCall(PetscObjectGetComm((PetscObject)da, &comm));

  /* create the coloring */
  if (ctype == IS_COLORING_GLOBAL) {
    if (!dd->localcoloring) {
      PetscCall(PetscMalloc1(nc * nx * ny * nz, &colors));
      ii = 0;
      for (k = zs; k < zs + nz; k++) {
        for (j = ys; j < ys + ny; j++) {
          for (i = xs; i < xs + nx; i++) {
            for (l = 0; l < nc; l++) colors[ii++] = l + nc * ((i % col) + col * (j % col) + col * col * (k % col));
          }
        }
      }
      ncolors = nc + nc * (col - 1 + col * (col - 1) + col * col * (col - 1));
      PetscCall(ISColoringCreate(comm, ncolors, nc * nx * ny * nz, colors, PETSC_OWN_POINTER, &dd->localcoloring));
    }
    *coloring = dd->localcoloring;
  } else if (ctype == IS_COLORING_LOCAL) {
    if (!dd->ghostedcoloring) {
      PetscCall(PetscMalloc1(nc * gnx * gny * gnz, &colors));
      ii = 0;
      for (k = gzs; k < gzs + gnz; k++) {
        for (j = gys; j < gys + gny; j++) {
          for (i = gxs; i < gxs + gnx; i++) {
            for (l = 0; l < nc; l++) {
              /* the complicated stuff is to handle periodic boundaries */
              colors[ii++] = l + nc * ((SetInRange(i, m) % col) + col * (SetInRange(j, n) % col) + col * col * (SetInRange(k, p) % col));
            }
          }
        }
      }
      ncolors = nc + nc * (col - 1 + col * (col - 1) + col * col * (col - 1));
      PetscCall(ISColoringCreate(comm, ncolors, nc * gnx * gny * gnz, colors, PETSC_OWN_POINTER,
&dd->ghostedcoloring));
      PetscCall(ISColoringSetType(dd->ghostedcoloring, IS_COLORING_LOCAL));
    }
    *coloring = dd->ghostedcoloring;
  } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_ARG_WRONG, "Unknown ISColoringType %d", (int)ctype);
  PetscCall(ISColoringReference(*coloring));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* 1d coloring; when a block fill pattern (ofillcols) is set, colors are compressed so
   components with no off-diagonal coupling can share colors across grid points */
PetscErrorCode DMCreateColoring_DA_1d_MPIAIJ(DM da, ISColoringType ctype, ISColoring *coloring)
{
  PetscInt         xs, nx, i, i1, gxs, gnx, l, m, M, dim, s, nc, col;
  PetscInt         ncolors;
  MPI_Comm         comm;
  DMBoundaryType   bx;
  ISColoringValue *colors;
  DM_DA           *dd = (DM_DA *)da->data;

  PetscFunctionBegin;
  /*
     nc - number of components per grid point
     col - number of colors needed in one direction for single component problem
  */
  PetscCall(DMDAGetInfo(da, &dim, &m, NULL, NULL, &M, NULL, NULL, &nc, &s, &bx, NULL, NULL, NULL));
  col = 2 * s + 1;
  PetscCall(DMDAGetCorners(da, &xs, NULL, NULL, &nx, NULL, NULL));
  PetscCall(DMDAGetGhostCorners(da, &gxs, NULL, NULL, &gnx, NULL, NULL));
  PetscCall(PetscObjectGetComm((PetscObject)da, &comm));

  /* create the coloring */
  if (ctype == IS_COLORING_GLOBAL) {
    if (!dd->localcoloring) {
      PetscCall(PetscMalloc1(nc * nx, &colors));
      if (dd->ofillcols) {
        PetscInt tc = 0;
        /* tc = number of components with at least one off-diagonal nonzero */
        for (i = 0; i < nc; i++) tc += (PetscInt)(dd->ofillcols[i] > 0);
        i1 = 0;
        for (i = xs; i < xs + nx; i++) {
          for (l = 0; l < nc; l++) {
            if (dd->ofillcols[l] && (i % col)) {
              colors[i1++] = nc - 1 + tc * ((i % col) - 1) + dd->ofillcols[l];
            } else {
              colors[i1++] = l;
            }
          }
        }
        ncolors = nc + 2 * s * tc;
      } else {
        i1 = 0;
        for (i = xs; i < xs + nx; i++) {
          for (l = 0; l < nc; l++) colors[i1++] = l + nc * (i % col);
        }
        ncolors = nc + nc * (col - 1);
      }
      PetscCall(ISColoringCreate(comm, ncolors, nc * nx, colors, PETSC_OWN_POINTER, &dd->localcoloring));
    }
    *coloring = dd->localcoloring;
  } else if (ctype == IS_COLORING_LOCAL) {
    if (!dd->ghostedcoloring) {
      PetscCall(PetscMalloc1(nc * gnx, &colors));
      i1 = 0;
      for (i = gxs; i < gxs + gnx; i++) {
        for (l = 0; l < nc; l++) {
          /* the complicated stuff is to handle periodic boundaries */
          colors[i1++] = l + nc * (SetInRange(i, m) % col);
        }
      }
      ncolors = nc + nc * (col - 1);
      PetscCall(ISColoringCreate(comm, ncolors, nc * gnx, colors, PETSC_OWN_POINTER, &dd->ghostedcoloring));
      PetscCall(ISColoringSetType(dd->ghostedcoloring, IS_COLORING_LOCAL));
    }
    *coloring = dd->ghostedcoloring;
  } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_ARG_WRONG, "Unknown ISColoringType %d", (int)ctype);
  PetscCall(ISColoringReference(*coloring));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* special 5-color scheme for the 2d 5-point (star, width 1) stencil */
PetscErrorCode DMCreateColoring_DA_2d_5pt_MPIAIJ(DM da, ISColoringType ctype, ISColoring *coloring)
{
  PetscInt         xs, ys, nx, ny, i, j, ii, gxs, gys, gnx, gny, m, n, dim, s, k, nc;
  PetscInt         ncolors;
  MPI_Comm         comm;
  DMBoundaryType   bx, by;
  ISColoringValue *colors;
  DM_DA           *dd = (DM_DA *)da->data;

  PetscFunctionBegin;
  /*
     nc - number of components per grid point
     col - number of colors needed in one direction for single component problem
  */
  PetscCall(DMDAGetInfo(da, &dim, &m, &n, NULL, NULL, NULL, NULL, &nc, &s, &bx, &by, NULL, NULL));
  PetscCall(DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL));
  PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL));
  PetscCall(PetscObjectGetComm((PetscObject)da, &comm));
  /* create the coloring */
  if (ctype == IS_COLORING_GLOBAL) {
    if (!dd->localcoloring) {
      PetscCall(PetscMalloc1(nc * nx * ny, &colors));
      ii = 0;
      for (j = ys; j < ys + ny; j++) {
        for (i = xs; i < xs + nx; i++) {
          for (k = 0; k < nc; k++) colors[ii++] = k + nc * ((3 * j + i) % 5);
        }
      }
      ncolors = 5 * nc;
      PetscCall(ISColoringCreate(comm, ncolors, nc * nx * ny, colors, PETSC_OWN_POINTER, &dd->localcoloring));
    }
    *coloring = dd->localcoloring;
  } else if (ctype == IS_COLORING_LOCAL) {
    if (!dd->ghostedcoloring) {
      PetscCall(PetscMalloc1(nc * gnx * gny, &colors));
      ii = 0;
      for (j = gys; j < gys + gny; j++) {
        for (i = gxs; i < gxs + gnx; i++) {
          for (k = 0; k < nc; k++) colors[ii++] = k + nc * ((3 * SetInRange(j, n) + SetInRange(i, m)) % 5);
        }
      }
      ncolors = 5 * nc;
      PetscCall(ISColoringCreate(comm, ncolors, nc * gnx * gny, colors, PETSC_OWN_POINTER, &dd->ghostedcoloring));
      PetscCall(ISColoringSetType(dd->ghostedcoloring, IS_COLORING_LOCAL));
    }
    *coloring = dd->ghostedcoloring;
  } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_ARG_WRONG, "Unknown ISColoringType %d", (int)ctype);
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* =========================================================================== */
extern PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ(DM, Mat);
extern PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ_Fill(DM, Mat);
extern PetscErrorCode DMCreateMatrix_DA_1d_SeqAIJ_NoPreallocation(DM, Mat);
extern PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ(DM, Mat);
extern PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ_Fill(DM, Mat);
extern PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ(DM, Mat);
extern PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ_Fill(DM, Mat);
extern PetscErrorCode DMCreateMatrix_DA_2d_MPIBAIJ(DM, Mat);
extern PetscErrorCode DMCreateMatrix_DA_3d_MPIBAIJ(DM, Mat);
extern PetscErrorCode DMCreateMatrix_DA_2d_MPISBAIJ(DM, Mat);
extern PetscErrorCode DMCreateMatrix_DA_3d_MPISBAIJ(DM, Mat);
extern PetscErrorCode DMCreateMatrix_DA_2d_MPISELL(DM, Mat);
extern PetscErrorCode DMCreateMatrix_DA_3d_MPISELL(DM, Mat);
extern PetscErrorCode DMCreateMatrix_DA_IS(DM, Mat);

/*@C
  MatSetupDM - Sets the `DMDA` that is to be used by the HYPRE_StructMatrix PETSc matrix

  Logically Collective

  Input Parameters:
+ mat - the matrix
- da  - the da

  Level: intermediate

.seealso: `DMDA`, `Mat`, `MatSetUp()`
@*/
PetscErrorCode MatSetupDM(Mat mat, DM da)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat, MAT_CLASSID, 1);
  PetscValidHeaderSpecificType(da, DM_CLASSID, 2, DMDA);
  PetscTryMethod(mat, "MatSetupDM_C", (Mat, DM), (mat, da));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* View hook installed on DMDA-generated parallel matrices: permutes the matrix back to
   the natural (application) grid ordering via the DMDA's AO before viewing */
PetscErrorCode MatView_MPI_DA(Mat A, PetscViewer viewer)
{
  DM                da;
  const char       *prefix;
  Mat               AA, Anatural;
  AO                ao;
  PetscInt          rstart, rend, *petsc, i;
  IS                is;
  MPI_Comm          comm;
  PetscViewerFormat format;
  PetscBool         flag;

  PetscFunctionBegin;
  /* Check whether we are just printing info, in which case MatView() already viewed everything we wanted to view */
  PetscCall(PetscViewerGetFormat(viewer, &format));
  if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) PetscFunctionReturn(PETSC_SUCCESS);

  PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
  PetscCall(MatGetDM(A, &da));
  PetscCheck(da, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Matrix not generated from a DMDA");

  PetscCall(DMDAGetAO(da, &ao));
  PetscCall(MatGetOwnershipRange(A, &rstart, &rend));
  PetscCall(PetscMalloc1(rend - rstart, &petsc));
  for (i = rstart; i < rend; i++) petsc[i - rstart] = i;
  PetscCall(AOApplicationToPetsc(ao, rend - rstart, petsc));
  PetscCall(ISCreateGeneral(comm, rend - rstart, petsc, PETSC_OWN_POINTER, &is));

  /* call viewer on natural ordering */
  PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATMPISELL, &flag));
  if (flag) {
    /* SELL matrices cannot form submatrices directly; view an AIJ copy instead */
    PetscCall(MatConvert(A, MATAIJ, MAT_INITIAL_MATRIX, &AA));
    A = AA;
  }
  PetscCall(MatCreateSubMatrix(A, is, is, MAT_INITIAL_MATRIX, &Anatural));
  PetscCall(ISDestroy(&is));
  PetscCall(PetscObjectGetOptionsPrefix((PetscObject)A, &prefix));
  PetscCall(PetscObjectSetOptionsPrefix((PetscObject)Anatural, prefix));
  PetscCall(PetscObjectSetName((PetscObject)Anatural, ((PetscObject)A)->name));
  ((PetscObject)Anatural)->donotPetscObjectPrintClassNamePrefixType = PETSC_TRUE;
  PetscCall(MatView(Anatural, viewer));
  ((PetscObject)Anatural)->donotPetscObjectPrintClassNamePrefixType = PETSC_FALSE;
  PetscCall(MatDestroy(&Anatural));
  if (flag) PetscCall(MatDestroy(&AA));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Load hook counterpart of MatView_MPI_DA(): loads in natural ordering, then permutes
   into the DMDA application ordering and replaces the header of A */
PetscErrorCode MatLoad_MPI_DA(Mat A, PetscViewer viewer)
{
  DM       da;
  Mat      Anatural, Aapp;
  AO       ao;
  PetscInt rstart, rend, *app, i, m, n, M, N;
  IS       is;
  MPI_Comm comm;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
  PetscCall(MatGetDM(A, &da));
  PetscCheck(da, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Matrix not generated from a DMDA");

  /* Load the matrix in natural ordering */
  PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &Anatural));
  PetscCall(MatSetType(Anatural, ((PetscObject)A)->type_name));
  PetscCall(MatGetSize(A, &M, &N));
  PetscCall(MatGetLocalSize(A, &m, &n));
  PetscCall(MatSetSizes(Anatural, m, n, M, N));
  PetscCall(MatLoad(Anatural, viewer));

  /* Map natural ordering to application ordering and create IS */
  PetscCall(DMDAGetAO(da, &ao));
  PetscCall(MatGetOwnershipRange(Anatural, &rstart, &rend));
  PetscCall(PetscMalloc1(rend - rstart, &app));
  for (i = rstart; i < rend; i++) app[i - rstart] = i;
  PetscCall(AOPetscToApplication(ao, rend - rstart, app));
  PetscCall(ISCreateGeneral(comm, rend - rstart, app, PETSC_OWN_POINTER, &is));

  /* Do permutation and replace header */
  PetscCall(MatCreateSubMatrix(Anatural, is, is, MAT_INITIAL_MATRIX, &Aapp));
  PetscCall(MatHeaderReplace(A, &Aapp));
  PetscCall(ISDestroy(&is));
PetscCall(MatDestroy(&Anatural)); 632 PetscFunctionReturn(PETSC_SUCCESS); 633 } 634 635 PetscErrorCode DMCreateMatrix_DA(DM da, Mat *J) 636 { 637 PetscInt dim, dof, nx, ny, nz, dims[3], starts[3], M, N, P; 638 Mat A; 639 MPI_Comm comm; 640 MatType Atype; 641 MatType mtype; 642 PetscMPIInt size; 643 DM_DA *dd = (DM_DA *)da->data; 644 void (*aij)(void) = NULL, (*baij)(void) = NULL, (*sbaij)(void) = NULL, (*sell)(void) = NULL, (*is)(void) = NULL; 645 646 PetscFunctionBegin; 647 PetscCall(MatInitializePackage()); 648 mtype = da->mattype; 649 650 /* 651 m 652 ------------------------------------------------------ 653 | | 654 | | 655 | ---------------------- | 656 | | | | 657 n | ny | | | 658 | | | | 659 | .--------------------- | 660 | (xs,ys) nx | 661 | . | 662 | (gxs,gys) | 663 | | 664 ----------------------------------------------------- 665 */ 666 667 /* 668 nc - number of components per grid point 669 col - number of colors needed in one direction for single component problem 670 */ 671 M = dd->M; 672 N = dd->N; 673 P = dd->P; 674 dim = da->dim; 675 dof = dd->w; 676 /* PetscCall(DMDAGetInfo(da,&dim,&M,&N,&P,NULL,NULL,NULL,&dof,NULL,NULL,NULL,NULL,NULL)); */ 677 PetscCall(DMDAGetCorners(da, NULL, NULL, NULL, &nx, &ny, &nz)); 678 PetscCall(PetscObjectGetComm((PetscObject)da, &comm)); 679 PetscCall(MatCreate(comm, &A)); 680 PetscCall(MatSetSizes(A, dof * nx * ny * nz, dof * nx * ny * nz, dof * M * N * P, dof * M * N * P)); 681 PetscCall(MatSetType(A, mtype)); 682 PetscCall(MatSetFromOptions(A)); 683 if (dof * nx * ny * nz < da->bind_below) { 684 PetscCall(MatSetBindingPropagates(A, PETSC_TRUE)); 685 PetscCall(MatBindToCPU(A, PETSC_TRUE)); 686 } 687 PetscCall(MatSetDM(A, da)); 688 if (da->structure_only) PetscCall(MatSetOption(A, MAT_STRUCTURE_ONLY, PETSC_TRUE)); 689 PetscCall(MatGetType(A, &Atype)); 690 /* 691 We do not provide a getmatrix function in the DMDA operations because 692 the basic DMDA does not know about matrices. 
We think of DMDA as being more 693 more low-level than matrices. This is kind of cheating but, cause sometimes 694 we think of DMDA has higher level than matrices. 695 696 We could switch based on Atype (or mtype), but we do not since the 697 specialized setting routines depend only on the particular preallocation 698 details of the matrix, not the type itself. 699 */ 700 PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatMPIAIJSetPreallocation_C", &aij)); 701 if (!aij) PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatSeqAIJSetPreallocation_C", &aij)); 702 if (!aij) { 703 PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatMPIBAIJSetPreallocation_C", &baij)); 704 if (!baij) PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatSeqBAIJSetPreallocation_C", &baij)); 705 if (!baij) { 706 PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatMPISBAIJSetPreallocation_C", &sbaij)); 707 if (!sbaij) PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatSeqSBAIJSetPreallocation_C", &sbaij)); 708 if (!sbaij) { 709 PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatMPISELLSetPreallocation_C", &sell)); 710 if (!sell) PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatSeqSELLSetPreallocation_C", &sell)); 711 } 712 if (!sell) PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatISSetPreallocation_C", &is)); 713 } 714 } 715 if (aij) { 716 if (dim == 1) { 717 if (dd->ofill) { 718 PetscCall(DMCreateMatrix_DA_1d_MPIAIJ_Fill(da, A)); 719 } else { 720 DMBoundaryType bx; 721 PetscMPIInt size; 722 PetscCall(DMDAGetInfo(da, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, &bx, NULL, NULL, NULL)); 723 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)da), &size)); 724 if (size == 1 && bx == DM_BOUNDARY_NONE) { 725 PetscCall(DMCreateMatrix_DA_1d_SeqAIJ_NoPreallocation(da, A)); 726 } else { 727 PetscCall(DMCreateMatrix_DA_1d_MPIAIJ(da, A)); 728 } 729 } 730 } else if (dim == 2) { 731 if (dd->ofill) { 732 PetscCall(DMCreateMatrix_DA_2d_MPIAIJ_Fill(da, A)); 
733 } else { 734 PetscCall(DMCreateMatrix_DA_2d_MPIAIJ(da, A)); 735 } 736 } else if (dim == 3) { 737 if (dd->ofill) { 738 PetscCall(DMCreateMatrix_DA_3d_MPIAIJ_Fill(da, A)); 739 } else { 740 PetscCall(DMCreateMatrix_DA_3d_MPIAIJ(da, A)); 741 } 742 } 743 } else if (baij) { 744 if (dim == 2) { 745 PetscCall(DMCreateMatrix_DA_2d_MPIBAIJ(da, A)); 746 } else if (dim == 3) { 747 PetscCall(DMCreateMatrix_DA_3d_MPIBAIJ(da, A)); 748 } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "Not implemented for %" PetscInt_FMT " dimension and Matrix Type: %s in %" PetscInt_FMT " dimension! Send mail to petsc-maint@mcs.anl.gov for code", dim, Atype, dim); 749 } else if (sbaij) { 750 if (dim == 2) { 751 PetscCall(DMCreateMatrix_DA_2d_MPISBAIJ(da, A)); 752 } else if (dim == 3) { 753 PetscCall(DMCreateMatrix_DA_3d_MPISBAIJ(da, A)); 754 } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "Not implemented for %" PetscInt_FMT " dimension and Matrix Type: %s in %" PetscInt_FMT " dimension! Send mail to petsc-maint@mcs.anl.gov for code", dim, Atype, dim); 755 } else if (sell) { 756 if (dim == 2) { 757 PetscCall(DMCreateMatrix_DA_2d_MPISELL(da, A)); 758 } else if (dim == 3) { 759 PetscCall(DMCreateMatrix_DA_3d_MPISELL(da, A)); 760 } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "Not implemented for %" PetscInt_FMT " dimension and Matrix Type: %s in %" PetscInt_FMT " dimension! 
Send mail to petsc-maint@mcs.anl.gov for code", dim, Atype, dim);
  } else if (is) {
    PetscCall(DMCreateMatrix_DA_IS(da, A));
  } else {
    ISLocalToGlobalMapping ltog;

    /* no DMDA-specific preallocation for this matrix type: just set up the matrix
       and attach the DMDA's local-to-global map so MatSetValuesLocal() works */
    PetscCall(MatSetBlockSize(A, dof));
    PetscCall(MatSetUp(A));
    PetscCall(DMGetLocalToGlobalMapping(da, &ltog));
    PetscCall(MatSetLocalToGlobalMapping(A, ltog, ltog));
  }
  /* record the ghost-region geometry of the grid so MatSetValuesStencil() can be used on A */
  PetscCall(DMDAGetGhostCorners(da, &starts[0], &starts[1], &starts[2], &dims[0], &dims[1], &dims[2]));
  PetscCall(MatSetStencil(A, dim, dims, starts, dof));
  PetscCall(MatSetDM(A, da));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  if (size > 1) {
    /* change viewer to display matrix in natural ordering */
    PetscCall(MatSetOperation(A, MATOP_VIEW, (void (*)(void))MatView_MPI_DA));
    PetscCall(MatSetOperation(A, MATOP_LOAD, (void (*)(void))MatLoad_MPI_DA));
  }
  *J = A;
  PetscFunctionReturn(PETSC_SUCCESS);
}

PETSC_EXTERN PetscErrorCode MatISSetPreallocation_IS(Mat, PetscInt, const PetscInt[], PetscInt, const PetscInt[]);

/*
  DMCreateMatrix_DA_IS - Sets the local-to-global mapping and (optionally) the
  preallocation of a MATIS matrix J for the DMDA dm.

  The l2g map of the DMDA covers every ghosted node, while the DMDAGetElements()
  decomposition touches only a subset of them; nodes not reachable through any
  local element are flagged with a global index of -1 so the MATIS map contains
  only element-covered nodes.
*/
PetscErrorCode DMCreateMatrix_DA_IS(DM dm, Mat J)
{
  DM_DA                 *da = (DM_DA *)dm->data;
  Mat                    lJ, P;
  ISLocalToGlobalMapping ltog;
  IS                     is;
  PetscBT                bt;
  const PetscInt        *e_loc, *idx;
  PetscInt               i, nel, nen, nv, dof, *gidx, n, N;

  /* The l2g map of DMDA has all ghosted nodes, and e_loc is a subset of all the local nodes (including the ghosted)
     We need to filter out the local indices that are not represented through the DMDAGetElements decomposition */
  PetscFunctionBegin;
  dof = da->w;
  PetscCall(MatSetBlockSize(J, dof));
  PetscCall(DMGetLocalToGlobalMapping(dm, &ltog));

  /* flag local elements indices in local DMDA numbering */
  PetscCall(ISLocalToGlobalMappingGetSize(ltog, &nv));
  PetscCall(PetscBTCreate(nv / dof, &bt));
  PetscCall(DMDAGetElements(dm, &nel, &nen, &e_loc)); /* this will throw an error if the stencil type is not DMDA_STENCIL_BOX */
  for (i = 0; i < nel * nen; i++) PetscCall(PetscBTSet(bt, e_loc[i]));

  /* filter out (set to -1) the global indices not used by the local elements */
  PetscCall(PetscMalloc1(nv / dof, &gidx));
  PetscCall(ISLocalToGlobalMappingGetBlockIndices(ltog, &idx));
  PetscCall(PetscArraycpy(gidx, idx, nv / dof));
  PetscCall(ISLocalToGlobalMappingRestoreBlockIndices(ltog, &idx));
  for (i = 0; i < nv / dof; i++)
    if (!PetscBTLookup(bt, i)) gidx[i] = -1;
  PetscCall(PetscBTDestroy(&bt));
  /* ownership of gidx transfers to the IS (PETSC_OWN_POINTER): do not free it here */
  PetscCall(ISCreateBlock(PetscObjectComm((PetscObject)dm), dof, nv / dof, gidx, PETSC_OWN_POINTER, &is));
  PetscCall(ISLocalToGlobalMappingCreateIS(is, &ltog));
  PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));
  PetscCall(ISLocalToGlobalMappingDestroy(&ltog));
  PetscCall(ISDestroy(&is));

  /* Preallocation */
  if (dm->prealloc_skip) {
    PetscCall(MatSetUp(J));
  } else {
    /* size a MATPREALLOCATOR like the local MATIS block, insert every element's
       blocked stencil into it, then transfer the resulting pattern to lJ */
    PetscCall(MatISGetLocalMat(J, &lJ));
    PetscCall(MatGetLocalToGlobalMapping(lJ, &ltog, NULL));
    PetscCall(MatCreate(PetscObjectComm((PetscObject)lJ), &P));
    PetscCall(MatSetType(P, MATPREALLOCATOR));
    PetscCall(MatSetLocalToGlobalMapping(P, ltog, ltog));
    PetscCall(MatGetSize(lJ, &N, NULL));
    PetscCall(MatGetLocalSize(lJ, &n, NULL));
    PetscCall(MatSetSizes(P, n, n, N, N));
    PetscCall(MatSetBlockSize(P, dof));
    PetscCall(MatSetUp(P));
    for (i = 0; i < nel; i++) PetscCall(MatSetValuesBlockedLocal(P, nen, e_loc + i * nen, nen, e_loc + i * nen, NULL, INSERT_VALUES));
    PetscCall(MatPreallocatorPreallocate(P, (PetscBool)!da->prealloc_only, lJ));
    PetscCall(MatISRestoreLocalMat(J, &lJ));
    /* NOTE(review): DMDARestoreElements() is reached only on this branch; when
       dm->prealloc_skip is true the elements obtained above are never restored --
       confirm this is intended */
    PetscCall(DMDARestoreElements(dm, &nel, &nen, &e_loc));
    PetscCall(MatDestroy(&P));

    PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  DMCreateMatrix_DA_2d_MPISELL - Preallocates, and unless da->prealloc_only also
  zero-fills and assembles, a SELL matrix for a 2d DMDA; each grid node owns a
  block of nc consecutive rows.
*/
PetscErrorCode DMCreateMatrix_DA_2d_MPISELL(DM da, Mat J)
{
  PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny, m, n,
  dim, s, *cols = NULL, k, nc, *rows = NULL, col, cnt, l, p;
  PetscInt               lstart, lend, pstart, pend, *dnz, *onz;
  MPI_Comm               comm;
  PetscScalar           *values;
  DMBoundaryType         bx, by;
  ISLocalToGlobalMapping ltog;
  DMDAStencilType        st;

  PetscFunctionBegin;
  /*
         nc - number of components per grid point
         col - number of colors needed in one direction for single component problem
  */
  PetscCall(DMDAGetInfo(da, &dim, &m, &n, NULL, NULL, NULL, NULL, &nc, &s, &bx, &by, NULL, &st));
  col = 2 * s + 1;
  PetscCall(DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL));
  PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL));
  PetscCall(PetscObjectGetComm((PetscObject)da, &comm));

  /* cols can hold the densest possible stencil row block: col*col stencil points x nc*nc couplings */
  PetscCall(PetscMalloc2(nc, &rows, col * col * nc * nc, &cols));
  PetscCall(DMGetLocalToGlobalMapping(da, &ltog));

  PetscCall(MatSetBlockSize(J, nc));
  /* determine the matrix preallocation information */
  MatPreallocateBegin(comm, nc * nx * ny, nc * nx * ny, dnz, onz);
  for (i = xs; i < xs + nx; i++) {
    /* periodic boundaries keep the full stencil; otherwise clip it at the physical domain edge */
    pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
    pend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));

    for (j = ys; j < ys + ny; j++) {
      slot = i - gxs + gnx * (j - gys); /* node index in the local (ghosted) ordering */

      lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
      lend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));

      cnt = 0;
      for (k = 0; k < nc; k++) {
        for (l = lstart; l < lend + 1; l++) {
          for (p = pstart; p < pend + 1; p++) {
            if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star have either l = 0 or p = 0 */
              cols[cnt++] = k + nc * (slot + gnx * l + p);
            }
          }
        }
        rows[k] = k + nc * (slot);
      }
      PetscCall(MatPreallocateSetLocal(ltog, nc, rows, ltog, cnt, cols, dnz, onz));
    }
  }
  PetscCall(MatSetBlockSize(J, nc));
  PetscCall(MatSeqSELLSetPreallocation(J, 0, dnz));
  PetscCall(MatMPISELLSetPreallocation(J, 0, dnz, 0, onz));
  MatPreallocateEnd(dnz, onz);

  PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    /* insert explicit zeros so the nonzero pattern is fixed before first use */
    PetscCall(PetscCalloc1(col * col * nc * nc, &values));
    for (i = xs; i < xs + nx; i++) {
      pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
      pend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));

      for (j = ys; j < ys + ny; j++) {
        slot = i - gxs + gnx * (j - gys);

        lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
        lend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));

        cnt = 0;
        for (k = 0; k < nc; k++) {
          for (l = lstart; l < lend + 1; l++) {
            for (p = pstart; p < pend + 1; p++) {
              if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star have either l = 0 or p = 0 */
                cols[cnt++] = k + nc * (slot + gnx * l + p);
              }
            }
          }
          rows[k] = k + nc * (slot);
        }
        PetscCall(MatSetValuesLocal(J, nc, rows, cnt, cols, values, INSERT_VALUES));
      }
    }
    PetscCall(PetscFree(values));
    /* do not copy values to GPU since they are all zero and not yet needed there */
    PetscCall(MatBindToCPU(J, PETSC_TRUE));
    PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatBindToCPU(J, PETSC_FALSE));
    PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  }
  PetscCall(PetscFree2(rows, cols));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  DMCreateMatrix_DA_3d_MPISELL - 3d analogue of DMCreateMatrix_DA_2d_MPISELL:
  preallocates (and zero-fills unless da->prealloc_only) a SELL matrix for a 3d DMDA.
*/
PetscErrorCode DMCreateMatrix_DA_3d_MPISELL(DM da, Mat J)
{
  PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
  PetscInt               m, n, dim, s, *cols = NULL, k, nc, *rows = NULL, col, cnt, l, p, *dnz = NULL, *onz = NULL;
  PetscInt               istart, iend, jstart, jend, kstart, kend, zs, nz, gzs, gnz, ii, jj, kk, M, N, P;
  MPI_Comm               comm;
  PetscScalar           *values;
  DMBoundaryType         bx, by, bz;
  ISLocalToGlobalMapping ltog;
  DMDAStencilType        st;

  PetscFunctionBegin;
  /*
         nc - number of components per grid point
         col - number of colors needed in one direction for single component problem
  */
  PetscCall(DMDAGetInfo(da, &dim, &m, &n, &p, &M, &N, &P, &nc, &s, &bx, &by, &bz, &st));
  col = 2 * s + 1;
  PetscCall(DMDAGetCorners(da, &xs, &ys, &zs, &nx, &ny, &nz));
  PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, &gzs, &gnx, &gny, &gnz));
  PetscCall(PetscObjectGetComm((PetscObject)da, &comm));

  PetscCall(PetscMalloc2(nc, &rows, col * col * col * nc * nc, &cols));
  PetscCall(DMGetLocalToGlobalMapping(da, &ltog));

  PetscCall(MatSetBlockSize(J, nc));
  /* determine the matrix preallocation information */
  MatPreallocateBegin(comm, nc * nx * ny * nz, nc * nx * ny * nz, dnz, onz);
  for (i = xs; i < xs + nx; i++) {
    /* periodic boundaries keep the full stencil; otherwise clip it at the physical domain edge */
    istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
    iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
    for (j = ys; j < ys + ny; j++) {
      jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
      jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
      for (k = zs; k < zs + nz; k++) {
        kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
        kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));

        slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs); /* node index in local (ghosted) ordering */

        cnt = 0;
        for (l = 0; l < nc; l++) {
          for (ii = istart; ii < iend + 1; ii++) {
            for (jj = jstart; jj < jend + 1; jj++) {
              for (kk = kstart; kk < kend + 1; kk++) {
                if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/
                  cols[cnt++] = l + nc * (slot + ii + gnx * jj + gnx * gny * kk);
                }
              }
            }
          }
          rows[l] = l + nc * (slot);
        }
        PetscCall(MatPreallocateSetLocal(ltog, nc, rows, ltog, cnt, cols, dnz, onz));
      }
    }
  }
  PetscCall(MatSetBlockSize(J, nc));
  PetscCall(MatSeqSELLSetPreallocation(J, 0, dnz));
  PetscCall(MatMPISELLSetPreallocation(J, 0, dnz, 0, onz));
  MatPreallocateEnd(dnz, onz);
  PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    /* NOTE(review): nc*nc entries per stencil point would suffice here (nc rows x at most
       col^3*nc cols), so the nc^3 factor over-allocates by a factor of nc -- confirm */
    PetscCall(PetscCalloc1(col * col * col * nc * nc * nc, &values));
    for (i = xs; i < xs + nx; i++) {
      istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
      iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
      for (j = ys; j < ys + ny; j++) {
        jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
        jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
        for (k = zs; k < zs + nz; k++) {
          kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
          kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));

          slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);

          cnt = 0;
          for (l = 0; l < nc; l++) {
            for (ii = istart; ii < iend + 1; ii++) {
              for (jj = jstart; jj < jend + 1; jj++) {
                for (kk = kstart; kk < kend + 1; kk++) {
                  if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/
                    cols[cnt++] = l + nc * (slot + ii + gnx * jj + gnx * gny * kk);
                  }
                }
              }
            }
            rows[l] = l + nc * (slot);
          }
          PetscCall(MatSetValuesLocal(J, nc, rows, cnt, cols, values, INSERT_VALUES));
        }
      }
    }
    PetscCall(PetscFree(values));
    /* do not copy values to GPU since they are all zero and not yet needed there */
    PetscCall(MatBindToCPU(J, PETSC_TRUE));
    PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatBindToCPU(J, PETSC_FALSE));
    PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  }
  PetscCall(PetscFree2(rows, cols));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  DMCreateMatrix_DA_2d_MPIAIJ - Preallocates, and unless da->prealloc_only also
  zero-fills and assembles, an AIJ matrix for a 2d DMDA (dense coupling between
  the nc components at each pair of stencil-connected nodes).
*/
PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ(DM da, Mat J)
{
  PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny, m, n, dim, s, *cols = NULL, k, nc, *rows = NULL, col, cnt, l, p, M, N;
  PetscInt               lstart, lend, pstart, pend,
  *dnz, *onz;
  MPI_Comm               comm;
  DMBoundaryType         bx, by;
  ISLocalToGlobalMapping ltog, mltog;
  DMDAStencilType        st;
  PetscBool              removedups = PETSC_FALSE, alreadyboundtocpu = PETSC_TRUE;

  PetscFunctionBegin;
  /*
         nc - number of components per grid point
         col - number of colors needed in one direction for single component problem
  */
  PetscCall(DMDAGetInfo(da, &dim, &m, &n, NULL, &M, &N, NULL, &nc, &s, &bx, &by, NULL, &st));
  if (bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE) PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_TRUE));
  col = 2 * s + 1;
  /*
       With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times
       because of "wrapping" around the end of the domain hitting an entry already counted in the other direction.
  */
  if (M == 1 && 2 * s >= m) removedups = PETSC_TRUE;
  if (N == 1 && 2 * s >= n) removedups = PETSC_TRUE;
  PetscCall(DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL));
  PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL));
  PetscCall(PetscObjectGetComm((PetscObject)da, &comm));

  PetscCall(PetscMalloc2(nc, &rows, col * col * nc * nc, &cols));
  PetscCall(DMGetLocalToGlobalMapping(da, &ltog));

  PetscCall(MatSetBlockSize(J, nc));
  /* determine the matrix preallocation information */
  MatPreallocateBegin(comm, nc * nx * ny, nc * nx * ny, dnz, onz);
  for (i = xs; i < xs + nx; i++) {
    /* periodic boundaries keep the full stencil; otherwise clip it at the physical domain edge */
    pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
    pend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));

    for (j = ys; j < ys + ny; j++) {
      slot = i - gxs + gnx * (j - gys); /* node index in local (ghosted) ordering */

      lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
      lend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));

      cnt = 0;
      for (k = 0; k < nc; k++) {
        for (l = lstart; l < lend + 1; l++) {
          for (p = pstart; p < pend + 1; p++) {
            if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star have either l = 0 or p = 0 */
              cols[cnt++] = k + nc * (slot + gnx * l + p);
            }
          }
        }
        rows[k] = k + nc * (slot);
      }
      if (removedups) PetscCall(MatPreallocateSetLocalRemoveDups(ltog, nc, rows, ltog, cnt, cols, dnz, onz));
      else PetscCall(MatPreallocateSetLocal(ltog, nc, rows, ltog, cnt, cols, dnz, onz));
    }
  }
  PetscCall(MatSetBlockSize(J, nc));
  PetscCall(MatSeqAIJSetPreallocation(J, 0, dnz));
  PetscCall(MatMPIAIJSetPreallocation(J, 0, dnz, 0, onz));
  MatPreallocateEnd(dnz, onz);
  /* only install the DMDA map if the matrix does not already carry one */
  PetscCall(MatGetLocalToGlobalMapping(J, &mltog, NULL));
  if (!mltog) PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    for (i = xs; i < xs + nx; i++) {
      pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
      pend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));

      for (j = ys; j < ys + ny; j++) {
        slot = i - gxs + gnx * (j - gys);

        lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
        lend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));

        cnt = 0;
        /* here the nc component columns per stencil point are generated consecutively */
        for (l = lstart; l < lend + 1; l++) {
          for (p = pstart; p < pend + 1; p++) {
            if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star have either l = 0 or p = 0 */
              cols[cnt++] = nc * (slot + gnx * l + p);
              for (k = 1; k < nc; k++) {
                cols[cnt] = 1 + cols[cnt - 1];
                cnt++;
              }
            }
          }
        }
        for (k = 0; k < nc; k++) rows[k] = k + nc * (slot);
        PetscCall(MatSetValuesLocal(J, nc, rows, cnt, cols, NULL, INSERT_VALUES));
      }
    }
    /* do not copy values to GPU since they are all zero and not yet needed there */
    PetscCall(MatBoundToCPU(J, &alreadyboundtocpu));
    PetscCall(MatBindToCPU(J, PETSC_TRUE));
    PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
    if (!alreadyboundtocpu) PetscCall(MatBindToCPU(J, PETSC_FALSE));
    PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
    if (bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE) PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_FALSE));
  }
  PetscCall(PetscFree2(rows, cols));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  DMCreateMatrix_DA_2d_MPIAIJ_Fill - Like DMCreateMatrix_DA_2d_MPIAIJ but honors
  the per-block fill patterns set with DMDASetBlockFills(): dfill (modified-CSR)
  for the diagonal block, ofill for the off-diagonal stencil blocks.
*/
PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ_Fill(DM da, Mat J)
{
  PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
  PetscInt               m, n, dim, s, *cols, k, nc, row, col, cnt, maxcnt = 0, l, p, M, N;
  PetscInt               lstart, lend, pstart, pend, *dnz, *onz;
  DM_DA                 *dd = (DM_DA *)da->data;
  PetscInt               ifill_col, *ofill = dd->ofill, *dfill = dd->dfill;
  MPI_Comm               comm;
  DMBoundaryType         bx, by;
  ISLocalToGlobalMapping ltog;
  DMDAStencilType        st;
  PetscBool              removedups = PETSC_FALSE;

  PetscFunctionBegin;
  /*
         nc - number of components per grid point
         col - number of colors needed in one direction for single component problem
  */
  PetscCall(DMDAGetInfo(da, &dim, &m, &n, NULL, &M, &N, NULL, &nc, &s, &bx, &by, NULL, &st));
  col =
  2 * s + 1;
  /*
       With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times
       because of "wrapping" around the end of the domain hitting an entry already counted in the other direction.
  */
  if (M == 1 && 2 * s >= m) removedups = PETSC_TRUE;
  if (N == 1 && 2 * s >= n) removedups = PETSC_TRUE;
  PetscCall(DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL));
  PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL));
  PetscCall(PetscObjectGetComm((PetscObject)da, &comm));

  /* one row at a time here, so only col*col*nc column slots are needed */
  PetscCall(PetscMalloc1(col * col * nc, &cols));
  PetscCall(DMGetLocalToGlobalMapping(da, &ltog));

  PetscCall(MatSetBlockSize(J, nc));
  /* determine the matrix preallocation information */
  MatPreallocateBegin(comm, nc * nx * ny, nc * nx * ny, dnz, onz);
  for (i = xs; i < xs + nx; i++) {
    pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
    pend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));

    for (j = ys; j < ys + ny; j++) {
      slot = i - gxs + gnx * (j - gys);

      lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
      lend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));

      for (k = 0; k < nc; k++) {
        cnt = 0;
        for (l = lstart; l < lend + 1; l++) {
          for (p = pstart; p < pend + 1; p++) {
            if (l || p) {
              /* off-diagonal stencil point: couple only the components listed in ofill's row k */
              if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star */
                for (ifill_col = ofill[k]; ifill_col < ofill[k + 1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc * (slot + gnx * l + p);
              }
            } else {
              /* diagonal block: use dfill's row k, or dense coupling if no dfill was given */
              if (dfill) {
                for (ifill_col = dfill[k]; ifill_col < dfill[k + 1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc * (slot + gnx * l + p);
              } else {
                for (ifill_col = 0; ifill_col < nc; ifill_col++) cols[cnt++] = ifill_col + nc * (slot + gnx * l + p);
              }
            }
          }
        }
        row    = k + nc * (slot);
        maxcnt = PetscMax(maxcnt, cnt);
        if (removedups) PetscCall(MatPreallocateSetLocalRemoveDups(ltog, 1, &row, ltog, cnt, cols, dnz, onz));
        else PetscCall(MatPreallocateSetLocal(ltog, 1, &row, ltog, cnt, cols, dnz, onz));
      }
    }
  }
  PetscCall(MatSeqAIJSetPreallocation(J, 0, dnz));
  PetscCall(MatMPIAIJSetPreallocation(J, 0, dnz, 0, onz));
  MatPreallocateEnd(dnz, onz);
  PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    for (i = xs; i < xs + nx; i++) {
      pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
      pend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));

      for (j = ys; j < ys + ny; j++) {
        slot = i - gxs + gnx * (j - gys);

        lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
        lend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));

        for (k = 0; k < nc; k++) {
          cnt = 0;
          for (l = lstart; l < lend + 1; l++) {
            for (p = pstart; p < pend + 1; p++) {
              if (l || p) {
                if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star */
                  for (ifill_col = ofill[k]; ifill_col < ofill[k + 1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc * (slot + gnx * l + p);
                }
              } else {
                if (dfill) {
                  for (ifill_col = dfill[k]; ifill_col < dfill[k + 1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc * (slot + gnx * l + p);
                } else {
                  for (ifill_col = 0; ifill_col < nc; ifill_col++) cols[cnt++] = ifill_col + nc * (slot + gnx * l + p);
                }
              }
            }
          }
          row = k + nc * (slot);
          PetscCall(MatSetValuesLocal(J, 1, &row, cnt, cols, NULL, INSERT_VALUES));
        }
      }
    }
    /* do not copy values to GPU since they are all zero and not yet needed there */
    PetscCall(MatBindToCPU(J, PETSC_TRUE));
    PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatBindToCPU(J, PETSC_FALSE));
    PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  }
  PetscCall(PetscFree(cols));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  DMCreateMatrix_DA_3d_MPIAIJ - 3d analogue of DMCreateMatrix_DA_2d_MPIAIJ:
  preallocates (and zero-fills unless da->prealloc_only) an AIJ matrix for a 3d DMDA.
*/
PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ(DM da, Mat J)
{
  PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
  PetscInt               m, n, dim, s, *cols = NULL, k, nc, *rows = NULL, col, cnt, l, p, *dnz = NULL, *onz = NULL;
  PetscInt               istart, iend, jstart, jend, kstart, kend, zs, nz, gzs, gnz, ii, jj, kk, M, N, P;
  MPI_Comm               comm;
  DMBoundaryType         bx, by, bz;
  ISLocalToGlobalMapping ltog, mltog;
  DMDAStencilType        st;
  PetscBool              removedups = PETSC_FALSE;

  PetscFunctionBegin;
  /*
         nc - number of components per grid point
         col - number of colors needed in one direction for single component problem
  */
  PetscCall(DMDAGetInfo(da, &dim, &m,
  &n, &p, &M, &N, &P, &nc, &s, &bx, &by, &bz, &st));
  if (bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE && bz == DM_BOUNDARY_NONE) PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_TRUE));
  col = 2 * s + 1;

  /*
       With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times
       because of "wrapping" around the end of the domain hitting an entry already counted in the other direction.
  */
  if (M == 1 && 2 * s >= m) removedups = PETSC_TRUE;
  if (N == 1 && 2 * s >= n) removedups = PETSC_TRUE;
  if (P == 1 && 2 * s >= p) removedups = PETSC_TRUE;

  PetscCall(DMDAGetCorners(da, &xs, &ys, &zs, &nx, &ny, &nz));
  PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, &gzs, &gnx, &gny, &gnz));
  PetscCall(PetscObjectGetComm((PetscObject)da, &comm));

  PetscCall(PetscMalloc2(nc, &rows, col * col * col * nc * nc, &cols));
  PetscCall(DMGetLocalToGlobalMapping(da, &ltog));

  PetscCall(MatSetBlockSize(J, nc));
  /* determine the matrix preallocation information */
  MatPreallocateBegin(comm, nc * nx * ny * nz, nc * nx * ny * nz, dnz, onz);
  for (i = xs; i < xs + nx; i++) {
    /* periodic boundaries keep the full stencil; otherwise clip it at the physical domain edge */
    istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
    iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
    for (j = ys; j < ys + ny; j++) {
      jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
      jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
      for (k = zs; k < zs + nz; k++) {
        kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
        kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));

        slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs); /* node index in local (ghosted) ordering */

        cnt = 0;
        for (l = 0; l < nc; l++) {
          for (ii = istart; ii < iend + 1; ii++) {
            for (jj = jstart; jj < jend + 1; jj++) {
              for (kk = kstart; kk < kend + 1; kk++) {
                if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/
                  cols[cnt++] = l + nc * (slot + ii + gnx * jj + gnx * gny * kk);
                }
              }
            }
          }
          rows[l] = l + nc * (slot);
        }
        if (removedups) PetscCall(MatPreallocateSetLocalRemoveDups(ltog, nc, rows, ltog, cnt, cols, dnz, onz));
        else PetscCall(MatPreallocateSetLocal(ltog, nc, rows, ltog, cnt, cols, dnz, onz));
      }
    }
  }
  PetscCall(MatSetBlockSize(J, nc));
  PetscCall(MatSeqAIJSetPreallocation(J, 0, dnz));
  PetscCall(MatMPIAIJSetPreallocation(J, 0, dnz, 0, onz));
  MatPreallocateEnd(dnz, onz);
  /* only install the DMDA map if the matrix does not already carry one */
  PetscCall(MatGetLocalToGlobalMapping(J, &mltog, NULL));
  if (!mltog) PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    for (i = xs; i < xs + nx; i++) {
      istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
      iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
      for (j = ys; j < ys + ny; j++) {
        jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
        jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
        for (k = zs; k < zs + nz; k++) {
          kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
          kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));

          slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);

          cnt = 0;
          /* loop order kk,jj,ii with the nc components generated consecutively per point */
          for (kk = kstart; kk < kend + 1; kk++) {
            for (jj = jstart; jj < jend + 1; jj++) {
              for (ii = istart; ii < iend + 1; ii++) {
                if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/
                  cols[cnt++] = nc * (slot + ii + gnx * jj + gnx * gny * kk);
                  for (l = 1; l < nc; l++) {
                    cols[cnt] = 1 + cols[cnt - 1];
                    cnt++;
                  }
                }
              }
            }
          }
          rows[0] = nc * (slot);
          for (l = 1; l < nc; l++) rows[l] = 1 + rows[l - 1];
          PetscCall(MatSetValuesLocal(J, nc, rows, cnt, cols, NULL, INSERT_VALUES));
        }
      }
    }
    /* do not copy values to GPU since they are all zero and not yet needed there */
    PetscCall(MatBindToCPU(J, PETSC_TRUE));
    PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
    if (bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE && bz == DM_BOUNDARY_NONE) PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_FALSE));
    PetscCall(MatBindToCPU(J, PETSC_FALSE));
    PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  }
  PetscCall(PetscFree2(rows, cols));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  DMCreateMatrix_DA_1d_MPIAIJ_Fill - Preallocates (and unless da->prealloc_only
  also zero-fills and assembles) an AIJ matrix for a 1d DMDA honoring the
  dfill/ofill block fill patterns; counts diagonal and off-process couplings
  per row explicitly instead of using MatPreallocate.
*/
PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ_Fill(DM da, Mat J)
{
  DM_DA                 *dd = (DM_DA *)da->data;
  PetscInt               xs, nx, i, j, gxs, gnx, row, k, l;
  PetscInt               m, dim, s, *cols = NULL, nc, cnt, maxcnt = 0, *ocols;
  PetscInt              *ofill = dd->ofill, *dfill = dd->dfill;
  DMBoundaryType         bx;
  ISLocalToGlobalMapping ltog;
  PetscMPIInt            rank, size;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)da), &rank));
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)da), &size));

  /*
         nc - number of components per grid point
  */
  PetscCall(DMDAGetInfo(da, &dim, &m, NULL,
  NULL, NULL, NULL, NULL, &nc, &s, &bx, NULL, NULL, NULL));
  PetscCheck(s <= 1, PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "Matrix creation for 1d not implemented correctly for stencil width larger than 1");
  PetscCall(DMDAGetCorners(da, &xs, NULL, NULL, &nx, NULL, NULL));
  PetscCall(DMDAGetGhostCorners(da, &gxs, NULL, NULL, &gnx, NULL, NULL));

  PetscCall(MatSetBlockSize(J, nc));
  /* cols = diagonal-part nonzero counts per row, ocols = off-process counts per row */
  PetscCall(PetscCalloc2(nx * nc, &cols, nx * nc, &ocols));

  /*
        note should be smaller for first and last process with no periodic
        does not handle dfill
  */
  cnt = 0;
  /* coupling with process to the left */
  for (i = 0; i < s; i++) {
    for (j = 0; j < nc; j++) {
      ocols[cnt] = ((rank == 0) ? 0 : (s - i) * (ofill[j + 1] - ofill[j]));
      cols[cnt]  = dfill[j + 1] - dfill[j] + (s + i) * (ofill[j + 1] - ofill[j]);
      if (rank == 0 && (dd->bx == DM_BOUNDARY_PERIODIC)) {
        /* with one rank the periodic wrap lands in the diagonal part, otherwise off-process */
        if (size > 1) ocols[cnt] += (s - i) * (ofill[j + 1] - ofill[j]);
        else cols[cnt] += (s - i) * (ofill[j + 1] - ofill[j]);
      }
      maxcnt = PetscMax(maxcnt, ocols[cnt] + cols[cnt]);
      cnt++;
    }
  }
  /* interior rows: full stencil, entirely on-process */
  for (i = s; i < nx - s; i++) {
    for (j = 0; j < nc; j++) {
      cols[cnt] = dfill[j + 1] - dfill[j] + 2 * s * (ofill[j + 1] - ofill[j]);
      maxcnt    = PetscMax(maxcnt, ocols[cnt] + cols[cnt]);
      cnt++;
    }
  }
  /* coupling with process to the right */
  for (i = nx - s; i < nx; i++) {
    for (j = 0; j < nc; j++) {
      ocols[cnt] = ((rank == (size - 1)) ? 0 : (i - nx + s + 1) * (ofill[j + 1] - ofill[j]));
      cols[cnt]  = dfill[j + 1] - dfill[j] + (s + nx - i - 1) * (ofill[j + 1] - ofill[j]);
      if ((rank == size - 1) && (dd->bx == DM_BOUNDARY_PERIODIC)) {
        if (size > 1) ocols[cnt] += (i - nx + s + 1) * (ofill[j + 1] - ofill[j]);
        else cols[cnt] += (i - nx + s + 1) * (ofill[j + 1] - ofill[j]);
      }
      maxcnt = PetscMax(maxcnt, ocols[cnt] + cols[cnt]);
      cnt++;
    }
  }

  PetscCall(MatSeqAIJSetPreallocation(J, 0, cols));
  PetscCall(MatMPIAIJSetPreallocation(J, 0, cols, 0, ocols));
  PetscCall(PetscFree2(cols, ocols));

  PetscCall(DMGetLocalToGlobalMapping(da, &ltog));
  PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    PetscCall(PetscMalloc1(maxcnt, &cols));
    row = xs * nc; /* global row numbers; this routine uses MatSetValues(), not the local variant */
    /* coupling with process to the left */
    for (i = xs; i < xs + s; i++) {
      for (j = 0; j < nc; j++) {
        cnt = 0;
        if (rank) {
          for (l = 0; l < s; l++) {
            for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i - s + l) * nc + ofill[k];
          }
        }
        if (rank == 0 && (dd->bx == DM_BOUNDARY_PERIODIC)) {
          /* wrap the left neighbors around the right end of the domain */
          for (l = 0; l < s; l++) {
            for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (m + i - s - l) * nc + ofill[k];
          }
        }
        if (dfill) {
          for (k = dfill[j]; k < dfill[j + 1]; k++) cols[cnt++] = i * nc + dfill[k];
        } else {
          for (k = 0; k < nc; k++) cols[cnt++] = i * nc + k;
        }
        for (l = 0; l < s; l++) {
          for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i + s - l) * nc + ofill[k];
        }
        PetscCall(MatSetValues(J, 1, &row, cnt, cols, NULL, INSERT_VALUES));
        row++;
      }
    }
    for (i = xs + s; i < xs + nx - s; i++) {
      for (j = 0; j < nc; j++) {
        cnt = 0;
        for (l = 0; l < s; l++) {
          for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i - s + l) * nc + ofill[k];
        }
        if (dfill) {
          for (k = dfill[j]; k < dfill[j + 1]; k++) cols[cnt++] = i * nc + dfill[k];
        } else {
          for (k = 0; k < nc; k++) cols[cnt++] = i * nc + k;
        }
        for (l = 0; l < s; l++) {
          for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i + s - l) * nc + ofill[k];
        }
        PetscCall(MatSetValues(J, 1, &row, cnt, cols, NULL, INSERT_VALUES));
        row++;
      }
    }
    /* coupling with process to the right */
    for (i = xs + nx - s; i < xs + nx; i++) {
      for (j = 0; j < nc; j++) {
        cnt = 0;
        for (l = 0; l < s; l++) {
          for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i - s + l) * nc + ofill[k];
        }
        if (dfill) {
          for (k = dfill[j]; k < dfill[j + 1]; k++) cols[cnt++] = i * nc + dfill[k];
        } else {
          for (k = 0; k < nc; k++) cols[cnt++] = i * nc + k;
        }
        if (rank < size - 1) {
          for (l = 0; l < s; l++) {
            for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i + s - l) * nc + ofill[k];
          }
        }
        if ((rank == size - 1) && (dd->bx == DM_BOUNDARY_PERIODIC)) {
          /* wrap the right neighbors around to the left end; valid since s <= 1 is enforced above */
          for (l = 0; l < s; l++) {
            for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i - s - l - m + 2) * nc + ofill[k];
          }
        }
        PetscCall(MatSetValues(J, 1, &row, cnt, cols, NULL, INSERT_VALUES));
        row++;
      }
    }
    PetscCall(PetscFree(cols));
    /* do not copy values to GPU since they are all zero and not yet needed there */
    PetscCall(MatBindToCPU(J, PETSC_TRUE));
    PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatBindToCPU(J, PETSC_FALSE));
    PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  DMCreateMatrix_DA_1d_MPIAIJ - Preallocates (and unless da->prealloc_only also
  zero-fills and assembles) an AIJ matrix for a 1d DMDA with dense nc x nc
  coupling; uses a uniform per-row estimate of col*nc nonzeros.
*/
PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ(DM da, Mat J)
{
PetscInt xs, nx, i, i1, slot, gxs, gnx; 1584 PetscInt m, dim, s, *cols = NULL, nc, *rows = NULL, col, cnt, l; 1585 PetscInt istart, iend; 1586 DMBoundaryType bx; 1587 ISLocalToGlobalMapping ltog, mltog; 1588 1589 PetscFunctionBegin; 1590 /* 1591 nc - number of components per grid point 1592 col - number of colors needed in one direction for single component problem 1593 */ 1594 PetscCall(DMDAGetInfo(da, &dim, &m, NULL, NULL, NULL, NULL, NULL, &nc, &s, &bx, NULL, NULL, NULL)); 1595 if (bx == DM_BOUNDARY_NONE) PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_TRUE)); 1596 col = 2 * s + 1; 1597 1598 PetscCall(DMDAGetCorners(da, &xs, NULL, NULL, &nx, NULL, NULL)); 1599 PetscCall(DMDAGetGhostCorners(da, &gxs, NULL, NULL, &gnx, NULL, NULL)); 1600 1601 PetscCall(MatSetBlockSize(J, nc)); 1602 PetscCall(MatSeqAIJSetPreallocation(J, col * nc, NULL)); 1603 PetscCall(MatMPIAIJSetPreallocation(J, col * nc, NULL, col * nc, NULL)); 1604 1605 PetscCall(DMGetLocalToGlobalMapping(da, <og)); 1606 PetscCall(MatGetLocalToGlobalMapping(J, &mltog, NULL)); 1607 if (!mltog) PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog)); 1608 1609 /* 1610 For each node in the grid: we get the neighbors in the local (on processor ordering 1611 that includes the ghost points) then MatSetValuesLocal() maps those indices to the global 1612 PETSc ordering. 
1613 */ 1614 if (!da->prealloc_only) { 1615 PetscCall(PetscMalloc2(nc, &rows, col * nc * nc, &cols)); 1616 for (i = xs; i < xs + nx; i++) { 1617 istart = PetscMax(-s, gxs - i); 1618 iend = PetscMin(s, gxs + gnx - i - 1); 1619 slot = i - gxs; 1620 1621 cnt = 0; 1622 for (i1 = istart; i1 < iend + 1; i1++) { 1623 cols[cnt++] = nc * (slot + i1); 1624 for (l = 1; l < nc; l++) { 1625 cols[cnt] = 1 + cols[cnt - 1]; 1626 cnt++; 1627 } 1628 } 1629 rows[0] = nc * (slot); 1630 for (l = 1; l < nc; l++) rows[l] = 1 + rows[l - 1]; 1631 PetscCall(MatSetValuesLocal(J, nc, rows, cnt, cols, NULL, INSERT_VALUES)); 1632 } 1633 /* do not copy values to GPU since they are all zero and not yet needed there */ 1634 PetscCall(MatBindToCPU(J, PETSC_TRUE)); 1635 PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY)); 1636 PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY)); 1637 if (bx == DM_BOUNDARY_NONE) PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_FALSE)); 1638 PetscCall(MatBindToCPU(J, PETSC_FALSE)); 1639 PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE)); 1640 PetscCall(PetscFree2(rows, cols)); 1641 } 1642 PetscFunctionReturn(PETSC_SUCCESS); 1643 } 1644 1645 PetscErrorCode DMCreateMatrix_DA_1d_SeqAIJ_NoPreallocation(DM da, Mat J) 1646 { 1647 PetscInt xs, nx, i, i1, slot, gxs, gnx; 1648 PetscInt m, dim, s, *cols = NULL, nc, *rows = NULL, col, cnt, l; 1649 PetscInt istart, iend; 1650 DMBoundaryType bx; 1651 ISLocalToGlobalMapping ltog, mltog; 1652 1653 PetscFunctionBegin; 1654 /* 1655 nc - number of components per grid point 1656 col - number of colors needed in one direction for single component problem 1657 */ 1658 PetscCall(DMDAGetInfo(da, &dim, &m, NULL, NULL, NULL, NULL, NULL, &nc, &s, &bx, NULL, NULL, NULL)); 1659 col = 2 * s + 1; 1660 1661 PetscCall(DMDAGetCorners(da, &xs, NULL, NULL, &nx, NULL, NULL)); 1662 PetscCall(DMDAGetGhostCorners(da, &gxs, NULL, NULL, &gnx, NULL, NULL)); 1663 1664 PetscCall(MatSetBlockSize(J, nc)); 1665 
PetscCall(MatSeqAIJSetTotalPreallocation(J, nx * nc * col * nc)); 1666 1667 PetscCall(DMGetLocalToGlobalMapping(da, <og)); 1668 PetscCall(MatGetLocalToGlobalMapping(J, &mltog, NULL)); 1669 if (!mltog) PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog)); 1670 1671 /* 1672 For each node in the grid: we get the neighbors in the local (on processor ordering 1673 that includes the ghost points) then MatSetValuesLocal() maps those indices to the global 1674 PETSc ordering. 1675 */ 1676 if (!da->prealloc_only) { 1677 PetscCall(PetscMalloc2(nc, &rows, col * nc * nc, &cols)); 1678 for (i = xs; i < xs + nx; i++) { 1679 istart = PetscMax(-s, gxs - i); 1680 iend = PetscMin(s, gxs + gnx - i - 1); 1681 slot = i - gxs; 1682 1683 cnt = 0; 1684 for (i1 = istart; i1 < iend + 1; i1++) { 1685 cols[cnt++] = nc * (slot + i1); 1686 for (l = 1; l < nc; l++) { 1687 cols[cnt] = 1 + cols[cnt - 1]; 1688 cnt++; 1689 } 1690 } 1691 rows[0] = nc * (slot); 1692 for (l = 1; l < nc; l++) rows[l] = 1 + rows[l - 1]; 1693 PetscCall(MatSetValuesLocal(J, nc, rows, cnt, cols, NULL, INSERT_VALUES)); 1694 } 1695 /* do not copy values to GPU since they are all zero and not yet needed there */ 1696 PetscCall(MatBindToCPU(J, PETSC_TRUE)); 1697 PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY)); 1698 PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY)); 1699 if (bx == DM_BOUNDARY_NONE) PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_FALSE)); 1700 PetscCall(MatBindToCPU(J, PETSC_FALSE)); 1701 PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE)); 1702 PetscCall(PetscFree2(rows, cols)); 1703 } 1704 PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_FALSE)); 1705 PetscFunctionReturn(PETSC_SUCCESS); 1706 } 1707 1708 PetscErrorCode DMCreateMatrix_DA_2d_MPIBAIJ(DM da, Mat J) 1709 { 1710 PetscInt xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny; 1711 PetscInt m, n, dim, s, *cols, nc, col, cnt, *dnz, *onz; 1712 PetscInt istart, iend, jstart, jend, ii, jj; 1713 MPI_Comm comm; 1714 PetscScalar *values; 1715 
DMBoundaryType bx, by; 1716 DMDAStencilType st; 1717 ISLocalToGlobalMapping ltog; 1718 1719 PetscFunctionBegin; 1720 /* 1721 nc - number of components per grid point 1722 col - number of colors needed in one direction for single component problem 1723 */ 1724 PetscCall(DMDAGetInfo(da, &dim, &m, &n, NULL, NULL, NULL, NULL, &nc, &s, &bx, &by, NULL, &st)); 1725 col = 2 * s + 1; 1726 1727 PetscCall(DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL)); 1728 PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL)); 1729 PetscCall(PetscObjectGetComm((PetscObject)da, &comm)); 1730 1731 PetscCall(PetscMalloc1(col * col * nc * nc, &cols)); 1732 1733 PetscCall(DMGetLocalToGlobalMapping(da, <og)); 1734 1735 /* determine the matrix preallocation information */ 1736 MatPreallocateBegin(comm, nx * ny, nx * ny, dnz, onz); 1737 for (i = xs; i < xs + nx; i++) { 1738 istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i)); 1739 iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1)); 1740 for (j = ys; j < ys + ny; j++) { 1741 jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j)); 1742 jend = (by == DM_BOUNDARY_PERIODIC) ? 
s : (PetscMin(s, n - j - 1)); 1743 slot = i - gxs + gnx * (j - gys); 1744 1745 /* Find block columns in block row */ 1746 cnt = 0; 1747 for (ii = istart; ii < iend + 1; ii++) { 1748 for (jj = jstart; jj < jend + 1; jj++) { 1749 if (st == DMDA_STENCIL_BOX || !ii || !jj) { /* BOX or on the STAR */ 1750 cols[cnt++] = slot + ii + gnx * jj; 1751 } 1752 } 1753 } 1754 PetscCall(MatPreallocateSetLocalBlock(ltog, 1, &slot, ltog, cnt, cols, dnz, onz)); 1755 } 1756 } 1757 PetscCall(MatSeqBAIJSetPreallocation(J, nc, 0, dnz)); 1758 PetscCall(MatMPIBAIJSetPreallocation(J, nc, 0, dnz, 0, onz)); 1759 MatPreallocateEnd(dnz, onz); 1760 1761 PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog)); 1762 1763 /* 1764 For each node in the grid: we get the neighbors in the local (on processor ordering 1765 that includes the ghost points) then MatSetValuesLocal() maps those indices to the global 1766 PETSc ordering. 1767 */ 1768 if (!da->prealloc_only) { 1769 PetscCall(PetscCalloc1(col * col * nc * nc, &values)); 1770 for (i = xs; i < xs + nx; i++) { 1771 istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i)); 1772 iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1)); 1773 for (j = ys; j < ys + ny; j++) { 1774 jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j)); 1775 jend = (by == DM_BOUNDARY_PERIODIC) ? 
s : (PetscMin(s, n - j - 1)); 1776 slot = i - gxs + gnx * (j - gys); 1777 cnt = 0; 1778 for (ii = istart; ii < iend + 1; ii++) { 1779 for (jj = jstart; jj < jend + 1; jj++) { 1780 if (st == DMDA_STENCIL_BOX || !ii || !jj) { /* BOX or on the STAR */ 1781 cols[cnt++] = slot + ii + gnx * jj; 1782 } 1783 } 1784 } 1785 PetscCall(MatSetValuesBlockedLocal(J, 1, &slot, cnt, cols, values, INSERT_VALUES)); 1786 } 1787 } 1788 PetscCall(PetscFree(values)); 1789 /* do not copy values to GPU since they are all zero and not yet needed there */ 1790 PetscCall(MatBindToCPU(J, PETSC_TRUE)); 1791 PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY)); 1792 PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY)); 1793 PetscCall(MatBindToCPU(J, PETSC_FALSE)); 1794 PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE)); 1795 } 1796 PetscCall(PetscFree(cols)); 1797 PetscFunctionReturn(PETSC_SUCCESS); 1798 } 1799 1800 PetscErrorCode DMCreateMatrix_DA_3d_MPIBAIJ(DM da, Mat J) 1801 { 1802 PetscInt xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny; 1803 PetscInt m, n, dim, s, *cols, k, nc, col, cnt, p, *dnz, *onz; 1804 PetscInt istart, iend, jstart, jend, kstart, kend, zs, nz, gzs, gnz, ii, jj, kk; 1805 MPI_Comm comm; 1806 PetscScalar *values; 1807 DMBoundaryType bx, by, bz; 1808 DMDAStencilType st; 1809 ISLocalToGlobalMapping ltog; 1810 1811 PetscFunctionBegin; 1812 /* 1813 nc - number of components per grid point 1814 col - number of colors needed in one direction for single component problem 1815 */ 1816 PetscCall(DMDAGetInfo(da, &dim, &m, &n, &p, NULL, NULL, NULL, &nc, &s, &bx, &by, &bz, &st)); 1817 col = 2 * s + 1; 1818 1819 PetscCall(DMDAGetCorners(da, &xs, &ys, &zs, &nx, &ny, &nz)); 1820 PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, &gzs, &gnx, &gny, &gnz)); 1821 PetscCall(PetscObjectGetComm((PetscObject)da, &comm)); 1822 1823 PetscCall(PetscMalloc1(col * col * col, &cols)); 1824 1825 PetscCall(DMGetLocalToGlobalMapping(da, <og)); 1826 1827 /* determine the matrix preallocation 
information */ 1828 MatPreallocateBegin(comm, nx * ny * nz, nx * ny * nz, dnz, onz); 1829 for (i = xs; i < xs + nx; i++) { 1830 istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i)); 1831 iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1)); 1832 for (j = ys; j < ys + ny; j++) { 1833 jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j)); 1834 jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1)); 1835 for (k = zs; k < zs + nz; k++) { 1836 kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k)); 1837 kend = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1)); 1838 1839 slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs); 1840 1841 /* Find block columns in block row */ 1842 cnt = 0; 1843 for (ii = istart; ii < iend + 1; ii++) { 1844 for (jj = jstart; jj < jend + 1; jj++) { 1845 for (kk = kstart; kk < kend + 1; kk++) { 1846 if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/ 1847 cols[cnt++] = slot + ii + gnx * jj + gnx * gny * kk; 1848 } 1849 } 1850 } 1851 } 1852 PetscCall(MatPreallocateSetLocalBlock(ltog, 1, &slot, ltog, cnt, cols, dnz, onz)); 1853 } 1854 } 1855 } 1856 PetscCall(MatSeqBAIJSetPreallocation(J, nc, 0, dnz)); 1857 PetscCall(MatMPIBAIJSetPreallocation(J, nc, 0, dnz, 0, onz)); 1858 MatPreallocateEnd(dnz, onz); 1859 1860 PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog)); 1861 1862 /* 1863 For each node in the grid: we get the neighbors in the local (on processor ordering 1864 that includes the ghost points) then MatSetValuesLocal() maps those indices to the global 1865 PETSc ordering. 1866 */ 1867 if (!da->prealloc_only) { 1868 PetscCall(PetscCalloc1(col * col * col * nc * nc, &values)); 1869 for (i = xs; i < xs + nx; i++) { 1870 istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i)); 1871 iend = (bx == DM_BOUNDARY_PERIODIC) ? 
s : (PetscMin(s, m - i - 1)); 1872 for (j = ys; j < ys + ny; j++) { 1873 jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j)); 1874 jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1)); 1875 for (k = zs; k < zs + nz; k++) { 1876 kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k)); 1877 kend = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1)); 1878 1879 slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs); 1880 1881 cnt = 0; 1882 for (ii = istart; ii < iend + 1; ii++) { 1883 for (jj = jstart; jj < jend + 1; jj++) { 1884 for (kk = kstart; kk < kend + 1; kk++) { 1885 if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/ 1886 cols[cnt++] = slot + ii + gnx * jj + gnx * gny * kk; 1887 } 1888 } 1889 } 1890 } 1891 PetscCall(MatSetValuesBlockedLocal(J, 1, &slot, cnt, cols, values, INSERT_VALUES)); 1892 } 1893 } 1894 } 1895 PetscCall(PetscFree(values)); 1896 /* do not copy values to GPU since they are all zero and not yet needed there */ 1897 PetscCall(MatBindToCPU(J, PETSC_TRUE)); 1898 PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY)); 1899 PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY)); 1900 PetscCall(MatBindToCPU(J, PETSC_FALSE)); 1901 PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE)); 1902 } 1903 PetscCall(PetscFree(cols)); 1904 PetscFunctionReturn(PETSC_SUCCESS); 1905 } 1906 1907 /* 1908 This helper is for of SBAIJ preallocation, to discard the lower-triangular values which are difficult to 1909 identify in the local ordering with periodic domain. 
1910 */ 1911 static PetscErrorCode L2GFilterUpperTriangular(ISLocalToGlobalMapping ltog, PetscInt *row, PetscInt *cnt, PetscInt col[]) 1912 { 1913 PetscInt i, n; 1914 1915 PetscFunctionBegin; 1916 PetscCall(ISLocalToGlobalMappingApplyBlock(ltog, 1, row, row)); 1917 PetscCall(ISLocalToGlobalMappingApplyBlock(ltog, *cnt, col, col)); 1918 for (i = 0, n = 0; i < *cnt; i++) { 1919 if (col[i] >= *row) col[n++] = col[i]; 1920 } 1921 *cnt = n; 1922 PetscFunctionReturn(PETSC_SUCCESS); 1923 } 1924 1925 PetscErrorCode DMCreateMatrix_DA_2d_MPISBAIJ(DM da, Mat J) 1926 { 1927 PetscInt xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny; 1928 PetscInt m, n, dim, s, *cols, nc, col, cnt, *dnz, *onz; 1929 PetscInt istart, iend, jstart, jend, ii, jj; 1930 MPI_Comm comm; 1931 PetscScalar *values; 1932 DMBoundaryType bx, by; 1933 DMDAStencilType st; 1934 ISLocalToGlobalMapping ltog; 1935 1936 PetscFunctionBegin; 1937 /* 1938 nc - number of components per grid point 1939 col - number of colors needed in one direction for single component problem 1940 */ 1941 PetscCall(DMDAGetInfo(da, &dim, &m, &n, NULL, NULL, NULL, NULL, &nc, &s, &bx, &by, NULL, &st)); 1942 col = 2 * s + 1; 1943 1944 PetscCall(DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL)); 1945 PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL)); 1946 PetscCall(PetscObjectGetComm((PetscObject)da, &comm)); 1947 1948 PetscCall(PetscMalloc1(col * col * nc * nc, &cols)); 1949 1950 PetscCall(DMGetLocalToGlobalMapping(da, <og)); 1951 1952 /* determine the matrix preallocation information */ 1953 MatPreallocateBegin(comm, nx * ny, nx * ny, dnz, onz); 1954 for (i = xs; i < xs + nx; i++) { 1955 istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i)); 1956 iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1)); 1957 for (j = ys; j < ys + ny; j++) { 1958 jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j)); 1959 jend = (by == DM_BOUNDARY_PERIODIC) ? 
s : (PetscMin(s, n - j - 1)); 1960 slot = i - gxs + gnx * (j - gys); 1961 1962 /* Find block columns in block row */ 1963 cnt = 0; 1964 for (ii = istart; ii < iend + 1; ii++) { 1965 for (jj = jstart; jj < jend + 1; jj++) { 1966 if (st == DMDA_STENCIL_BOX || !ii || !jj) cols[cnt++] = slot + ii + gnx * jj; 1967 } 1968 } 1969 PetscCall(L2GFilterUpperTriangular(ltog, &slot, &cnt, cols)); 1970 PetscCall(MatPreallocateSymmetricSetBlock(slot, cnt, cols, dnz, onz)); 1971 } 1972 } 1973 PetscCall(MatSeqSBAIJSetPreallocation(J, nc, 0, dnz)); 1974 PetscCall(MatMPISBAIJSetPreallocation(J, nc, 0, dnz, 0, onz)); 1975 MatPreallocateEnd(dnz, onz); 1976 1977 PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog)); 1978 1979 /* 1980 For each node in the grid: we get the neighbors in the local (on processor ordering 1981 that includes the ghost points) then MatSetValuesLocal() maps those indices to the global 1982 PETSc ordering. 1983 */ 1984 if (!da->prealloc_only) { 1985 PetscCall(PetscCalloc1(col * col * nc * nc, &values)); 1986 for (i = xs; i < xs + nx; i++) { 1987 istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i)); 1988 iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1)); 1989 for (j = ys; j < ys + ny; j++) { 1990 jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j)); 1991 jend = (by == DM_BOUNDARY_PERIODIC) ? 
s : (PetscMin(s, n - j - 1)); 1992 slot = i - gxs + gnx * (j - gys); 1993 1994 /* Find block columns in block row */ 1995 cnt = 0; 1996 for (ii = istart; ii < iend + 1; ii++) { 1997 for (jj = jstart; jj < jend + 1; jj++) { 1998 if (st == DMDA_STENCIL_BOX || !ii || !jj) cols[cnt++] = slot + ii + gnx * jj; 1999 } 2000 } 2001 PetscCall(L2GFilterUpperTriangular(ltog, &slot, &cnt, cols)); 2002 PetscCall(MatSetValuesBlocked(J, 1, &slot, cnt, cols, values, INSERT_VALUES)); 2003 } 2004 } 2005 PetscCall(PetscFree(values)); 2006 /* do not copy values to GPU since they are all zero and not yet needed there */ 2007 PetscCall(MatBindToCPU(J, PETSC_TRUE)); 2008 PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY)); 2009 PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY)); 2010 PetscCall(MatBindToCPU(J, PETSC_FALSE)); 2011 PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE)); 2012 } 2013 PetscCall(PetscFree(cols)); 2014 PetscFunctionReturn(PETSC_SUCCESS); 2015 } 2016 2017 PetscErrorCode DMCreateMatrix_DA_3d_MPISBAIJ(DM da, Mat J) 2018 { 2019 PetscInt xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny; 2020 PetscInt m, n, dim, s, *cols, k, nc, col, cnt, p, *dnz, *onz; 2021 PetscInt istart, iend, jstart, jend, kstart, kend, zs, nz, gzs, gnz, ii, jj, kk; 2022 MPI_Comm comm; 2023 PetscScalar *values; 2024 DMBoundaryType bx, by, bz; 2025 DMDAStencilType st; 2026 ISLocalToGlobalMapping ltog; 2027 2028 PetscFunctionBegin; 2029 /* 2030 nc - number of components per grid point 2031 col - number of colors needed in one direction for single component problem 2032 */ 2033 PetscCall(DMDAGetInfo(da, &dim, &m, &n, &p, NULL, NULL, NULL, &nc, &s, &bx, &by, &bz, &st)); 2034 col = 2 * s + 1; 2035 2036 PetscCall(DMDAGetCorners(da, &xs, &ys, &zs, &nx, &ny, &nz)); 2037 PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, &gzs, &gnx, &gny, &gnz)); 2038 PetscCall(PetscObjectGetComm((PetscObject)da, &comm)); 2039 2040 /* create the matrix */ 2041 PetscCall(PetscMalloc1(col * col * col, &cols)); 2042 2043 
PetscCall(DMGetLocalToGlobalMapping(da, <og)); 2044 2045 /* determine the matrix preallocation information */ 2046 MatPreallocateBegin(comm, nx * ny * nz, nx * ny * nz, dnz, onz); 2047 for (i = xs; i < xs + nx; i++) { 2048 istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i)); 2049 iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1)); 2050 for (j = ys; j < ys + ny; j++) { 2051 jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j)); 2052 jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1)); 2053 for (k = zs; k < zs + nz; k++) { 2054 kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k)); 2055 kend = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1)); 2056 2057 slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs); 2058 2059 /* Find block columns in block row */ 2060 cnt = 0; 2061 for (ii = istart; ii < iend + 1; ii++) { 2062 for (jj = jstart; jj < jend + 1; jj++) { 2063 for (kk = kstart; kk < kend + 1; kk++) { 2064 if ((st == DMDA_STENCIL_BOX) || (!ii && !jj) || (!jj && !kk) || (!ii && !kk)) cols[cnt++] = slot + ii + gnx * jj + gnx * gny * kk; 2065 } 2066 } 2067 } 2068 PetscCall(L2GFilterUpperTriangular(ltog, &slot, &cnt, cols)); 2069 PetscCall(MatPreallocateSymmetricSetBlock(slot, cnt, cols, dnz, onz)); 2070 } 2071 } 2072 } 2073 PetscCall(MatSeqSBAIJSetPreallocation(J, nc, 0, dnz)); 2074 PetscCall(MatMPISBAIJSetPreallocation(J, nc, 0, dnz, 0, onz)); 2075 MatPreallocateEnd(dnz, onz); 2076 2077 PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog)); 2078 2079 /* 2080 For each node in the grid: we get the neighbors in the local (on processor ordering 2081 that includes the ghost points) then MatSetValuesLocal() maps those indices to the global 2082 PETSc ordering. 2083 */ 2084 if (!da->prealloc_only) { 2085 PetscCall(PetscCalloc1(col * col * col * nc * nc, &values)); 2086 for (i = xs; i < xs + nx; i++) { 2087 istart = (bx == DM_BOUNDARY_PERIODIC) ? 
-s : (PetscMax(-s, -i)); 2088 iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1)); 2089 for (j = ys; j < ys + ny; j++) { 2090 jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j)); 2091 jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1)); 2092 for (k = zs; k < zs + nz; k++) { 2093 kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k)); 2094 kend = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1)); 2095 2096 slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs); 2097 2098 cnt = 0; 2099 for (ii = istart; ii < iend + 1; ii++) { 2100 for (jj = jstart; jj < jend + 1; jj++) { 2101 for (kk = kstart; kk < kend + 1; kk++) { 2102 if ((st == DMDA_STENCIL_BOX) || (!ii && !jj) || (!jj && !kk) || (!ii && !kk)) cols[cnt++] = slot + ii + gnx * jj + gnx * gny * kk; 2103 } 2104 } 2105 } 2106 PetscCall(L2GFilterUpperTriangular(ltog, &slot, &cnt, cols)); 2107 PetscCall(MatSetValuesBlocked(J, 1, &slot, cnt, cols, values, INSERT_VALUES)); 2108 } 2109 } 2110 } 2111 PetscCall(PetscFree(values)); 2112 /* do not copy values to GPU since they are all zero and not yet needed there */ 2113 PetscCall(MatBindToCPU(J, PETSC_TRUE)); 2114 PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY)); 2115 PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY)); 2116 PetscCall(MatBindToCPU(J, PETSC_FALSE)); 2117 PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE)); 2118 } 2119 PetscCall(PetscFree(cols)); 2120 PetscFunctionReturn(PETSC_SUCCESS); 2121 } 2122 2123 PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ_Fill(DM da, Mat J) 2124 { 2125 PetscInt xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny; 2126 PetscInt m, n, dim, s, *cols, k, nc, row, col, cnt, maxcnt = 0, l, p, *dnz, *onz; 2127 PetscInt istart, iend, jstart, jend, kstart, kend, zs, nz, gzs, gnz, ii, jj, kk, M, N, P; 2128 DM_DA *dd = (DM_DA *)da->data; 2129 PetscInt ifill_col, *dfill = dd->dfill, *ofill = dd->ofill; 2130 MPI_Comm comm; 2131 PetscScalar *values; 2132 DMBoundaryType bx, by, 
bz; 2133 ISLocalToGlobalMapping ltog; 2134 DMDAStencilType st; 2135 PetscBool removedups = PETSC_FALSE; 2136 2137 PetscFunctionBegin; 2138 /* 2139 nc - number of components per grid point 2140 col - number of colors needed in one direction for single component problem 2141 */ 2142 PetscCall(DMDAGetInfo(da, &dim, &m, &n, &p, &M, &N, &P, &nc, &s, &bx, &by, &bz, &st)); 2143 col = 2 * s + 1; 2144 2145 /* 2146 With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times 2147 because of "wrapping" around the end of the domain hitting an entry already counted in the other direction. 2148 */ 2149 if (M == 1 && 2 * s >= m) removedups = PETSC_TRUE; 2150 if (N == 1 && 2 * s >= n) removedups = PETSC_TRUE; 2151 if (P == 1 && 2 * s >= p) removedups = PETSC_TRUE; 2152 2153 PetscCall(DMDAGetCorners(da, &xs, &ys, &zs, &nx, &ny, &nz)); 2154 PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, &gzs, &gnx, &gny, &gnz)); 2155 PetscCall(PetscObjectGetComm((PetscObject)da, &comm)); 2156 2157 PetscCall(PetscMalloc1(col * col * col * nc, &cols)); 2158 PetscCall(DMGetLocalToGlobalMapping(da, <og)); 2159 2160 /* determine the matrix preallocation information */ 2161 MatPreallocateBegin(comm, nc * nx * ny * nz, nc * nx * ny * nz, dnz, onz); 2162 2163 PetscCall(MatSetBlockSize(J, nc)); 2164 for (i = xs; i < xs + nx; i++) { 2165 istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i)); 2166 iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1)); 2167 for (j = ys; j < ys + ny; j++) { 2168 jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j)); 2169 jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1)); 2170 for (k = zs; k < zs + nz; k++) { 2171 kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k)); 2172 kend = (bz == DM_BOUNDARY_PERIODIC) ? 
s : (PetscMin(s, p - k - 1)); 2173 2174 slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs); 2175 2176 for (l = 0; l < nc; l++) { 2177 cnt = 0; 2178 for (ii = istart; ii < iend + 1; ii++) { 2179 for (jj = jstart; jj < jend + 1; jj++) { 2180 for (kk = kstart; kk < kend + 1; kk++) { 2181 if (ii || jj || kk) { 2182 if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/ 2183 for (ifill_col = ofill[l]; ifill_col < ofill[l + 1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc * (slot + ii + gnx * jj + gnx * gny * kk); 2184 } 2185 } else { 2186 if (dfill) { 2187 for (ifill_col = dfill[l]; ifill_col < dfill[l + 1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc * (slot + ii + gnx * jj + gnx * gny * kk); 2188 } else { 2189 for (ifill_col = 0; ifill_col < nc; ifill_col++) cols[cnt++] = ifill_col + nc * (slot + ii + gnx * jj + gnx * gny * kk); 2190 } 2191 } 2192 } 2193 } 2194 } 2195 row = l + nc * (slot); 2196 maxcnt = PetscMax(maxcnt, cnt); 2197 if (removedups) PetscCall(MatPreallocateSetLocalRemoveDups(ltog, 1, &row, ltog, cnt, cols, dnz, onz)); 2198 else PetscCall(MatPreallocateSetLocal(ltog, 1, &row, ltog, cnt, cols, dnz, onz)); 2199 } 2200 } 2201 } 2202 } 2203 PetscCall(MatSeqAIJSetPreallocation(J, 0, dnz)); 2204 PetscCall(MatMPIAIJSetPreallocation(J, 0, dnz, 0, onz)); 2205 MatPreallocateEnd(dnz, onz); 2206 PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog)); 2207 2208 /* 2209 For each node in the grid: we get the neighbors in the local (on processor ordering 2210 that includes the ghost points) then MatSetValuesLocal() maps those indices to the global 2211 PETSc ordering. 2212 */ 2213 if (!da->prealloc_only) { 2214 PetscCall(PetscCalloc1(maxcnt, &values)); 2215 for (i = xs; i < xs + nx; i++) { 2216 istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i)); 2217 iend = (bx == DM_BOUNDARY_PERIODIC) ? 
s : (PetscMin(s, m - i - 1)); 2218 for (j = ys; j < ys + ny; j++) { 2219 jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j)); 2220 jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1)); 2221 for (k = zs; k < zs + nz; k++) { 2222 kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k)); 2223 kend = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1)); 2224 2225 slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs); 2226 2227 for (l = 0; l < nc; l++) { 2228 cnt = 0; 2229 for (ii = istart; ii < iend + 1; ii++) { 2230 for (jj = jstart; jj < jend + 1; jj++) { 2231 for (kk = kstart; kk < kend + 1; kk++) { 2232 if (ii || jj || kk) { 2233 if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/ 2234 for (ifill_col = ofill[l]; ifill_col < ofill[l + 1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc * (slot + ii + gnx * jj + gnx * gny * kk); 2235 } 2236 } else { 2237 if (dfill) { 2238 for (ifill_col = dfill[l]; ifill_col < dfill[l + 1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc * (slot + ii + gnx * jj + gnx * gny * kk); 2239 } else { 2240 for (ifill_col = 0; ifill_col < nc; ifill_col++) cols[cnt++] = ifill_col + nc * (slot + ii + gnx * jj + gnx * gny * kk); 2241 } 2242 } 2243 } 2244 } 2245 } 2246 row = l + nc * (slot); 2247 PetscCall(MatSetValuesLocal(J, 1, &row, cnt, cols, values, INSERT_VALUES)); 2248 } 2249 } 2250 } 2251 } 2252 PetscCall(PetscFree(values)); 2253 /* do not copy values to GPU since they are all zero and not yet needed there */ 2254 PetscCall(MatBindToCPU(J, PETSC_TRUE)); 2255 PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY)); 2256 PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY)); 2257 PetscCall(MatBindToCPU(J, PETSC_FALSE)); 2258 PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE)); 2259 } 2260 PetscCall(PetscFree(cols)); 2261 PetscFunctionReturn(PETSC_SUCCESS); 2262 } 2263