1 #include <petsc/private/dmdaimpl.h> /*I "petscdmda.h" I*/ 2 #include <petscmat.h> 3 #include <petscbt.h> 4 5 extern PetscErrorCode DMCreateColoring_DA_1d_MPIAIJ(DM, ISColoringType, ISColoring *); 6 extern PetscErrorCode DMCreateColoring_DA_2d_MPIAIJ(DM, ISColoringType, ISColoring *); 7 extern PetscErrorCode DMCreateColoring_DA_2d_5pt_MPIAIJ(DM, ISColoringType, ISColoring *); 8 extern PetscErrorCode DMCreateColoring_DA_3d_MPIAIJ(DM, ISColoringType, ISColoring *); 9 10 /* 11 For ghost i that may be negative or greater than the upper bound this 12 maps it into the 0:m-1 range using periodicity 13 */ 14 #define SetInRange(i, m) ((i < 0) ? m + i : ((i >= m) ? i - m : i)) 15 16 static PetscErrorCode DMDASetBlockFills_Private(const PetscInt *dfill, PetscInt w, PetscInt **rfill) 17 { 18 PetscInt i, j, nz, *fill; 19 20 PetscFunctionBegin; 21 if (!dfill) PetscFunctionReturn(PETSC_SUCCESS); 22 23 /* count number nonzeros */ 24 nz = 0; 25 for (i = 0; i < w; i++) { 26 for (j = 0; j < w; j++) { 27 if (dfill[w * i + j]) nz++; 28 } 29 } 30 PetscCall(PetscMalloc1(nz + w + 1, &fill)); 31 /* construct modified CSR storage of nonzero structure */ 32 /* fill[0 -- w] marks starts of each row of column indices (and end of last row) 33 so fill[1] - fill[0] gives number of nonzeros in first row etc */ 34 nz = w + 1; 35 for (i = 0; i < w; i++) { 36 fill[i] = nz; 37 for (j = 0; j < w; j++) { 38 if (dfill[w * i + j]) { 39 fill[nz] = j; 40 nz++; 41 } 42 } 43 } 44 fill[w] = nz; 45 46 *rfill = fill; 47 PetscFunctionReturn(PETSC_SUCCESS); 48 } 49 50 static PetscErrorCode DMDASetBlockFillsSparse_Private(const PetscInt *dfillsparse, PetscInt w, PetscInt **rfill) 51 { 52 PetscInt nz; 53 54 PetscFunctionBegin; 55 if (!dfillsparse) PetscFunctionReturn(PETSC_SUCCESS); 56 57 /* Determine number of non-zeros */ 58 nz = (dfillsparse[w] - w - 1); 59 60 /* Allocate space for our copy of the given sparse matrix representation. */ 61 PetscCall(PetscMalloc1(nz + w + 1, rfill)); 62 PetscCall(PetscArraycpy(*rfill, dfillsparse, nz + w + 1)); 63 PetscFunctionReturn(PETSC_SUCCESS); 64 } 65 66 static PetscErrorCode DMDASetBlockFills_Private2(DM_DA *dd) 67 { 68 PetscInt i, k, cnt = 1; 69 70 PetscFunctionBegin; 71 /* ofillcount tracks the columns of ofill that have any nonzero in them; the value in each location is the number of 72 columns to the left with any nonzeros in them plus 1 */ 73 PetscCall(PetscCalloc1(dd->w, &dd->ofillcols)); 74 for (i = 0; i < dd->w; i++) { 75 for (k = dd->ofill[i]; k < dd->ofill[i + 1]; k++) dd->ofillcols[dd->ofill[k]] = 1; 76 } 77 for (i = 0; i < dd->w; i++) { 78 if (dd->ofillcols[i]) dd->ofillcols[i] = cnt++; 79 } 80 PetscFunctionReturn(PETSC_SUCCESS); 81 } 82 83 /*@ 84 DMDASetBlockFills - Sets the fill pattern in each block for a multi-component problem 85 of the matrix returned by `DMCreateMatrix()`. 86 87 Logically Collective 88 89 Input Parameters: 90 + da - the `DMDA` 91 . dfill - the fill pattern in the diagonal block (may be `NULL`, means use dense block) 92 - ofill - the fill pattern in the off-diagonal blocks 93 94 Level: developer 95 96 Notes: 97 This only makes sense when you are doing multicomponent problems but using the 98 `MATMPIAIJ` matrix format 99 100 The format for `dfill` and `ofill` is a 2 dimensional dof by dof matrix with 1 entries 101 representing coupling and 0 entries for missing coupling. For example 102 .vb 103 dfill[9] = {1, 0, 0, 104 1, 1, 0, 105 0, 1, 1} 106 .ve 107 means that row 0 is coupled with only itself in the diagonal block, row 1 is coupled with 108 itself and row 0 (in the diagonal block) and row 2 is coupled with itself and row 1 (in the 109 diagonal block). 110 111 `DMDASetGetMatrix()` allows you to provide general code for those more complicated nonzero patterns then 112 can be represented in the `dfill`, `ofill` format 113 114 Contributed by\: 115 Glenn Hammond 116 117 .seealso: [](sec_struct), `DM`, `DMDA`, `DMCreateMatrix()`, `DMDASetGetMatrix()`, `DMSetMatrixPreallocateOnly()`, `DMDASetBlockFillsSparse()` 118 @*/ 119 PetscErrorCode DMDASetBlockFills(DM da, const PetscInt *dfill, const PetscInt *ofill) 120 { 121 DM_DA *dd = (DM_DA *)da->data; 122 123 PetscFunctionBegin; 124 /* save the given dfill and ofill information */ 125 PetscCall(DMDASetBlockFills_Private(dfill, dd->w, &dd->dfill)); 126 PetscCall(DMDASetBlockFills_Private(ofill, dd->w, &dd->ofill)); 127 128 /* count nonzeros in ofill columns */ 129 PetscCall(DMDASetBlockFills_Private2(dd)); 130 PetscFunctionReturn(PETSC_SUCCESS); 131 } 132 133 /*@ 134 DMDASetBlockFillsSparse - Sets the fill pattern in each block for a multi-component problem 135 of the matrix returned by `DMCreateMatrix()`, using sparse representations 136 of fill patterns. 137 138 Logically Collective 139 140 Input Parameters: 141 + da - the `DMDA` 142 . dfillsparse - the sparse fill pattern in the diagonal block (may be `NULL`, means use dense block) 143 - ofillsparse - the sparse fill pattern in the off-diagonal blocks 144 145 Level: developer 146 147 Notes: 148 This only makes sense when you are doing multicomponent problems but using the 149 `MATMPIAIJ` matrix format 150 151 The format for `dfill` and `ofill` is a sparse representation of a 152 dof-by-dof matrix with 1 entries representing coupling and 0 entries 153 for missing coupling. The sparse representation is a 1 dimensional 154 array of length nz + dof + 1, where nz is the number of non-zeros in 155 the matrix. The first dof entries in the array give the 156 starting array indices of each row's items in the rest of the array, 157 the dof+1st item contains the value nz + dof + 1 (i.e. the entire length of the array) 158 and the remaining nz items give the column indices of each of 159 the 1s within the logical 2D matrix. Each row's items within 160 the array are the column indices of the 1s within that row 161 of the 2D matrix. PETSc developers may recognize that this is the 162 same format as that computed by the `DMDASetBlockFills_Private()` 163 function from a dense 2D matrix representation. 164 165 `DMDASetGetMatrix()` allows you to provide general code for those more complicated nonzero patterns then 166 can be represented in the `dfill`, `ofill` format 167 168 Contributed by\: 169 Philip C. Roth 170 171 .seealso: [](sec_struct), `DM`, `DMDA`, `DMDASetBlockFills()`, `DMCreateMatrix()`, `DMDASetGetMatrix()`, `DMSetMatrixPreallocateOnly()` 172 @*/ 173 PetscErrorCode DMDASetBlockFillsSparse(DM da, const PetscInt *dfillsparse, const PetscInt *ofillsparse) 174 { 175 DM_DA *dd = (DM_DA *)da->data; 176 177 PetscFunctionBegin; 178 /* save the given dfill and ofill information */ 179 PetscCall(DMDASetBlockFillsSparse_Private(dfillsparse, dd->w, &dd->dfill)); 180 PetscCall(DMDASetBlockFillsSparse_Private(ofillsparse, dd->w, &dd->ofill)); 181 182 /* count nonzeros in ofill columns */ 183 PetscCall(DMDASetBlockFills_Private2(dd)); 184 PetscFunctionReturn(PETSC_SUCCESS); 185 } 186 187 PetscErrorCode DMCreateColoring_DA(DM da, ISColoringType ctype, ISColoring *coloring) 188 { 189 PetscInt dim, m, n, p, nc; 190 DMBoundaryType bx, by, bz; 191 MPI_Comm comm; 192 PetscMPIInt size; 193 PetscBool isBAIJ; 194 DM_DA *dd = (DM_DA *)da->data; 195 196 PetscFunctionBegin; 197 /* 198 m 199 ------------------------------------------------------ 200 | | 201 | | 202 | ---------------------- | 203 | | | | 204 n | yn | | | 205 | | | | 206 | .--------------------- | 207 | (xs,ys) xn | 208 | . | 209 | (gxs,gys) | 210 | | 211 ----------------------------------------------------- 212 */ 213 214 /* 215 nc - number of components per grid point 216 col - number of colors needed in one direction for single component problem 217 218 */ 219 PetscCall(DMDAGetInfo(da, &dim, NULL, NULL, NULL, &m, &n, &p, &nc, NULL, &bx, &by, &bz, NULL)); 220 221 PetscCall(PetscObjectGetComm((PetscObject)da, &comm)); 222 PetscCallMPI(MPI_Comm_size(comm, &size)); 223 if (ctype == IS_COLORING_LOCAL) { 224 PetscCheck(!((m == 1 && bx == DM_BOUNDARY_PERIODIC) || (n == 1 && by == DM_BOUNDARY_PERIODIC) || (p == 1 && bz == DM_BOUNDARY_PERIODIC)), PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "IS_COLORING_LOCAL cannot be used for periodic boundary condition having both sides of the domain on the same process"); 225 } 226 227 /* Tell the DMDA it has 1 degree of freedom per grid point so that the coloring for BAIJ 228 matrices is for the blocks, not the individual matrix elements */ 229 PetscCall(PetscStrbeginswith(da->mattype, MATBAIJ, &isBAIJ)); 230 if (!isBAIJ) PetscCall(PetscStrbeginswith(da->mattype, MATMPIBAIJ, &isBAIJ)); 231 if (!isBAIJ) PetscCall(PetscStrbeginswith(da->mattype, MATSEQBAIJ, &isBAIJ)); 232 if (isBAIJ) { 233 dd->w = 1; 234 dd->xs = dd->xs / nc; 235 dd->xe = dd->xe / nc; 236 dd->Xs = dd->Xs / nc; 237 dd->Xe = dd->Xe / nc; 238 } 239 240 /* 241 We do not provide a getcoloring function in the DMDA operations because 242 the basic DMDA does not know about matrices. We think of DMDA as being 243 more low-level then matrices. 244 */ 245 if (dim == 1) PetscCall(DMCreateColoring_DA_1d_MPIAIJ(da, ctype, coloring)); 246 else if (dim == 2) PetscCall(DMCreateColoring_DA_2d_MPIAIJ(da, ctype, coloring)); 247 else if (dim == 3) PetscCall(DMCreateColoring_DA_3d_MPIAIJ(da, ctype, coloring)); 248 else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "Not done for %" PetscInt_FMT " dimension, send us mail petsc-maint@mcs.anl.gov for code", dim); 249 if (isBAIJ) { 250 dd->w = nc; 251 dd->xs = dd->xs * nc; 252 dd->xe = dd->xe * nc; 253 dd->Xs = dd->Xs * nc; 254 dd->Xe = dd->Xe * nc; 255 } 256 PetscFunctionReturn(PETSC_SUCCESS); 257 } 258 259 PetscErrorCode DMCreateColoring_DA_2d_MPIAIJ(DM da, ISColoringType ctype, ISColoring *coloring) 260 { 261 PetscInt xs, ys, nx, ny, i, j, ii, gxs, gys, gnx, gny, m, n, M, N, dim, s, k, nc, col; 262 PetscInt ncolors = 0; 263 MPI_Comm comm; 264 DMBoundaryType bx, by; 265 DMDAStencilType st; 266 ISColoringValue *colors; 267 DM_DA *dd = (DM_DA *)da->data; 268 269 PetscFunctionBegin; 270 /* 271 nc - number of components per grid point 272 col - number of colors needed in one direction for single component problem 273 274 */ 275 PetscCall(DMDAGetInfo(da, &dim, &m, &n, NULL, &M, &N, NULL, &nc, &s, &bx, &by, NULL, &st)); 276 col = 2 * s + 1; 277 PetscCall(DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL)); 278 PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL)); 279 PetscCall(PetscObjectGetComm((PetscObject)da, &comm)); 280 281 /* special case as taught to us by Paul Hovland */ 282 if (st == DMDA_STENCIL_STAR && s == 1) { 283 PetscCall(DMCreateColoring_DA_2d_5pt_MPIAIJ(da, ctype, coloring)); 284 } else { 285 if (ctype == IS_COLORING_GLOBAL) { 286 if (!dd->localcoloring) { 287 PetscCall(PetscMalloc1(nc * nx * ny, &colors)); 288 ii = 0; 289 for (j = ys; j < ys + ny; j++) { 290 for (i = xs; i < xs + nx; i++) { 291 for (k = 0; k < nc; k++) PetscCall(ISColoringValueCast(k + nc * ((i % col) + col * (j % col)), colors + ii++)); 292 } 293 } 294 ncolors = nc + nc * (col - 1 + col * (col - 1)); 295 PetscCall(ISColoringCreate(comm, ncolors, nc * nx * ny, colors, PETSC_OWN_POINTER, &dd->localcoloring)); 296 } 297 *coloring = dd->localcoloring; 298 } else if (ctype == IS_COLORING_LOCAL) { 299 if (!dd->ghostedcoloring) { 300 PetscCall(PetscMalloc1(nc * gnx * gny, &colors)); 301 ii = 0; 302 for (j = gys; j < gys + gny; j++) { 303 for (i = gxs; i < gxs + gnx; i++) { 304 for (k = 0; k < nc; k++) { 305 /* the complicated stuff is to handle periodic boundaries */ 306 PetscCall(ISColoringValueCast(k + nc * ((SetInRange(i, m) % col) + col * (SetInRange(j, n) % col)), colors + ii++)); 307 } 308 } 309 } 310 ncolors = nc + nc * (col - 1 + col * (col - 1)); 311 PetscCall(ISColoringCreate(comm, ncolors, nc * gnx * gny, colors, PETSC_OWN_POINTER, &dd->ghostedcoloring)); 312 /* PetscIntView(ncolors,(PetscInt*)colors,0); */ 313 314 PetscCall(ISColoringSetType(dd->ghostedcoloring, IS_COLORING_LOCAL)); 315 } 316 *coloring = dd->ghostedcoloring; 317 } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_ARG_WRONG, "Unknown ISColoringType %d", (int)ctype); 318 } 319 PetscCall(ISColoringReference(*coloring)); 320 PetscFunctionReturn(PETSC_SUCCESS); 321 } 322 323 PetscErrorCode DMCreateColoring_DA_3d_MPIAIJ(DM da, ISColoringType ctype, ISColoring *coloring) 324 { 325 PetscInt xs, ys, nx, ny, i, j, gxs, gys, gnx, gny, m, n, p, dim, s, k, nc, col, zs, gzs, ii, l, nz, gnz, M, N, P; 326 PetscInt ncolors; 327 MPI_Comm comm; 328 DMBoundaryType bx, by, bz; 329 DMDAStencilType st; 330 ISColoringValue *colors; 331 DM_DA *dd = (DM_DA *)da->data; 332 333 PetscFunctionBegin; 334 /* 335 nc - number of components per grid point 336 col - number of colors needed in one direction for single component problem 337 */ 338 PetscCall(DMDAGetInfo(da, &dim, &m, &n, &p, &M, &N, &P, &nc, &s, &bx, &by, &bz, &st)); 339 col = 2 * s + 1; 340 PetscCheck(bx != DM_BOUNDARY_PERIODIC || (m % col) == 0, PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "For coloring efficiency ensure number of grid points in X is divisible by 2*stencil_width + 1"); 341 PetscCheck(by != DM_BOUNDARY_PERIODIC || (n % col) == 0, PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "For coloring efficiency ensure number of grid points in Y is divisible by 2*stencil_width + 1"); 342 PetscCheck(bz != DM_BOUNDARY_PERIODIC || (p % col) == 0, PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "For coloring efficiency ensure number of grid points in Z is divisible by 2*stencil_width + 1"); 343 344 PetscCall(DMDAGetCorners(da, &xs, &ys, &zs, &nx, &ny, &nz)); 345 PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, &gzs, &gnx, &gny, &gnz)); 346 PetscCall(PetscObjectGetComm((PetscObject)da, &comm)); 347 348 /* create the coloring */ 349 if (ctype == IS_COLORING_GLOBAL) { 350 if (!dd->localcoloring) { 351 PetscCall(PetscMalloc1(nc * nx * ny * nz, &colors)); 352 ii = 0; 353 for (k = zs; k < zs + nz; k++) { 354 for (j = ys; j < ys + ny; j++) { 355 for (i = xs; i < xs + nx; i++) { 356 for (l = 0; l < nc; l++) PetscCall(ISColoringValueCast(l + nc * ((i % col) + col * (j % col) + col * col * (k % col)), colors + ii++)); 357 } 358 } 359 } 360 ncolors = nc + nc * (col - 1 + col * (col - 1) + col * col * (col - 1)); 361 PetscCall(ISColoringCreate(comm, ncolors, nc * nx * ny * nz, colors, PETSC_OWN_POINTER, &dd->localcoloring)); 362 } 363 *coloring = dd->localcoloring; 364 } else if (ctype == IS_COLORING_LOCAL) { 365 if (!dd->ghostedcoloring) { 366 PetscCall(PetscMalloc1(nc * gnx * gny * gnz, &colors)); 367 ii = 0; 368 for (k = gzs; k < gzs + gnz; k++) { 369 for (j = gys; j < gys + gny; j++) { 370 for (i = gxs; i < gxs + gnx; i++) { 371 for (l = 0; l < nc; l++) { 372 /* the complicated stuff is to handle periodic boundaries */ 373 PetscCall(ISColoringValueCast(l + nc * ((SetInRange(i, m) % col) + col * (SetInRange(j, n) % col) + col * col * (SetInRange(k, p) % col)), colors + ii++)); 374 } 375 } 376 } 377 } 378 ncolors = nc + nc * (col - 1 + col * (col - 1) + col * col * (col - 1)); 379 PetscCall(ISColoringCreate(comm, ncolors, nc * gnx * gny * gnz, colors, PETSC_OWN_POINTER, &dd->ghostedcoloring)); 380 PetscCall(ISColoringSetType(dd->ghostedcoloring, IS_COLORING_LOCAL)); 381 } 382 *coloring = dd->ghostedcoloring; 383 } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_ARG_WRONG, "Unknown ISColoringType %d", (int)ctype); 384 PetscCall(ISColoringReference(*coloring)); 385 PetscFunctionReturn(PETSC_SUCCESS); 386 } 387 388 PetscErrorCode DMCreateColoring_DA_1d_MPIAIJ(DM da, ISColoringType ctype, ISColoring *coloring) 389 { 390 PetscInt xs, nx, i, i1, gxs, gnx, l, m, M, dim, s, nc, col; 391 PetscInt ncolors; 392 MPI_Comm comm; 393 DMBoundaryType bx; 394 ISColoringValue *colors; 395 DM_DA *dd = (DM_DA *)da->data; 396 397 PetscFunctionBegin; 398 /* 399 nc - number of components per grid point 400 col - number of colors needed in one direction for single component problem 401 */ 402 PetscCall(DMDAGetInfo(da, &dim, &m, NULL, NULL, &M, NULL, NULL, &nc, &s, &bx, NULL, NULL, NULL)); 403 col = 2 * s + 1; 404 PetscCheck(bx != DM_BOUNDARY_PERIODIC || !(m % col), PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "IS_COLORING_GLOBAL can only be used for periodic boundary conditions if the number of grid points %" PetscInt_FMT " is divisible by the number of colors %" PetscInt_FMT, m, col); 405 PetscCall(DMDAGetCorners(da, &xs, NULL, NULL, &nx, NULL, NULL)); 406 PetscCall(DMDAGetGhostCorners(da, &gxs, NULL, NULL, &gnx, NULL, NULL)); 407 PetscCall(PetscObjectGetComm((PetscObject)da, &comm)); 408 409 /* create the coloring */ 410 if (ctype == IS_COLORING_GLOBAL) { 411 if (!dd->localcoloring) { 412 PetscCall(PetscMalloc1(nc * nx, &colors)); 413 if (dd->ofillcols) { 414 PetscInt tc = 0; 415 for (i = 0; i < nc; i++) tc += (PetscInt)(dd->ofillcols[i] > 0); 416 i1 = 0; 417 for (i = xs; i < xs + nx; i++) { 418 for (l = 0; l < nc; l++) { 419 if (dd->ofillcols[l] && (i % col)) { 420 PetscCall(ISColoringValueCast(nc - 1 + tc * ((i % col) - 1) + dd->ofillcols[l], colors + i1++)); 421 } else { 422 PetscCall(ISColoringValueCast(l, colors + i1++)); 423 } 424 } 425 } 426 ncolors = nc + 2 * s * tc; 427 } else { 428 i1 = 0; 429 for (i = xs; i < xs + nx; i++) { 430 for (l = 0; l < nc; l++) PetscCall(ISColoringValueCast(l + nc * (i % col), colors + i1++)); 431 } 432 ncolors = nc + nc * (col - 1); 433 } 434 PetscCall(ISColoringCreate(comm, ncolors, nc * nx, colors, PETSC_OWN_POINTER, &dd->localcoloring)); 435 } 436 *coloring = dd->localcoloring; 437 } else if (ctype == IS_COLORING_LOCAL) { 438 if (!dd->ghostedcoloring) { 439 PetscCall(PetscMalloc1(nc * gnx, &colors)); 440 i1 = 0; 441 for (i = gxs; i < gxs + gnx; i++) { 442 for (l = 0; l < nc; l++) { 443 /* the complicated stuff is to handle periodic boundaries */ 444 PetscCall(ISColoringValueCast(l + nc * (SetInRange(i, m) % col), colors + i1++)); 445 } 446 } 447 ncolors = nc + nc * (col - 1); 448 PetscCall(ISColoringCreate(comm, ncolors, nc * gnx, colors, PETSC_OWN_POINTER, &dd->ghostedcoloring)); 449 PetscCall(ISColoringSetType(dd->ghostedcoloring, IS_COLORING_LOCAL)); 450 } 451 *coloring = dd->ghostedcoloring; 452 } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_ARG_WRONG, "Unknown ISColoringType %d", (int)ctype); 453 PetscCall(ISColoringReference(*coloring)); 454 PetscFunctionReturn(PETSC_SUCCESS); 455 } 456 457 PetscErrorCode DMCreateColoring_DA_2d_5pt_MPIAIJ(DM da, ISColoringType ctype, ISColoring *coloring) 458 { 459 PetscInt xs, ys, nx, ny, i, j, ii, gxs, gys, gnx, gny, m, n, dim, s, k, nc; 460 PetscInt ncolors; 461 MPI_Comm comm; 462 DMBoundaryType bx, by; 463 ISColoringValue *colors; 464 DM_DA *dd = (DM_DA *)da->data; 465 466 PetscFunctionBegin; 467 /* 468 nc - number of components per grid point 469 col - number of colors needed in one direction for single component problem 470 */ 471 PetscCall(DMDAGetInfo(da, &dim, &m, &n, NULL, NULL, NULL, NULL, &nc, &s, &bx, &by, NULL, NULL)); 472 PetscCall(DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL)); 473 PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL)); 474 PetscCall(PetscObjectGetComm((PetscObject)da, &comm)); 475 /* create the coloring */ 476 if (ctype == IS_COLORING_GLOBAL) { 477 if (!dd->localcoloring) { 478 PetscCall(PetscMalloc1(nc * nx * ny, &colors)); 479 ii = 0; 480 for (j = ys; j < ys + ny; j++) { 481 for (i = xs; i < xs + nx; i++) { 482 for (k = 0; k < nc; k++) PetscCall(ISColoringValueCast(k + nc * ((3 * j + i) % 5), colors + ii++)); 483 } 484 } 485 ncolors = 5 * nc; 486 PetscCall(ISColoringCreate(comm, ncolors, nc * nx * ny, colors, PETSC_OWN_POINTER, &dd->localcoloring)); 487 } 488 *coloring = dd->localcoloring; 489 } else if (ctype == IS_COLORING_LOCAL) { 490 if (!dd->ghostedcoloring) { 491 PetscCall(PetscMalloc1(nc * gnx * gny, &colors)); 492 ii = 0; 493 for (j = gys; j < gys + gny; j++) { 494 for (i = gxs; i < gxs + gnx; i++) { 495 for (k = 0; k < nc; k++) PetscCall(ISColoringValueCast(k + nc * ((3 * SetInRange(j, n) + SetInRange(i, m)) % 5), colors + ii++)); 496 } 497 } 498 ncolors = 5 * nc; 499 PetscCall(ISColoringCreate(comm, ncolors, nc * gnx * gny, colors, PETSC_OWN_POINTER, &dd->ghostedcoloring)); 500 PetscCall(ISColoringSetType(dd->ghostedcoloring, IS_COLORING_LOCAL)); 501 } 502 *coloring = dd->ghostedcoloring; 503 } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_ARG_WRONG, "Unknown ISColoringType %d", (int)ctype); 504 PetscFunctionReturn(PETSC_SUCCESS); 505 } 506 507 /* =========================================================================== */ 508 extern PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ(DM, Mat); 509 extern PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ_Fill(DM, Mat); 510 extern PetscErrorCode DMCreateMatrix_DA_1d_SeqAIJ_NoPreallocation(DM, Mat); 511 extern PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ(DM, Mat); 512 extern PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ_Fill(DM, Mat); 513 extern PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ(DM, Mat); 514 extern PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ_Fill(DM, Mat); 515 extern PetscErrorCode DMCreateMatrix_DA_2d_MPIBAIJ(DM, Mat); 516 extern PetscErrorCode DMCreateMatrix_DA_3d_MPIBAIJ(DM, Mat); 517 extern PetscErrorCode DMCreateMatrix_DA_2d_MPISBAIJ(DM, Mat); 518 extern PetscErrorCode DMCreateMatrix_DA_3d_MPISBAIJ(DM, Mat); 519 extern PetscErrorCode DMCreateMatrix_DA_2d_MPISELL(DM, Mat); 520 extern PetscErrorCode DMCreateMatrix_DA_3d_MPISELL(DM, Mat); 521 extern PetscErrorCode DMCreateMatrix_DA_IS(DM, Mat); 522 523 static PetscErrorCode MatView_MPI_DA(Mat A, PetscViewer viewer) 524 { 525 DM da; 526 const char *prefix; 527 Mat AA, Anatural; 528 AO ao; 529 PetscInt rstart, rend, *petsc, i; 530 IS is; 531 MPI_Comm comm; 532 PetscViewerFormat format; 533 PetscBool flag; 534 535 PetscFunctionBegin; 536 /* Check whether we are just printing info, in which case MatView() already viewed everything we wanted to view */ 537 PetscCall(PetscViewerGetFormat(viewer, &format)); 538 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) PetscFunctionReturn(PETSC_SUCCESS); 539 540 PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 541 PetscCall(MatGetDM(A, &da)); 542 PetscCheck(da, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Matrix not generated from a DMDA"); 543 544 PetscCall(DMDAGetAO(da, &ao)); 545 PetscCall(MatGetOwnershipRange(A, &rstart, &rend)); 546 PetscCall(PetscMalloc1(rend - rstart, &petsc)); 547 for (i = rstart; i < rend; i++) petsc[i - rstart] = i; 548 PetscCall(AOApplicationToPetsc(ao, rend - rstart, petsc)); 549 PetscCall(ISCreateGeneral(comm, rend - rstart, petsc, PETSC_OWN_POINTER, &is)); 550 551 /* call viewer on natural ordering */ 552 PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATMPISELL, &flag)); 553 if (flag) { 554 PetscCall(MatConvert(A, MATAIJ, MAT_INITIAL_MATRIX, &AA)); 555 A = AA; 556 } 557 PetscCall(MatCreateSubMatrix(A, is, is, MAT_INITIAL_MATRIX, &Anatural)); 558 PetscCall(ISDestroy(&is)); 559 PetscCall(PetscObjectGetOptionsPrefix((PetscObject)A, &prefix)); 560 PetscCall(PetscObjectSetOptionsPrefix((PetscObject)Anatural, prefix)); 561 PetscCall(PetscObjectSetName((PetscObject)Anatural, ((PetscObject)A)->name)); 562 ((PetscObject)Anatural)->donotPetscObjectPrintClassNamePrefixType = PETSC_TRUE; 563 PetscCall(MatView(Anatural, viewer)); 564 ((PetscObject)Anatural)->donotPetscObjectPrintClassNamePrefixType = PETSC_FALSE; 565 PetscCall(MatDestroy(&Anatural)); 566 if (flag) PetscCall(MatDestroy(&AA)); 567 PetscFunctionReturn(PETSC_SUCCESS); 568 } 569 570 static PetscErrorCode MatLoad_MPI_DA(Mat A, PetscViewer viewer) 571 { 572 DM da; 573 Mat Anatural, Aapp; 574 AO ao; 575 PetscInt rstart, rend, *app, i, m, n, M, N; 576 IS is; 577 MPI_Comm comm; 578 579 PetscFunctionBegin; 580 PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 581 PetscCall(MatGetDM(A, &da)); 582 PetscCheck(da, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Matrix not generated from a DMDA"); 583 584 /* Load the matrix in natural ordering */ 585 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &Anatural)); 586 PetscCall(MatSetType(Anatural, ((PetscObject)A)->type_name)); 587 PetscCall(MatGetSize(A, &M, &N)); 588 PetscCall(MatGetLocalSize(A, &m, &n)); 589 PetscCall(MatSetSizes(Anatural, m, n, M, N)); 590 PetscCall(MatLoad(Anatural, viewer)); 591 592 /* Map natural ordering to application ordering and create IS */ 593 PetscCall(DMDAGetAO(da, &ao)); 594 PetscCall(MatGetOwnershipRange(Anatural, &rstart, &rend)); 595 PetscCall(PetscMalloc1(rend - rstart, &app)); 596 for (i = rstart; i < rend; i++) app[i - rstart] = i; 597 PetscCall(AOPetscToApplication(ao, rend - rstart, app)); 598 PetscCall(ISCreateGeneral(comm, rend - rstart, app, PETSC_OWN_POINTER, &is)); 599 600 /* Do permutation and replace header */ 601 PetscCall(MatCreateSubMatrix(Anatural, is, is, MAT_INITIAL_MATRIX, &Aapp)); 602 PetscCall(MatHeaderReplace(A, &Aapp)); 603 PetscCall(ISDestroy(&is)); 604 PetscCall(MatDestroy(&Anatural)); 605 PetscFunctionReturn(PETSC_SUCCESS); 606 } 607 608 PetscErrorCode DMCreateMatrix_DA(DM da, Mat *J) 609 { 610 PetscInt dim, dof, nx, ny, nz, dims[3], starts[3], M, N, P; 611 Mat A; 612 MPI_Comm comm; 613 MatType Atype; 614 MatType mtype; 615 PetscMPIInt size; 616 DM_DA *dd = (DM_DA *)da->data; 617 PetscBool aij = PETSC_FALSE, baij = PETSC_FALSE, sbaij = PETSC_FALSE, sell = PETSC_FALSE, is = PETSC_FALSE; 618 619 PetscFunctionBegin; 620 PetscCall(MatInitializePackage()); 621 mtype = da->mattype; 622 623 /* 624 m 625 ------------------------------------------------------ 626 | | 627 | | 628 | ---------------------- | 629 | | | | 630 n | ny | | | 631 | | | | 632 | .--------------------- | 633 | (xs,ys) nx | 634 | . | 635 | (gxs,gys) | 636 | | 637 ----------------------------------------------------- 638 */ 639 640 /* 641 nc - number of components per grid point 642 col - number of colors needed in one direction for single component problem 643 */ 644 M = dd->M; 645 N = dd->N; 646 P = dd->P; 647 dim = da->dim; 648 dof = dd->w; 649 /* PetscCall(DMDAGetInfo(da,&dim,&M,&N,&P,NULL,NULL,NULL,&dof,NULL,NULL,NULL,NULL,NULL)); */ 650 PetscCall(DMDAGetCorners(da, NULL, NULL, NULL, &nx, &ny, &nz)); 651 PetscCall(PetscObjectGetComm((PetscObject)da, &comm)); 652 PetscCall(MatCreate(comm, &A)); 653 PetscCall(MatSetSizes(A, dof * nx * ny * nz, dof * nx * ny * nz, dof * M * N * P, dof * M * N * P)); 654 PetscCall(MatSetType(A, mtype)); 655 PetscCall(MatSetFromOptions(A)); 656 if (dof * nx * ny * nz < da->bind_below) { 657 PetscCall(MatSetBindingPropagates(A, PETSC_TRUE)); 658 PetscCall(MatBindToCPU(A, PETSC_TRUE)); 659 } 660 PetscCall(MatSetDM(A, da)); 661 if (da->structure_only) PetscCall(MatSetOption(A, MAT_STRUCTURE_ONLY, PETSC_TRUE)); 662 PetscCall(MatGetType(A, &Atype)); 663 /* 664 We do not provide a getmatrix function in the DMDA operations because 665 the basic DMDA does not know about matrices. We think of DMDA as being more 666 more low-level than matrices. This is kind of cheating but, cause sometimes 667 we think of DMDA has higher level than matrices. 668 669 We could switch based on Atype (or mtype), but we do not since the 670 specialized setting routines depend only on the particular preallocation 671 details of the matrix, not the type itself. 672 */ 673 if (!da->prealloc_skip) { // Flag is likely set when user intends to use MatSetPreallocationCOO() 674 PetscCall(PetscObjectHasFunction((PetscObject)A, "MatMPIAIJSetPreallocation_C", &aij)); 675 if (!aij) PetscCall(PetscObjectHasFunction((PetscObject)A, "MatSeqAIJSetPreallocation_C", &aij)); 676 if (!aij) { 677 PetscCall(PetscObjectHasFunction((PetscObject)A, "MatMPIBAIJSetPreallocation_C", &baij)); 678 if (!baij) PetscCall(PetscObjectHasFunction((PetscObject)A, "MatSeqBAIJSetPreallocation_C", &baij)); 679 if (!baij) { 680 PetscCall(PetscObjectHasFunction((PetscObject)A, "MatMPISBAIJSetPreallocation_C", &sbaij)); 681 if (!sbaij) PetscCall(PetscObjectHasFunction((PetscObject)A, "MatSeqSBAIJSetPreallocation_C", &sbaij)); 682 if (!sbaij) { 683 PetscCall(PetscObjectHasFunction((PetscObject)A, "MatMPISELLSetPreallocation_C", &sell)); 684 if (!sell) PetscCall(PetscObjectHasFunction((PetscObject)A, "MatSeqSELLSetPreallocation_C", &sell)); 685 } 686 if (!sell) PetscCall(PetscObjectHasFunction((PetscObject)A, "MatISSetPreallocation_C", &is)); 687 } 688 } 689 } 690 if (aij) { 691 if (dim == 1) { 692 if (dd->ofill) { 693 PetscCall(DMCreateMatrix_DA_1d_MPIAIJ_Fill(da, A)); 694 } else { 695 DMBoundaryType bx; 696 PetscMPIInt size; 697 PetscCall(DMDAGetInfo(da, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, &bx, NULL, NULL, NULL)); 698 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)da), &size)); 699 if (size == 1 && bx == DM_BOUNDARY_NONE) { 700 PetscCall(DMCreateMatrix_DA_1d_SeqAIJ_NoPreallocation(da, A)); 701 } else { 702 PetscCall(DMCreateMatrix_DA_1d_MPIAIJ(da, A)); 703 } 704 } 705 } else if (dim == 2) { 706 if (dd->ofill) { 707 PetscCall(DMCreateMatrix_DA_2d_MPIAIJ_Fill(da, A)); 708 } else { 709 PetscCall(DMCreateMatrix_DA_2d_MPIAIJ(da, A)); 710 } 711 } else if (dim == 3) { 712 if (dd->ofill) { 713 PetscCall(DMCreateMatrix_DA_3d_MPIAIJ_Fill(da, A)); 714 } else { 715 PetscCall(DMCreateMatrix_DA_3d_MPIAIJ(da, A)); 716 } 717 } 718 } else if (baij) { 719 if (dim == 2) { 720 PetscCall(DMCreateMatrix_DA_2d_MPIBAIJ(da, A)); 721 } else if (dim == 3) { 722 PetscCall(DMCreateMatrix_DA_3d_MPIBAIJ(da, A)); 723 } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "Not implemented for %" PetscInt_FMT " dimension and Matrix Type: %s in %" PetscInt_FMT " dimension! Send mail to petsc-maint@mcs.anl.gov for code", dim, Atype, dim); 724 } else if (sbaij) { 725 if (dim == 2) { 726 PetscCall(DMCreateMatrix_DA_2d_MPISBAIJ(da, A)); 727 } else if (dim == 3) { 728 PetscCall(DMCreateMatrix_DA_3d_MPISBAIJ(da, A)); 729 } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "Not implemented for %" PetscInt_FMT " dimension and Matrix Type: %s in %" PetscInt_FMT " dimension! Send mail to petsc-maint@mcs.anl.gov for code", dim, Atype, dim); 730 } else if (sell) { 731 if (dim == 2) { 732 PetscCall(DMCreateMatrix_DA_2d_MPISELL(da, A)); 733 } else if (dim == 3) { 734 PetscCall(DMCreateMatrix_DA_3d_MPISELL(da, A)); 735 } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "Not implemented for %" PetscInt_FMT " dimension and Matrix Type: %s in %" PetscInt_FMT " dimension! Send mail to petsc-maint@mcs.anl.gov for code", dim, Atype, dim); 736 } else if (is) { 737 PetscCall(DMCreateMatrix_DA_IS(da, A)); 738 } else { // unknown type or da->prealloc_skip so structural information may be needed, but no prealloc 739 ISLocalToGlobalMapping ltog; 740 741 PetscCall(MatSetBlockSize(A, dof)); 742 PetscCall(MatSetUp(A)); 743 PetscCall(DMGetLocalToGlobalMapping(da, <og)); 744 PetscCall(MatSetLocalToGlobalMapping(A, ltog, ltog)); 745 } 746 PetscCall(DMDAGetGhostCorners(da, &starts[0], &starts[1], &starts[2], &dims[0], &dims[1], &dims[2])); 747 PetscCall(MatSetStencil(A, dim, dims, starts, dof)); 748 PetscCall(MatSetDM(A, da)); 749 PetscCallMPI(MPI_Comm_size(comm, &size)); 750 if (size > 1) { 751 /* change viewer to display matrix in natural ordering */ 752 PetscCall(MatSetOperation(A, MATOP_VIEW, (void (*)(void))MatView_MPI_DA)); 753 PetscCall(MatSetOperation(A, MATOP_LOAD, (void (*)(void))MatLoad_MPI_DA)); 754 } 755 *J = A; 756 PetscFunctionReturn(PETSC_SUCCESS); 757 } 758 759 PetscErrorCode DMCreateMatrix_DA_IS(DM dm, Mat J) 760 { 761 DM_DA *da = (DM_DA *)dm->data; 762 Mat lJ, P; 763 ISLocalToGlobalMapping ltog; 764 IS is; 765 PetscBT bt; 766 const PetscInt *e_loc, *idx; 767 PetscInt i, nel, nen, nv, dof, *gidx, n, N; 768 769 /* The l2g map of DMDA has all ghosted nodes, and e_loc is a subset of all the local nodes (including the ghosted) 770 We need to filter out the local indices that are not represented through the DMDAGetElements decomposition */ 771 PetscFunctionBegin; 772 dof = da->w; 773 PetscCall(MatSetBlockSize(J, dof)); 774 PetscCall(DMGetLocalToGlobalMapping(dm, <og)); 775 776 /* flag local elements indices in local DMDA numbering */ 777 PetscCall(ISLocalToGlobalMappingGetSize(ltog, &nv)); 778 PetscCall(PetscBTCreate(nv / dof, &bt)); 779 PetscCall(DMDAGetElements(dm, &nel, &nen, &e_loc)); /* this will throw an error if the stencil type is not DMDA_STENCIL_BOX */ 780 for (i = 0; i < nel * nen; i++) PetscCall(PetscBTSet(bt, e_loc[i])); 781 782 /* filter out (set to -1) the global indices not used by the local elements */ 783 PetscCall(PetscMalloc1(nv / dof, &gidx)); 784 PetscCall(ISLocalToGlobalMappingGetBlockIndices(ltog, &idx)); 785 PetscCall(PetscArraycpy(gidx, idx, nv / dof)); 786 PetscCall(ISLocalToGlobalMappingRestoreBlockIndices(ltog, &idx)); 787 for (i = 0; i < nv / dof; i++) 788 if (!PetscBTLookup(bt, i)) gidx[i] = -1; 789 PetscCall(PetscBTDestroy(&bt)); 790 PetscCall(ISCreateBlock(PetscObjectComm((PetscObject)dm), dof, nv / dof, gidx, PETSC_OWN_POINTER, &is)); 791 PetscCall(ISLocalToGlobalMappingCreateIS(is, <og)); 792 PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog)); 793 PetscCall(ISLocalToGlobalMappingDestroy(<og)); 794 PetscCall(ISDestroy(&is)); 795 796 /* Preallocation */ 797 if (dm->prealloc_skip) { 798 PetscCall(MatSetUp(J)); 799 } else { 800 PetscCall(MatISGetLocalMat(J, &lJ)); 801 PetscCall(MatGetLocalToGlobalMapping(lJ, <og, NULL)); 802 PetscCall(MatCreate(PetscObjectComm((PetscObject)lJ), &P)); 803 PetscCall(MatSetType(P, MATPREALLOCATOR)); 804 PetscCall(MatSetLocalToGlobalMapping(P, ltog, ltog)); 805 PetscCall(MatGetSize(lJ, &N, NULL)); 806 PetscCall(MatGetLocalSize(lJ, &n, NULL)); 807 PetscCall(MatSetSizes(P, n, n, N, N)); 808 PetscCall(MatSetBlockSize(P, dof)); 809 PetscCall(MatSetUp(P)); 810 for (i = 0; i < nel; i++) PetscCall(MatSetValuesBlockedLocal(P, nen, e_loc + i * nen, nen, e_loc + i * nen, NULL, INSERT_VALUES)); 811 PetscCall(MatPreallocatorPreallocate(P, (PetscBool)!da->prealloc_only, lJ)); 812 PetscCall(MatISRestoreLocalMat(J, &lJ)); 813 PetscCall(DMDARestoreElements(dm, &nel, &nen, &e_loc)); 814 PetscCall(MatDestroy(&P)); 815 816 PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY)); 817 PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY)); 818 } 819 PetscFunctionReturn(PETSC_SUCCESS); 820 } 821 822 PetscErrorCode DMCreateMatrix_DA_2d_MPISELL(DM da, Mat J) 823 { 824 PetscInt xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny, m, n, dim, s, *cols = NULL, k, nc, *rows = NULL, col, cnt, l, p; 825 PetscInt lstart, lend, pstart, pend, *dnz, *onz; 826 MPI_Comm comm; 827 PetscScalar *values; 828 DMBoundaryType bx, by; 829 ISLocalToGlobalMapping ltog; 830 DMDAStencilType st; 831 832 PetscFunctionBegin; 833 /* 834 nc - number of components per grid point 835 col - number of colors needed in one direction for single component problem 836 */ 837 PetscCall(DMDAGetInfo(da, &dim, &m, &n, NULL, NULL, NULL, NULL, &nc, &s, &bx, &by, NULL, &st)); 838 col = 2 * s + 1; 839 PetscCall(DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL)); 840 PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL)); 841 PetscCall(PetscObjectGetComm((PetscObject)da, &comm)); 842 843 PetscCall(PetscMalloc2(nc, &rows, col * col * nc * nc, &cols)); 844 PetscCall(DMGetLocalToGlobalMapping(da, <og)); 845 846 PetscCall(MatSetBlockSize(J, nc)); 847 /* determine the matrix preallocation information */ 848 MatPreallocateBegin(comm, nc * nx * ny, nc * nx * ny, dnz, onz); 849 for (i = xs; i < xs + nx; i++) { 850 pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i)); 851 pend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1)); 852 853 for (j = ys; j < ys + ny; j++) { 854 slot = i - gxs + gnx * (j - gys); 855 856 lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j)); 857 lend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1)); 858 859 cnt = 0; 860 for (k = 0; k < nc; k++) { 861 for (l = lstart; l < lend + 1; l++) { 862 for (p = pstart; p < pend + 1; p++) { 863 if (st == DMDA_STENCIL_BOX || !l || !p) { /* entries on star have either l = 0 or p = 0 */ 864 cols[cnt++] = k + nc * (slot + gnx * l + p); 865 } 866 } 867 } 868 rows[k] = k + nc * (slot); 869 } 870 PetscCall(MatPreallocateSetLocal(ltog, nc, rows, ltog, cnt, cols, dnz, onz)); 871 } 872 } 873 PetscCall(MatSetBlockSize(J, nc)); 874 PetscCall(MatSeqSELLSetPreallocation(J, 0, dnz)); 875 PetscCall(MatMPISELLSetPreallocation(J, 0, dnz, 0, onz)); 876 MatPreallocateEnd(dnz, onz); 877 878 PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog)); 879 880 /* 881 For each node in the grid: we get the neighbors in the local (on processor ordering 882 that includes the ghost points) then MatSetValuesLocal() maps those indices to the global 883 PETSc ordering. 884 */ 885 if (!da->prealloc_only) { 886 PetscCall(PetscCalloc1(col * col * nc * nc, &values)); 887 for (i = xs; i < xs + nx; i++) { 888 pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i)); 889 pend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1)); 890 891 for (j = ys; j < ys + ny; j++) { 892 slot = i - gxs + gnx * (j - gys); 893 894 lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j)); 895 lend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1)); 896 897 cnt = 0; 898 for (k = 0; k < nc; k++) { 899 for (l = lstart; l < lend + 1; l++) { 900 for (p = pstart; p < pend + 1; p++) { 901 if (st == DMDA_STENCIL_BOX || !l || !p) { /* entries on star have either l = 0 or p = 0 */ 902 cols[cnt++] = k + nc * (slot + gnx * l + p); 903 } 904 } 905 } 906 rows[k] = k + nc * (slot); 907 } 908 PetscCall(MatSetValuesLocal(J, nc, rows, cnt, cols, values, INSERT_VALUES)); 909 } 910 } 911 PetscCall(PetscFree(values)); 912 /* do not copy values to GPU since they are all zero and not yet needed there */ 913 PetscCall(MatBindToCPU(J, PETSC_TRUE)); 914 PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY)); 915 PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY)); 916 PetscCall(MatBindToCPU(J, PETSC_FALSE)); 917 PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE)); 918 } 919 PetscCall(PetscFree2(rows, cols)); 920 PetscFunctionReturn(PETSC_SUCCESS); 921 } 922 923 PetscErrorCode DMCreateMatrix_DA_3d_MPISELL(DM da, Mat J) 924 { 925 PetscInt xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny; 926 PetscInt m, n, dim, s, *cols = NULL, k, nc, *rows = NULL, col, cnt, l, p, *dnz = NULL, *onz = NULL; 927 PetscInt istart, iend, jstart, jend, kstart, kend, zs, nz, gzs, gnz, ii, jj, kk, M, N, P; 928 MPI_Comm comm; 929 PetscScalar *values; 930 DMBoundaryType bx, by, bz; 931 ISLocalToGlobalMapping ltog; 932 DMDAStencilType st; 933 934 PetscFunctionBegin; 935 /* 936 nc - number of components per grid point 937 col - number of colors needed in one direction for single component problem 938 */ 939 PetscCall(DMDAGetInfo(da, &dim, &m, &n, &p, &M, &N, &P, &nc, &s, &bx, &by, &bz, &st)); 940 col = 2 * s + 1; 941 PetscCall(DMDAGetCorners(da, &xs, &ys, &zs, &nx, &ny, &nz)); 942 PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, &gzs, &gnx, &gny, &gnz)); 943 PetscCall(PetscObjectGetComm((PetscObject)da, &comm)); 944 945 PetscCall(PetscMalloc2(nc, &rows, col * col * col * nc * nc, &cols)); 946 PetscCall(DMGetLocalToGlobalMapping(da, <og)); 947 948 PetscCall(MatSetBlockSize(J, nc)); 949 /* determine the matrix preallocation information */ 950 MatPreallocateBegin(comm, nc * nx * ny * nz, nc * nx * ny * nz, dnz, onz); 951 for (i = xs; i < xs + nx; i++) { 952 istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i)); 953 iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1)); 954 for (j = ys; j < ys + ny; j++) { 955 jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j)); 956 jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1)); 957 for (k = zs; k < zs + nz; k++) { 958 kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k)); 959 kend = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1)); 960 961 slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs); 962 963 cnt = 0; 964 for (l = 0; l < nc; l++) { 965 for (ii = istart; ii < iend + 1; ii++) { 966 for (jj = jstart; jj < jend + 1; jj++) { 967 for (kk = kstart; kk < kend + 1; kk++) { 968 if (st == DMDA_STENCIL_BOX || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/ 969 cols[cnt++] = l + nc * (slot + ii + gnx * jj + gnx * gny * kk); 970 } 971 } 972 } 973 } 974 rows[l] = l + nc * (slot); 975 } 976 PetscCall(MatPreallocateSetLocal(ltog, nc, rows, ltog, cnt, cols, dnz, onz)); 977 } 978 } 979 } 980 PetscCall(MatSetBlockSize(J, nc)); 981 PetscCall(MatSeqSELLSetPreallocation(J, 0, dnz)); 982 PetscCall(MatMPISELLSetPreallocation(J, 0, dnz, 0, onz)); 983 MatPreallocateEnd(dnz, onz); 984 PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog)); 985 986 /* 987 For each node in the grid: we get the neighbors in the local (on processor ordering 988 that includes the ghost points) then MatSetValuesLocal() maps those indices to the global 989 PETSc ordering. 990 */ 991 if (!da->prealloc_only) { 992 PetscCall(PetscCalloc1(col * col * col * nc * nc * nc, &values)); 993 for (i = xs; i < xs + nx; i++) { 994 istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i)); 995 iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1)); 996 for (j = ys; j < ys + ny; j++) { 997 jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j)); 998 jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1)); 999 for (k = zs; k < zs + nz; k++) { 1000 kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k)); 1001 kend = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1)); 1002 1003 slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs); 1004 1005 cnt = 0; 1006 for (l = 0; l < nc; l++) { 1007 for (ii = istart; ii < iend + 1; ii++) { 1008 for (jj = jstart; jj < jend + 1; jj++) { 1009 for (kk = kstart; kk < kend + 1; kk++) { 1010 if (st == DMDA_STENCIL_BOX || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/ 1011 cols[cnt++] = l + nc * (slot + ii + gnx * jj + gnx * gny * kk); 1012 } 1013 } 1014 } 1015 } 1016 rows[l] = l + nc * (slot); 1017 } 1018 PetscCall(MatSetValuesLocal(J, nc, rows, cnt, cols, values, INSERT_VALUES)); 1019 } 1020 } 1021 } 1022 PetscCall(PetscFree(values)); 1023 /* do not copy values to GPU since they are all zero and not yet needed there */ 1024 PetscCall(MatBindToCPU(J, PETSC_TRUE)); 1025 PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY)); 1026 PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY)); 1027 PetscCall(MatBindToCPU(J, PETSC_FALSE)); 1028 PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE)); 1029 } 1030 PetscCall(PetscFree2(rows, cols)); 1031 PetscFunctionReturn(PETSC_SUCCESS); 1032 } 1033 1034 PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ(DM da, Mat J) 1035 { 1036 PetscInt xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny, m, n, dim, s, *cols = NULL, k, nc, *rows = NULL, col, cnt, l, p, M, N; 1037 PetscInt lstart, lend, pstart, pend, *dnz, *onz; 1038 MPI_Comm comm; 1039 DMBoundaryType bx, by; 1040 ISLocalToGlobalMapping ltog, mltog; 1041 DMDAStencilType st; 1042 PetscBool removedups = PETSC_FALSE, alreadyboundtocpu = PETSC_TRUE; 1043 1044 PetscFunctionBegin; 1045 /* 1046 nc - number of components per grid point 1047 col - number of colors needed in one direction for single component problem 1048 */ 1049 PetscCall(DMDAGetInfo(da, &dim, &m, &n, NULL, &M, &N, NULL, &nc, &s, &bx, &by, NULL, &st)); 1050 if (bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE) PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_TRUE)); 1051 col = 2 * s + 1; 1052 /* 1053 With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times 1054 because of "wrapping" around the end of the domain hitting an entry already counted in the other direction. 1055 */ 1056 if (M == 1 && 2 * s >= m) removedups = PETSC_TRUE; 1057 if (N == 1 && 2 * s >= n) removedups = PETSC_TRUE; 1058 PetscCall(DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL)); 1059 PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL)); 1060 PetscCall(PetscObjectGetComm((PetscObject)da, &comm)); 1061 1062 PetscCall(PetscMalloc2(nc, &rows, col * col * nc * nc, &cols)); 1063 PetscCall(DMGetLocalToGlobalMapping(da, <og)); 1064 1065 PetscCall(MatSetBlockSize(J, nc)); 1066 /* determine the matrix preallocation information */ 1067 MatPreallocateBegin(comm, nc * nx * ny, nc * nx * ny, dnz, onz); 1068 for (i = xs; i < xs + nx; i++) { 1069 pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i)); 1070 pend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1)); 1071 1072 for (j = ys; j < ys + ny; j++) { 1073 slot = i - gxs + gnx * (j - gys); 1074 1075 lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j)); 1076 lend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1)); 1077 1078 cnt = 0; 1079 for (k = 0; k < nc; k++) { 1080 for (l = lstart; l < lend + 1; l++) { 1081 for (p = pstart; p < pend + 1; p++) { 1082 if (st == DMDA_STENCIL_BOX || !l || !p) { /* entries on star have either l = 0 or p = 0 */ 1083 cols[cnt++] = k + nc * (slot + gnx * l + p); 1084 } 1085 } 1086 } 1087 rows[k] = k + nc * (slot); 1088 } 1089 if (removedups) PetscCall(MatPreallocateSetLocalRemoveDups(ltog, nc, rows, ltog, cnt, cols, dnz, onz)); 1090 else PetscCall(MatPreallocateSetLocal(ltog, nc, rows, ltog, cnt, cols, dnz, onz)); 1091 } 1092 } 1093 PetscCall(MatSetBlockSize(J, nc)); 1094 PetscCall(MatSeqAIJSetPreallocation(J, 0, dnz)); 1095 PetscCall(MatMPIAIJSetPreallocation(J, 0, dnz, 0, onz)); 1096 MatPreallocateEnd(dnz, onz); 1097 PetscCall(MatGetLocalToGlobalMapping(J, &mltog, NULL)); 1098 if (!mltog) PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog)); 1099 1100 /* 1101 For each node in the grid: we get the neighbors in the local (on processor ordering 1102 that includes the ghost points) then MatSetValuesLocal() maps those indices to the global 1103 PETSc ordering. 1104 */ 1105 if (!da->prealloc_only) { 1106 for (i = xs; i < xs + nx; i++) { 1107 pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i)); 1108 pend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1)); 1109 1110 for (j = ys; j < ys + ny; j++) { 1111 slot = i - gxs + gnx * (j - gys); 1112 1113 lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j)); 1114 lend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1)); 1115 1116 cnt = 0; 1117 for (l = lstart; l < lend + 1; l++) { 1118 for (p = pstart; p < pend + 1; p++) { 1119 if (st == DMDA_STENCIL_BOX || !l || !p) { /* entries on star have either l = 0 or p = 0 */ 1120 cols[cnt++] = nc * (slot + gnx * l + p); 1121 for (k = 1; k < nc; k++) { 1122 cols[cnt] = 1 + cols[cnt - 1]; 1123 cnt++; 1124 } 1125 } 1126 } 1127 } 1128 for (k = 0; k < nc; k++) rows[k] = k + nc * (slot); 1129 PetscCall(MatSetValuesLocal(J, nc, rows, cnt, cols, NULL, INSERT_VALUES)); 1130 } 1131 } 1132 /* do not copy values to GPU since they are all zero and not yet needed there */ 1133 PetscCall(MatBoundToCPU(J, &alreadyboundtocpu)); 1134 PetscCall(MatBindToCPU(J, PETSC_TRUE)); 1135 PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY)); 1136 PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY)); 1137 if (!alreadyboundtocpu) PetscCall(MatBindToCPU(J, PETSC_FALSE)); 1138 PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE)); 1139 if (bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE) PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_FALSE)); 1140 } 1141 PetscCall(PetscFree2(rows, cols)); 1142 PetscFunctionReturn(PETSC_SUCCESS); 1143 } 1144 1145 PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ_Fill(DM da, Mat J) 1146 { 1147 PetscInt xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny; 1148 PetscInt m, n, dim, s, *cols, k, nc, row, col, cnt, maxcnt = 0, l, p, M, N; 1149 PetscInt lstart, lend, pstart, pend, *dnz, *onz; 1150 DM_DA *dd = (DM_DA *)da->data; 1151 PetscInt ifill_col, *ofill = dd->ofill, *dfill = dd->dfill; 1152 MPI_Comm comm; 1153 DMBoundaryType bx, by; 1154 ISLocalToGlobalMapping ltog; 1155 DMDAStencilType st; 1156 PetscBool removedups = PETSC_FALSE; 1157 1158 PetscFunctionBegin; 1159 /* 1160 nc - number of components per grid point 1161 col - number of colors needed in one direction for single component problem 1162 */ 1163 PetscCall(DMDAGetInfo(da, &dim, &m, &n, NULL, &M, &N, NULL, &nc, &s, &bx, &by, NULL, &st)); 1164 col = 2 * s + 1; 1165 /* 1166 With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times 1167 because of "wrapping" around the end of the domain hitting an entry already counted in the other direction. 1168 */ 1169 if (M == 1 && 2 * s >= m) removedups = PETSC_TRUE; 1170 if (N == 1 && 2 * s >= n) removedups = PETSC_TRUE; 1171 PetscCall(DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL)); 1172 PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL)); 1173 PetscCall(PetscObjectGetComm((PetscObject)da, &comm)); 1174 1175 PetscCall(PetscMalloc1(col * col * nc, &cols)); 1176 PetscCall(DMGetLocalToGlobalMapping(da, <og)); 1177 1178 PetscCall(MatSetBlockSize(J, nc)); 1179 /* determine the matrix preallocation information */ 1180 MatPreallocateBegin(comm, nc * nx * ny, nc * nx * ny, dnz, onz); 1181 for (i = xs; i < xs + nx; i++) { 1182 pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i)); 1183 pend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1)); 1184 1185 for (j = ys; j < ys + ny; j++) { 1186 slot = i - gxs + gnx * (j - gys); 1187 1188 lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j)); 1189 lend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1)); 1190 1191 for (k = 0; k < nc; k++) { 1192 cnt = 0; 1193 for (l = lstart; l < lend + 1; l++) { 1194 for (p = pstart; p < pend + 1; p++) { 1195 if (l || p) { 1196 if (st == DMDA_STENCIL_BOX || !l || !p) { /* entries on star */ 1197 for (ifill_col = ofill[k]; ifill_col < ofill[k + 1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc * (slot + gnx * l + p); 1198 } 1199 } else { 1200 if (dfill) { 1201 for (ifill_col = dfill[k]; ifill_col < dfill[k + 1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc * (slot + gnx * l + p); 1202 } else { 1203 for (ifill_col = 0; ifill_col < nc; ifill_col++) cols[cnt++] = ifill_col + nc * (slot + gnx * l + p); 1204 } 1205 } 1206 } 1207 } 1208 row = k + nc * (slot); 1209 maxcnt = PetscMax(maxcnt, cnt); 1210 if (removedups) PetscCall(MatPreallocateSetLocalRemoveDups(ltog, 1, &row, ltog, cnt, cols, dnz, onz)); 1211 else PetscCall(MatPreallocateSetLocal(ltog, 1, &row, ltog, cnt, cols, dnz, onz)); 1212 } 1213 } 1214 } 1215 PetscCall(MatSeqAIJSetPreallocation(J, 0, dnz)); 1216 PetscCall(MatMPIAIJSetPreallocation(J, 0, dnz, 0, onz)); 1217 MatPreallocateEnd(dnz, onz); 1218 PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog)); 1219 1220 /* 1221 For each node in the grid: we get the neighbors in the local (on processor ordering 1222 that includes the ghost points) then MatSetValuesLocal() maps those indices to the global 1223 PETSc ordering. 1224 */ 1225 if (!da->prealloc_only) { 1226 for (i = xs; i < xs + nx; i++) { 1227 pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i)); 1228 pend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1)); 1229 1230 for (j = ys; j < ys + ny; j++) { 1231 slot = i - gxs + gnx * (j - gys); 1232 1233 lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j)); 1234 lend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1)); 1235 1236 for (k = 0; k < nc; k++) { 1237 cnt = 0; 1238 for (l = lstart; l < lend + 1; l++) { 1239 for (p = pstart; p < pend + 1; p++) { 1240 if (l || p) { 1241 if (st == DMDA_STENCIL_BOX || !l || !p) { /* entries on star */ 1242 for (ifill_col = ofill[k]; ifill_col < ofill[k + 1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc * (slot + gnx * l + p); 1243 } 1244 } else { 1245 if (dfill) { 1246 for (ifill_col = dfill[k]; ifill_col < dfill[k + 1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc * (slot + gnx * l + p); 1247 } else { 1248 for (ifill_col = 0; ifill_col < nc; ifill_col++) cols[cnt++] = ifill_col + nc * (slot + gnx * l + p); 1249 } 1250 } 1251 } 1252 } 1253 row = k + nc * (slot); 1254 PetscCall(MatSetValuesLocal(J, 1, &row, cnt, cols, NULL, INSERT_VALUES)); 1255 } 1256 } 1257 } 1258 /* do not copy values to GPU since they are all zero and not yet needed there */ 1259 PetscCall(MatBindToCPU(J, PETSC_TRUE)); 1260 PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY)); 1261 PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY)); 1262 PetscCall(MatBindToCPU(J, PETSC_FALSE)); 1263 PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE)); 1264 } 1265 PetscCall(PetscFree(cols)); 1266 PetscFunctionReturn(PETSC_SUCCESS); 1267 } 1268 1269 PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ(DM da, Mat J) 1270 { 1271 PetscInt xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny; 1272 PetscInt m, n, dim, s, *cols = NULL, k, nc, *rows = NULL, col, cnt, l, p, *dnz = NULL, *onz = NULL; 1273 PetscInt istart, iend, jstart, jend, kstart, kend, zs, nz, gzs, gnz, ii, jj, kk, M, N, P; 1274 MPI_Comm comm; 1275 DMBoundaryType bx, by, bz; 1276 ISLocalToGlobalMapping ltog, mltog; 1277 DMDAStencilType st; 1278 PetscBool removedups = PETSC_FALSE; 1279 1280 PetscFunctionBegin; 1281 /* 1282 nc - number of components per grid point 1283 col - number of colors needed in one direction for single component problem 1284 */ 1285 PetscCall(DMDAGetInfo(da, &dim, &m, &n, &p, &M, &N, &P, &nc, &s, &bx, &by, &bz, &st)); 1286 if (bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE && bz == DM_BOUNDARY_NONE) PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_TRUE)); 1287 col = 2 * s + 1; 1288 1289 /* 1290 With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times 1291 because of "wrapping" around the end of the domain hitting an entry already counted in the other direction. 1292 */ 1293 if (M == 1 && 2 * s >= m) removedups = PETSC_TRUE; 1294 if (N == 1 && 2 * s >= n) removedups = PETSC_TRUE; 1295 if (P == 1 && 2 * s >= p) removedups = PETSC_TRUE; 1296 1297 PetscCall(DMDAGetCorners(da, &xs, &ys, &zs, &nx, &ny, &nz)); 1298 PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, &gzs, &gnx, &gny, &gnz)); 1299 PetscCall(PetscObjectGetComm((PetscObject)da, &comm)); 1300 1301 PetscCall(PetscMalloc2(nc, &rows, col * col * col * nc * nc, &cols)); 1302 PetscCall(DMGetLocalToGlobalMapping(da, <og)); 1303 1304 PetscCall(MatSetBlockSize(J, nc)); 1305 /* determine the matrix preallocation information */ 1306 MatPreallocateBegin(comm, nc * nx * ny * nz, nc * nx * ny * nz, dnz, onz); 1307 for (i = xs; i < xs + nx; i++) { 1308 istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i)); 1309 iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1)); 1310 for (j = ys; j < ys + ny; j++) { 1311 jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j)); 1312 jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1)); 1313 for (k = zs; k < zs + nz; k++) { 1314 kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k)); 1315 kend = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1)); 1316 1317 slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs); 1318 1319 cnt = 0; 1320 for (l = 0; l < nc; l++) { 1321 for (ii = istart; ii < iend + 1; ii++) { 1322 for (jj = jstart; jj < jend + 1; jj++) { 1323 for (kk = kstart; kk < kend + 1; kk++) { 1324 if (st == DMDA_STENCIL_BOX || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/ 1325 cols[cnt++] = l + nc * (slot + ii + gnx * jj + gnx * gny * kk); 1326 } 1327 } 1328 } 1329 } 1330 rows[l] = l + nc * (slot); 1331 } 1332 if (removedups) PetscCall(MatPreallocateSetLocalRemoveDups(ltog, nc, rows, ltog, cnt, cols, dnz, onz)); 1333 else PetscCall(MatPreallocateSetLocal(ltog, nc, rows, ltog, cnt, cols, dnz, onz)); 1334 } 1335 } 1336 } 1337 PetscCall(MatSetBlockSize(J, nc)); 1338 PetscCall(MatSeqAIJSetPreallocation(J, 0, dnz)); 1339 PetscCall(MatMPIAIJSetPreallocation(J, 0, dnz, 0, onz)); 1340 MatPreallocateEnd(dnz, onz); 1341 PetscCall(MatGetLocalToGlobalMapping(J, &mltog, NULL)); 1342 if (!mltog) PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog)); 1343 1344 /* 1345 For each node in the grid: we get the neighbors in the local (on processor ordering 1346 that includes the ghost points) then MatSetValuesLocal() maps those indices to the global 1347 PETSc ordering. 1348 */ 1349 if (!da->prealloc_only) { 1350 for (i = xs; i < xs + nx; i++) { 1351 istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i)); 1352 iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1)); 1353 for (j = ys; j < ys + ny; j++) { 1354 jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j)); 1355 jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1)); 1356 for (k = zs; k < zs + nz; k++) { 1357 kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k)); 1358 kend = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1)); 1359 1360 slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs); 1361 1362 cnt = 0; 1363 for (kk = kstart; kk < kend + 1; kk++) { 1364 for (jj = jstart; jj < jend + 1; jj++) { 1365 for (ii = istart; ii < iend + 1; ii++) { 1366 if (st == DMDA_STENCIL_BOX || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/ 1367 cols[cnt++] = nc * (slot + ii + gnx * jj + gnx * gny * kk); 1368 for (l = 1; l < nc; l++) { 1369 cols[cnt] = 1 + cols[cnt - 1]; 1370 cnt++; 1371 } 1372 } 1373 } 1374 } 1375 } 1376 rows[0] = nc * (slot); 1377 for (l = 1; l < nc; l++) rows[l] = 1 + rows[l - 1]; 1378 PetscCall(MatSetValuesLocal(J, nc, rows, cnt, cols, NULL, INSERT_VALUES)); 1379 } 1380 } 1381 } 1382 /* do not copy values to GPU since they are all zero and not yet needed there */ 1383 PetscCall(MatBindToCPU(J, PETSC_TRUE)); 1384 PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY)); 1385 PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY)); 1386 if (bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE && bz == DM_BOUNDARY_NONE) PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_FALSE)); 1387 PetscCall(MatBindToCPU(J, PETSC_FALSE)); 1388 PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE)); 1389 } 1390 PetscCall(PetscFree2(rows, cols)); 1391 PetscFunctionReturn(PETSC_SUCCESS); 1392 } 1393 1394 PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ_Fill(DM da, Mat J) 1395 { 1396 DM_DA *dd = (DM_DA *)da->data; 1397 PetscInt xs, nx, i, j, gxs, gnx, row, k, l; 1398 PetscInt m, dim, s, *cols = NULL, nc, cnt, maxcnt = 0, *ocols; 1399 PetscInt *ofill = dd->ofill, *dfill = dd->dfill; 1400 DMBoundaryType bx; 1401 ISLocalToGlobalMapping ltog; 1402 PetscMPIInt rank, size; 1403 1404 PetscFunctionBegin; 1405 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)da), &rank)); 1406 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)da), &size)); 1407 1408 /* 1409 nc - number of components per grid point 1410 */ 1411 PetscCall(DMDAGetInfo(da, &dim, &m, NULL, NULL, NULL, NULL, NULL, &nc, &s, &bx, NULL, NULL, NULL)); 1412 PetscCheck(s <= 1, PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "Matrix creation for 1d not implemented correctly for stencil width larger than 1"); 1413 PetscCall(DMDAGetCorners(da, &xs, NULL, NULL, &nx, NULL, NULL)); 1414 PetscCall(DMDAGetGhostCorners(da, &gxs, NULL, NULL, &gnx, NULL, NULL)); 1415 1416 PetscCall(MatSetBlockSize(J, nc)); 1417 PetscCall(PetscCalloc2(nx * nc, &cols, nx * nc, &ocols)); 1418 1419 /* 1420 note should be smaller for first and last process with no periodic 1421 does not handle dfill 1422 */ 1423 cnt = 0; 1424 /* coupling with process to the left */ 1425 for (i = 0; i < s; i++) { 1426 for (j = 0; j < nc; j++) { 1427 ocols[cnt] = ((rank == 0) ? 0 : (s - i) * (ofill[j + 1] - ofill[j])); 1428 cols[cnt] = dfill[j + 1] - dfill[j] + (s + i) * (ofill[j + 1] - ofill[j]); 1429 if (rank == 0 && (dd->bx == DM_BOUNDARY_PERIODIC)) { 1430 if (size > 1) ocols[cnt] += (s - i) * (ofill[j + 1] - ofill[j]); 1431 else cols[cnt] += (s - i) * (ofill[j + 1] - ofill[j]); 1432 } 1433 maxcnt = PetscMax(maxcnt, ocols[cnt] + cols[cnt]); 1434 cnt++; 1435 } 1436 } 1437 for (i = s; i < nx - s; i++) { 1438 for (j = 0; j < nc; j++) { 1439 cols[cnt] = dfill[j + 1] - dfill[j] + 2 * s * (ofill[j + 1] - ofill[j]); 1440 maxcnt = PetscMax(maxcnt, ocols[cnt] + cols[cnt]); 1441 cnt++; 1442 } 1443 } 1444 /* coupling with process to the right */ 1445 for (i = nx - s; i < nx; i++) { 1446 for (j = 0; j < nc; j++) { 1447 ocols[cnt] = ((rank == (size - 1)) ? 0 : (i - nx + s + 1) * (ofill[j + 1] - ofill[j])); 1448 cols[cnt] = dfill[j + 1] - dfill[j] + (s + nx - i - 1) * (ofill[j + 1] - ofill[j]); 1449 if ((rank == size - 1) && (dd->bx == DM_BOUNDARY_PERIODIC)) { 1450 if (size > 1) ocols[cnt] += (i - nx + s + 1) * (ofill[j + 1] - ofill[j]); 1451 else cols[cnt] += (i - nx + s + 1) * (ofill[j + 1] - ofill[j]); 1452 } 1453 maxcnt = PetscMax(maxcnt, ocols[cnt] + cols[cnt]); 1454 cnt++; 1455 } 1456 } 1457 1458 PetscCall(MatSeqAIJSetPreallocation(J, 0, cols)); 1459 PetscCall(MatMPIAIJSetPreallocation(J, 0, cols, 0, ocols)); 1460 PetscCall(PetscFree2(cols, ocols)); 1461 1462 PetscCall(DMGetLocalToGlobalMapping(da, <og)); 1463 PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog)); 1464 1465 /* 1466 For each node in the grid: we get the neighbors in the local (on processor ordering 1467 that includes the ghost points) then MatSetValuesLocal() maps those indices to the global 1468 PETSc ordering. 1469 */ 1470 if (!da->prealloc_only) { 1471 PetscCall(PetscMalloc1(maxcnt, &cols)); 1472 row = xs * nc; 1473 /* coupling with process to the left */ 1474 for (i = xs; i < xs + s; i++) { 1475 for (j = 0; j < nc; j++) { 1476 cnt = 0; 1477 if (rank) { 1478 for (l = 0; l < s; l++) { 1479 for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i - s + l) * nc + ofill[k]; 1480 } 1481 } 1482 if (rank == 0 && (dd->bx == DM_BOUNDARY_PERIODIC)) { 1483 for (l = 0; l < s; l++) { 1484 for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (m + i - s - l) * nc + ofill[k]; 1485 } 1486 } 1487 if (dfill) { 1488 for (k = dfill[j]; k < dfill[j + 1]; k++) cols[cnt++] = i * nc + dfill[k]; 1489 } else { 1490 for (k = 0; k < nc; k++) cols[cnt++] = i * nc + k; 1491 } 1492 for (l = 0; l < s; l++) { 1493 for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i + s - l) * nc + ofill[k]; 1494 } 1495 PetscCall(MatSetValues(J, 1, &row, cnt, cols, NULL, INSERT_VALUES)); 1496 row++; 1497 } 1498 } 1499 for (i = xs + s; i < xs + nx - s; i++) { 1500 for (j = 0; j < nc; j++) { 1501 cnt = 0; 1502 for (l = 0; l < s; l++) { 1503 for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i - s + l) * nc + ofill[k]; 1504 } 1505 if (dfill) { 1506 for (k = dfill[j]; k < dfill[j + 1]; k++) cols[cnt++] = i * nc + dfill[k]; 1507 } else { 1508 for (k = 0; k < nc; k++) cols[cnt++] = i * nc + k; 1509 } 1510 for (l = 0; l < s; l++) { 1511 for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i + s - l) * nc + ofill[k]; 1512 } 1513 PetscCall(MatSetValues(J, 1, &row, cnt, cols, NULL, INSERT_VALUES)); 1514 row++; 1515 } 1516 } 1517 /* coupling with process to the right */ 1518 for (i = xs + nx - s; i < xs + nx; i++) { 1519 for (j = 0; j < nc; j++) { 1520 cnt = 0; 1521 for (l = 0; l < s; l++) { 1522 for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i - s + l) * nc + ofill[k]; 1523 } 1524 if (dfill) { 1525 for (k = dfill[j]; k < dfill[j + 1]; k++) cols[cnt++] = i * nc + dfill[k]; 1526 } else { 1527 for (k = 0; k < nc; k++) cols[cnt++] = i * nc + k; 1528 } 1529 if (rank < size - 1) { 1530 for (l = 0; l < s; l++) { 1531 for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i + s - l) * nc + ofill[k]; 1532 } 1533 } 1534 if ((rank == size - 1) && (dd->bx == DM_BOUNDARY_PERIODIC)) { 1535 for (l = 0; l < s; l++) { 1536 for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i - s - l - m + 2) * nc + ofill[k]; 1537 } 1538 } 1539 PetscCall(MatSetValues(J, 1, &row, cnt, cols, NULL, INSERT_VALUES)); 1540 row++; 1541 } 1542 } 1543 PetscCall(PetscFree(cols)); 1544 /* do not copy values to GPU since they are all zero and not yet needed there */ 1545 PetscCall(MatBindToCPU(J, PETSC_TRUE)); 1546 PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY)); 1547 PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY)); 1548 PetscCall(MatBindToCPU(J, PETSC_FALSE)); 1549 PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE)); 1550 } 1551 PetscFunctionReturn(PETSC_SUCCESS); 1552 } 1553 1554 PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ(DM da, Mat J) 1555 { 1556 PetscInt xs, nx, i, i1, slot, gxs, gnx; 1557 PetscInt m, dim, s, *cols = NULL, nc, *rows = NULL, col, cnt, l; 1558 PetscInt istart, iend; 1559 DMBoundaryType bx; 1560 ISLocalToGlobalMapping ltog, mltog; 1561 1562 PetscFunctionBegin; 1563 /* 1564 nc - number of components per grid point 1565 col - number of colors needed in one direction for single component problem 1566 */ 1567 PetscCall(DMDAGetInfo(da, &dim, &m, NULL, NULL, NULL, NULL, NULL, &nc, &s, &bx, NULL, NULL, NULL)); 1568 if (bx == DM_BOUNDARY_NONE) PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_TRUE)); 1569 col = 2 * s + 1; 1570 1571 PetscCall(DMDAGetCorners(da, &xs, NULL, NULL, &nx, NULL, NULL)); 1572 PetscCall(DMDAGetGhostCorners(da, &gxs, NULL, NULL, &gnx, NULL, NULL)); 1573 1574 PetscCall(MatSetBlockSize(J, nc)); 1575 PetscCall(MatSeqAIJSetPreallocation(J, col * nc, NULL)); 1576 PetscCall(MatMPIAIJSetPreallocation(J, col * nc, NULL, col * nc, NULL)); 1577 1578 PetscCall(DMGetLocalToGlobalMapping(da, <og)); 1579 PetscCall(MatGetLocalToGlobalMapping(J, &mltog, NULL)); 1580 if (!mltog) PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog)); 1581 1582 /* 1583 For each node in the grid: we get the neighbors in the local (on processor ordering 1584 that includes the ghost points) then MatSetValuesLocal() maps those indices to the global 1585 PETSc ordering. 1586 */ 1587 if (!da->prealloc_only) { 1588 PetscCall(PetscMalloc2(nc, &rows, col * nc * nc, &cols)); 1589 for (i = xs; i < xs + nx; i++) { 1590 istart = PetscMax(-s, gxs - i); 1591 iend = PetscMin(s, gxs + gnx - i - 1); 1592 slot = i - gxs; 1593 1594 cnt = 0; 1595 for (i1 = istart; i1 < iend + 1; i1++) { 1596 cols[cnt++] = nc * (slot + i1); 1597 for (l = 1; l < nc; l++) { 1598 cols[cnt] = 1 + cols[cnt - 1]; 1599 cnt++; 1600 } 1601 } 1602 rows[0] = nc * (slot); 1603 for (l = 1; l < nc; l++) rows[l] = 1 + rows[l - 1]; 1604 PetscCall(MatSetValuesLocal(J, nc, rows, cnt, cols, NULL, INSERT_VALUES)); 1605 } 1606 /* do not copy values to GPU since they are all zero and not yet needed there */ 1607 PetscCall(MatBindToCPU(J, PETSC_TRUE)); 1608 PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY)); 1609 PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY)); 1610 if (bx == DM_BOUNDARY_NONE) PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_FALSE)); 1611 PetscCall(MatBindToCPU(J, PETSC_FALSE)); 1612 PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE)); 1613 PetscCall(PetscFree2(rows, cols)); 1614 } 1615 PetscFunctionReturn(PETSC_SUCCESS); 1616 } 1617 1618 PetscErrorCode DMCreateMatrix_DA_1d_SeqAIJ_NoPreallocation(DM da, Mat J) 1619 { 1620 PetscInt xs, nx, i, i1, slot, gxs, gnx; 1621 PetscInt m, dim, s, *cols = NULL, nc, *rows = NULL, col, cnt, l; 1622 PetscInt istart, iend; 1623 DMBoundaryType bx; 1624 ISLocalToGlobalMapping ltog, mltog; 1625 1626 PetscFunctionBegin; 1627 /* 1628 nc - number of components per grid point 1629 col - number of colors needed in one direction for single component problem 1630 */ 1631 PetscCall(DMDAGetInfo(da, &dim, &m, NULL, NULL, NULL, NULL, NULL, &nc, &s, &bx, NULL, NULL, NULL)); 1632 col = 2 * s + 1; 1633 1634 PetscCall(DMDAGetCorners(da, &xs, NULL, NULL, &nx, NULL, NULL)); 1635 PetscCall(DMDAGetGhostCorners(da, &gxs, NULL, NULL, &gnx, NULL, NULL)); 1636 1637 PetscCall(MatSetBlockSize(J, nc)); 1638 PetscCall(MatSeqAIJSetTotalPreallocation(J, nx * nc * col * nc)); 1639 1640 PetscCall(DMGetLocalToGlobalMapping(da, <og)); 1641 PetscCall(MatGetLocalToGlobalMapping(J, &mltog, NULL)); 1642 if (!mltog) PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog)); 1643 1644 /* 1645 For each node in the grid: we get the neighbors in the local (on processor ordering 1646 that includes the ghost points) then MatSetValuesLocal() maps those indices to the global 1647 PETSc ordering. 1648 */ 1649 if (!da->prealloc_only) { 1650 PetscCall(PetscMalloc2(nc, &rows, col * nc * nc, &cols)); 1651 for (i = xs; i < xs + nx; i++) { 1652 istart = PetscMax(-s, gxs - i); 1653 iend = PetscMin(s, gxs + gnx - i - 1); 1654 slot = i - gxs; 1655 1656 cnt = 0; 1657 for (i1 = istart; i1 < iend + 1; i1++) { 1658 cols[cnt++] = nc * (slot + i1); 1659 for (l = 1; l < nc; l++) { 1660 cols[cnt] = 1 + cols[cnt - 1]; 1661 cnt++; 1662 } 1663 } 1664 rows[0] = nc * (slot); 1665 for (l = 1; l < nc; l++) rows[l] = 1 + rows[l - 1]; 1666 PetscCall(MatSetValuesLocal(J, nc, rows, cnt, cols, NULL, INSERT_VALUES)); 1667 } 1668 /* do not copy values to GPU since they are all zero and not yet needed there */ 1669 PetscCall(MatBindToCPU(J, PETSC_TRUE)); 1670 PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY)); 1671 PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY)); 1672 if (bx == DM_BOUNDARY_NONE) PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_FALSE)); 1673 PetscCall(MatBindToCPU(J, PETSC_FALSE)); 1674 PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE)); 1675 PetscCall(PetscFree2(rows, cols)); 1676 } 1677 PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_FALSE)); 1678 PetscFunctionReturn(PETSC_SUCCESS); 1679 } 1680 1681 PetscErrorCode DMCreateMatrix_DA_2d_MPIBAIJ(DM da, Mat J) 1682 { 1683 PetscInt xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny; 1684 PetscInt m, n, dim, s, *cols, nc, col, cnt, *dnz, *onz; 1685 PetscInt istart, iend, jstart, jend, ii, jj; 1686 MPI_Comm comm; 1687 PetscScalar *values; 1688 DMBoundaryType bx, by; 1689 DMDAStencilType st; 1690 ISLocalToGlobalMapping ltog; 1691 1692 PetscFunctionBegin; 1693 /* 1694 nc - number of components per grid point 1695 col - number of colors needed in one direction for single component problem 1696 */ 1697 PetscCall(DMDAGetInfo(da, &dim, &m, &n, NULL, NULL, NULL, NULL, &nc, &s, &bx, &by, NULL, &st)); 1698 col = 2 * s + 1; 1699 1700 PetscCall(DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL)); 1701 PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL)); 1702 PetscCall(PetscObjectGetComm((PetscObject)da, &comm)); 1703 1704 PetscCall(PetscMalloc1(col * col * nc * nc, &cols)); 1705 1706 PetscCall(DMGetLocalToGlobalMapping(da, <og)); 1707 1708 /* determine the matrix preallocation information */ 1709 MatPreallocateBegin(comm, nx * ny, nx * ny, dnz, onz); 1710 for (i = xs; i < xs + nx; i++) { 1711 istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i)); 1712 iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1)); 1713 for (j = ys; j < ys + ny; j++) { 1714 jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j)); 1715 jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1)); 1716 slot = i - gxs + gnx * (j - gys); 1717 1718 /* Find block columns in block row */ 1719 cnt = 0; 1720 for (ii = istart; ii < iend + 1; ii++) { 1721 for (jj = jstart; jj < jend + 1; jj++) { 1722 if (st == DMDA_STENCIL_BOX || !ii || !jj) { /* BOX or on the STAR */ 1723 cols[cnt++] = slot + ii + gnx * jj; 1724 } 1725 } 1726 } 1727 PetscCall(MatPreallocateSetLocalBlock(ltog, 1, &slot, ltog, cnt, cols, dnz, onz)); 1728 } 1729 } 1730 PetscCall(MatSeqBAIJSetPreallocation(J, nc, 0, dnz)); 1731 PetscCall(MatMPIBAIJSetPreallocation(J, nc, 0, dnz, 0, onz)); 1732 MatPreallocateEnd(dnz, onz); 1733 1734 PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog)); 1735 1736 /* 1737 For each node in the grid: we get the neighbors in the local (on processor ordering 1738 that includes the ghost points) then MatSetValuesLocal() maps those indices to the global 1739 PETSc ordering. 1740 */ 1741 if (!da->prealloc_only) { 1742 PetscCall(PetscCalloc1(col * col * nc * nc, &values)); 1743 for (i = xs; i < xs + nx; i++) { 1744 istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i)); 1745 iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1)); 1746 for (j = ys; j < ys + ny; j++) { 1747 jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j)); 1748 jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1)); 1749 slot = i - gxs + gnx * (j - gys); 1750 cnt = 0; 1751 for (ii = istart; ii < iend + 1; ii++) { 1752 for (jj = jstart; jj < jend + 1; jj++) { 1753 if (st == DMDA_STENCIL_BOX || !ii || !jj) { /* BOX or on the STAR */ 1754 cols[cnt++] = slot + ii + gnx * jj; 1755 } 1756 } 1757 } 1758 PetscCall(MatSetValuesBlockedLocal(J, 1, &slot, cnt, cols, values, INSERT_VALUES)); 1759 } 1760 } 1761 PetscCall(PetscFree(values)); 1762 /* do not copy values to GPU since they are all zero and not yet needed there */ 1763 PetscCall(MatBindToCPU(J, PETSC_TRUE)); 1764 PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY)); 1765 PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY)); 1766 PetscCall(MatBindToCPU(J, PETSC_FALSE)); 1767 PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE)); 1768 } 1769 PetscCall(PetscFree(cols)); 1770 PetscFunctionReturn(PETSC_SUCCESS); 1771 } 1772 1773 PetscErrorCode DMCreateMatrix_DA_3d_MPIBAIJ(DM da, Mat J) 1774 { 1775 PetscInt xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny; 1776 PetscInt m, n, dim, s, *cols, k, nc, col, cnt, p, *dnz, *onz; 1777 PetscInt istart, iend, jstart, jend, kstart, kend, zs, nz, gzs, gnz, ii, jj, kk; 1778 MPI_Comm comm; 1779 PetscScalar *values; 1780 DMBoundaryType bx, by, bz; 1781 DMDAStencilType st; 1782 ISLocalToGlobalMapping ltog; 1783 1784 PetscFunctionBegin; 1785 /* 1786 nc - number of components per grid point 1787 col - number of colors needed in one direction for single component problem 1788 */ 1789 PetscCall(DMDAGetInfo(da, &dim, &m, &n, &p, NULL, NULL, NULL, &nc, &s, &bx, &by, &bz, &st)); 1790 col = 2 * s + 1; 1791 1792 PetscCall(DMDAGetCorners(da, &xs, &ys, &zs, &nx, &ny, &nz)); 1793 PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, &gzs, &gnx, &gny, &gnz)); 1794 PetscCall(PetscObjectGetComm((PetscObject)da, &comm)); 1795 1796 PetscCall(PetscMalloc1(col * col * col, &cols)); 1797 1798 PetscCall(DMGetLocalToGlobalMapping(da, <og)); 1799 1800 /* determine the matrix preallocation information */ 1801 MatPreallocateBegin(comm, nx * ny * nz, nx * ny * nz, dnz, onz); 1802 for (i = xs; i < xs + nx; i++) { 1803 istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i)); 1804 iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1)); 1805 for (j = ys; j < ys + ny; j++) { 1806 jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j)); 1807 jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1)); 1808 for (k = zs; k < zs + nz; k++) { 1809 kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k)); 1810 kend = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1)); 1811 1812 slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs); 1813 1814 /* Find block columns in block row */ 1815 cnt = 0; 1816 for (ii = istart; ii < iend + 1; ii++) { 1817 for (jj = jstart; jj < jend + 1; jj++) { 1818 for (kk = kstart; kk < kend + 1; kk++) { 1819 if (st == DMDA_STENCIL_BOX || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/ 1820 cols[cnt++] = slot + ii + gnx * jj + gnx * gny * kk; 1821 } 1822 } 1823 } 1824 } 1825 PetscCall(MatPreallocateSetLocalBlock(ltog, 1, &slot, ltog, cnt, cols, dnz, onz)); 1826 } 1827 } 1828 } 1829 PetscCall(MatSeqBAIJSetPreallocation(J, nc, 0, dnz)); 1830 PetscCall(MatMPIBAIJSetPreallocation(J, nc, 0, dnz, 0, onz)); 1831 MatPreallocateEnd(dnz, onz); 1832 1833 PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog)); 1834 1835 /* 1836 For each node in the grid: we get the neighbors in the local (on processor ordering 1837 that includes the ghost points) then MatSetValuesLocal() maps those indices to the global 1838 PETSc ordering. 1839 */ 1840 if (!da->prealloc_only) { 1841 PetscCall(PetscCalloc1(col * col * col * nc * nc, &values)); 1842 for (i = xs; i < xs + nx; i++) { 1843 istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i)); 1844 iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1)); 1845 for (j = ys; j < ys + ny; j++) { 1846 jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j)); 1847 jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1)); 1848 for (k = zs; k < zs + nz; k++) { 1849 kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k)); 1850 kend = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1)); 1851 1852 slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs); 1853 1854 cnt = 0; 1855 for (ii = istart; ii < iend + 1; ii++) { 1856 for (jj = jstart; jj < jend + 1; jj++) { 1857 for (kk = kstart; kk < kend + 1; kk++) { 1858 if (st == DMDA_STENCIL_BOX || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/ 1859 cols[cnt++] = slot + ii + gnx * jj + gnx * gny * kk; 1860 } 1861 } 1862 } 1863 } 1864 PetscCall(MatSetValuesBlockedLocal(J, 1, &slot, cnt, cols, values, INSERT_VALUES)); 1865 } 1866 } 1867 } 1868 PetscCall(PetscFree(values)); 1869 /* do not copy values to GPU since they are all zero and not yet needed there */ 1870 PetscCall(MatBindToCPU(J, PETSC_TRUE)); 1871 PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY)); 1872 PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY)); 1873 PetscCall(MatBindToCPU(J, PETSC_FALSE)); 1874 PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE)); 1875 } 1876 PetscCall(PetscFree(cols)); 1877 PetscFunctionReturn(PETSC_SUCCESS); 1878 } 1879 1880 /* 1881 This helper is for of SBAIJ preallocation, to discard the lower-triangular values which are difficult to 1882 identify in the local ordering with periodic domain. 1883 */ 1884 static PetscErrorCode L2GFilterUpperTriangular(ISLocalToGlobalMapping ltog, PetscInt *row, PetscInt *cnt, PetscInt col[]) 1885 { 1886 PetscInt i, n; 1887 1888 PetscFunctionBegin; 1889 PetscCall(ISLocalToGlobalMappingApplyBlock(ltog, 1, row, row)); 1890 PetscCall(ISLocalToGlobalMappingApplyBlock(ltog, *cnt, col, col)); 1891 for (i = 0, n = 0; i < *cnt; i++) { 1892 if (col[i] >= *row) col[n++] = col[i]; 1893 } 1894 *cnt = n; 1895 PetscFunctionReturn(PETSC_SUCCESS); 1896 } 1897 1898 PetscErrorCode DMCreateMatrix_DA_2d_MPISBAIJ(DM da, Mat J) 1899 { 1900 PetscInt xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny; 1901 PetscInt m, n, dim, s, *cols, nc, col, cnt, *dnz, *onz; 1902 PetscInt istart, iend, jstart, jend, ii, jj; 1903 MPI_Comm comm; 1904 PetscScalar *values; 1905 DMBoundaryType bx, by; 1906 DMDAStencilType st; 1907 ISLocalToGlobalMapping ltog; 1908 1909 PetscFunctionBegin; 1910 /* 1911 nc - number of components per grid point 1912 col - number of colors needed in one direction for single component problem 1913 */ 1914 PetscCall(DMDAGetInfo(da, &dim, &m, &n, NULL, NULL, NULL, NULL, &nc, &s, &bx, &by, NULL, &st)); 1915 col = 2 * s + 1; 1916 1917 PetscCall(DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL)); 1918 PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL)); 1919 PetscCall(PetscObjectGetComm((PetscObject)da, &comm)); 1920 1921 PetscCall(PetscMalloc1(col * col * nc * nc, &cols)); 1922 1923 PetscCall(DMGetLocalToGlobalMapping(da, <og)); 1924 1925 /* determine the matrix preallocation information */ 1926 MatPreallocateBegin(comm, nx * ny, nx * ny, dnz, onz); 1927 for (i = xs; i < xs + nx; i++) { 1928 istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i)); 1929 iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1)); 1930 for (j = ys; j < ys + ny; j++) { 1931 jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j)); 1932 jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1)); 1933 slot = i - gxs + gnx * (j - gys); 1934 1935 /* Find block columns in block row */ 1936 cnt = 0; 1937 for (ii = istart; ii < iend + 1; ii++) { 1938 for (jj = jstart; jj < jend + 1; jj++) { 1939 if (st == DMDA_STENCIL_BOX || !ii || !jj) cols[cnt++] = slot + ii + gnx * jj; 1940 } 1941 } 1942 PetscCall(L2GFilterUpperTriangular(ltog, &slot, &cnt, cols)); 1943 PetscCall(MatPreallocateSymmetricSetBlock(slot, cnt, cols, dnz, onz)); 1944 } 1945 } 1946 PetscCall(MatSeqSBAIJSetPreallocation(J, nc, 0, dnz)); 1947 PetscCall(MatMPISBAIJSetPreallocation(J, nc, 0, dnz, 0, onz)); 1948 MatPreallocateEnd(dnz, onz); 1949 1950 PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog)); 1951 1952 /* 1953 For each node in the grid: we get the neighbors in the local (on processor ordering 1954 that includes the ghost points) then MatSetValuesLocal() maps those indices to the global 1955 PETSc ordering. 1956 */ 1957 if (!da->prealloc_only) { 1958 PetscCall(PetscCalloc1(col * col * nc * nc, &values)); 1959 for (i = xs; i < xs + nx; i++) { 1960 istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i)); 1961 iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1)); 1962 for (j = ys; j < ys + ny; j++) { 1963 jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j)); 1964 jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1)); 1965 slot = i - gxs + gnx * (j - gys); 1966 1967 /* Find block columns in block row */ 1968 cnt = 0; 1969 for (ii = istart; ii < iend + 1; ii++) { 1970 for (jj = jstart; jj < jend + 1; jj++) { 1971 if (st == DMDA_STENCIL_BOX || !ii || !jj) cols[cnt++] = slot + ii + gnx * jj; 1972 } 1973 } 1974 PetscCall(L2GFilterUpperTriangular(ltog, &slot, &cnt, cols)); 1975 PetscCall(MatSetValuesBlocked(J, 1, &slot, cnt, cols, values, INSERT_VALUES)); 1976 } 1977 } 1978 PetscCall(PetscFree(values)); 1979 /* do not copy values to GPU since they are all zero and not yet needed there */ 1980 PetscCall(MatBindToCPU(J, PETSC_TRUE)); 1981 PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY)); 1982 PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY)); 1983 PetscCall(MatBindToCPU(J, PETSC_FALSE)); 1984 PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE)); 1985 } 1986 PetscCall(PetscFree(cols)); 1987 PetscFunctionReturn(PETSC_SUCCESS); 1988 } 1989 1990 PetscErrorCode DMCreateMatrix_DA_3d_MPISBAIJ(DM da, Mat J) 1991 { 1992 PetscInt xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny; 1993 PetscInt m, n, dim, s, *cols, k, nc, col, cnt, p, *dnz, *onz; 1994 PetscInt istart, iend, jstart, jend, kstart, kend, zs, nz, gzs, gnz, ii, jj, kk; 1995 MPI_Comm comm; 1996 PetscScalar *values; 1997 DMBoundaryType bx, by, bz; 1998 DMDAStencilType st; 1999 ISLocalToGlobalMapping ltog; 2000 2001 PetscFunctionBegin; 2002 /* 2003 nc - number of components per grid point 2004 col - number of colors needed in one direction for single component problem 2005 */ 2006 PetscCall(DMDAGetInfo(da, &dim, &m, &n, &p, NULL, NULL, NULL, &nc, &s, &bx, &by, &bz, &st)); 2007 col = 2 * s + 1; 2008 2009 PetscCall(DMDAGetCorners(da, &xs, &ys, &zs, &nx, &ny, &nz)); 2010 PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, &gzs, &gnx, &gny, &gnz)); 2011 PetscCall(PetscObjectGetComm((PetscObject)da, &comm)); 2012 2013 /* create the matrix */ 2014 PetscCall(PetscMalloc1(col * col * col, &cols)); 2015 2016 PetscCall(DMGetLocalToGlobalMapping(da, <og)); 2017 2018 /* determine the matrix preallocation information */ 2019 MatPreallocateBegin(comm, nx * ny * nz, nx * ny * nz, dnz, onz); 2020 for (i = xs; i < xs + nx; i++) { 2021 istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i)); 2022 iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1)); 2023 for (j = ys; j < ys + ny; j++) { 2024 jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j)); 2025 jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1)); 2026 for (k = zs; k < zs + nz; k++) { 2027 kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k)); 2028 kend = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1)); 2029 2030 slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs); 2031 2032 /* Find block columns in block row */ 2033 cnt = 0; 2034 for (ii = istart; ii < iend + 1; ii++) { 2035 for (jj = jstart; jj < jend + 1; jj++) { 2036 for (kk = kstart; kk < kend + 1; kk++) { 2037 if (st == DMDA_STENCIL_BOX || (!ii && !jj) || (!jj && !kk) || (!ii && !kk)) cols[cnt++] = slot + ii + gnx * jj + gnx * gny * kk; 2038 } 2039 } 2040 } 2041 PetscCall(L2GFilterUpperTriangular(ltog, &slot, &cnt, cols)); 2042 PetscCall(MatPreallocateSymmetricSetBlock(slot, cnt, cols, dnz, onz)); 2043 } 2044 } 2045 } 2046 PetscCall(MatSeqSBAIJSetPreallocation(J, nc, 0, dnz)); 2047 PetscCall(MatMPISBAIJSetPreallocation(J, nc, 0, dnz, 0, onz)); 2048 MatPreallocateEnd(dnz, onz); 2049 2050 PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog)); 2051 2052 /* 2053 For each node in the grid: we get the neighbors in the local (on processor ordering 2054 that includes the ghost points) then MatSetValuesLocal() maps those indices to the global 2055 PETSc ordering. 2056 */ 2057 if (!da->prealloc_only) { 2058 PetscCall(PetscCalloc1(col * col * col * nc * nc, &values)); 2059 for (i = xs; i < xs + nx; i++) { 2060 istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i)); 2061 iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1)); 2062 for (j = ys; j < ys + ny; j++) { 2063 jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j)); 2064 jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1)); 2065 for (k = zs; k < zs + nz; k++) { 2066 kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k)); 2067 kend = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1)); 2068 2069 slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs); 2070 2071 cnt = 0; 2072 for (ii = istart; ii < iend + 1; ii++) { 2073 for (jj = jstart; jj < jend + 1; jj++) { 2074 for (kk = kstart; kk < kend + 1; kk++) { 2075 if (st == DMDA_STENCIL_BOX || (!ii && !jj) || (!jj && !kk) || (!ii && !kk)) cols[cnt++] = slot + ii + gnx * jj + gnx * gny * kk; 2076 } 2077 } 2078 } 2079 PetscCall(L2GFilterUpperTriangular(ltog, &slot, &cnt, cols)); 2080 PetscCall(MatSetValuesBlocked(J, 1, &slot, cnt, cols, values, INSERT_VALUES)); 2081 } 2082 } 2083 } 2084 PetscCall(PetscFree(values)); 2085 /* do not copy values to GPU since they are all zero and not yet needed there */ 2086 PetscCall(MatBindToCPU(J, PETSC_TRUE)); 2087 PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY)); 2088 PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY)); 2089 PetscCall(MatBindToCPU(J, PETSC_FALSE)); 2090 PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE)); 2091 } 2092 PetscCall(PetscFree(cols)); 2093 PetscFunctionReturn(PETSC_SUCCESS); 2094 } 2095 2096 PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ_Fill(DM da, Mat J) 2097 { 2098 PetscInt xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny; 2099 PetscInt m, n, dim, s, *cols, k, nc, row, col, cnt, maxcnt = 0, l, p, *dnz, *onz; 2100 PetscInt istart, iend, jstart, jend, kstart, kend, zs, nz, gzs, gnz, ii, jj, kk, M, N, P; 2101 DM_DA *dd = (DM_DA *)da->data; 2102 PetscInt ifill_col, *dfill = dd->dfill, *ofill = dd->ofill; 2103 MPI_Comm comm; 2104 PetscScalar *values; 2105 DMBoundaryType bx, by, bz; 2106 ISLocalToGlobalMapping ltog; 2107 DMDAStencilType st; 2108 PetscBool removedups = PETSC_FALSE; 2109 2110 PetscFunctionBegin; 2111 /* 2112 nc - number of components per grid point 2113 col - number of colors needed in one direction for single component problem 2114 */ 2115 PetscCall(DMDAGetInfo(da, &dim, &m, &n, &p, &M, &N, &P, &nc, &s, &bx, &by, &bz, &st)); 2116 col = 2 * s + 1; 2117 2118 /* 2119 With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times 2120 because of "wrapping" around the end of the domain hitting an entry already counted in the other direction. 2121 */ 2122 if (M == 1 && 2 * s >= m) removedups = PETSC_TRUE; 2123 if (N == 1 && 2 * s >= n) removedups = PETSC_TRUE; 2124 if (P == 1 && 2 * s >= p) removedups = PETSC_TRUE; 2125 2126 PetscCall(DMDAGetCorners(da, &xs, &ys, &zs, &nx, &ny, &nz)); 2127 PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, &gzs, &gnx, &gny, &gnz)); 2128 PetscCall(PetscObjectGetComm((PetscObject)da, &comm)); 2129 2130 PetscCall(PetscMalloc1(col * col * col * nc, &cols)); 2131 PetscCall(DMGetLocalToGlobalMapping(da, <og)); 2132 2133 /* determine the matrix preallocation information */ 2134 MatPreallocateBegin(comm, nc * nx * ny * nz, nc * nx * ny * nz, dnz, onz); 2135 2136 PetscCall(MatSetBlockSize(J, nc)); 2137 for (i = xs; i < xs + nx; i++) { 2138 istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i)); 2139 iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1)); 2140 for (j = ys; j < ys + ny; j++) { 2141 jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j)); 2142 jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1)); 2143 for (k = zs; k < zs + nz; k++) { 2144 kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k)); 2145 kend = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1)); 2146 2147 slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs); 2148 2149 for (l = 0; l < nc; l++) { 2150 cnt = 0; 2151 for (ii = istart; ii < iend + 1; ii++) { 2152 for (jj = jstart; jj < jend + 1; jj++) { 2153 for (kk = kstart; kk < kend + 1; kk++) { 2154 if (ii || jj || kk) { 2155 if (st == DMDA_STENCIL_BOX || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/ 2156 for (ifill_col = ofill[l]; ifill_col < ofill[l + 1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc * (slot + ii + gnx * jj + gnx * gny * kk); 2157 } 2158 } else { 2159 if (dfill) { 2160 for (ifill_col = dfill[l]; ifill_col < dfill[l + 1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc * (slot + ii + gnx * jj + gnx * gny * kk); 2161 } else { 2162 for (ifill_col = 0; ifill_col < nc; ifill_col++) cols[cnt++] = ifill_col + nc * (slot + ii + gnx * jj + gnx * gny * kk); 2163 } 2164 } 2165 } 2166 } 2167 } 2168 row = l + nc * (slot); 2169 maxcnt = PetscMax(maxcnt, cnt); 2170 if (removedups) PetscCall(MatPreallocateSetLocalRemoveDups(ltog, 1, &row, ltog, cnt, cols, dnz, onz)); 2171 else PetscCall(MatPreallocateSetLocal(ltog, 1, &row, ltog, cnt, cols, dnz, onz)); 2172 } 2173 } 2174 } 2175 } 2176 PetscCall(MatSeqAIJSetPreallocation(J, 0, dnz)); 2177 PetscCall(MatMPIAIJSetPreallocation(J, 0, dnz, 0, onz)); 2178 MatPreallocateEnd(dnz, onz); 2179 PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog)); 2180 2181 /* 2182 For each node in the grid: we get the neighbors in the local (on processor ordering 2183 that includes the ghost points) then MatSetValuesLocal() maps those indices to the global 2184 PETSc ordering. 2185 */ 2186 if (!da->prealloc_only) { 2187 PetscCall(PetscCalloc1(maxcnt, &values)); 2188 for (i = xs; i < xs + nx; i++) { 2189 istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i)); 2190 iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1)); 2191 for (j = ys; j < ys + ny; j++) { 2192 jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j)); 2193 jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1)); 2194 for (k = zs; k < zs + nz; k++) { 2195 kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k)); 2196 kend = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1)); 2197 2198 slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs); 2199 2200 for (l = 0; l < nc; l++) { 2201 cnt = 0; 2202 for (ii = istart; ii < iend + 1; ii++) { 2203 for (jj = jstart; jj < jend + 1; jj++) { 2204 for (kk = kstart; kk < kend + 1; kk++) { 2205 if (ii || jj || kk) { 2206 if (st == DMDA_STENCIL_BOX || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/ 2207 for (ifill_col = ofill[l]; ifill_col < ofill[l + 1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc * (slot + ii + gnx * jj + gnx * gny * kk); 2208 } 2209 } else { 2210 if (dfill) { 2211 for (ifill_col = dfill[l]; ifill_col < dfill[l + 1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc * (slot + ii + gnx * jj + gnx * gny * kk); 2212 } else { 2213 for (ifill_col = 0; ifill_col < nc; ifill_col++) cols[cnt++] = ifill_col + nc * (slot + ii + gnx * jj + gnx * gny * kk); 2214 } 2215 } 2216 } 2217 } 2218 } 2219 row = l + nc * (slot); 2220 PetscCall(MatSetValuesLocal(J, 1, &row, cnt, cols, values, INSERT_VALUES)); 2221 } 2222 } 2223 } 2224 } 2225 PetscCall(PetscFree(values)); 2226 /* do not copy values to GPU since they are all zero and not yet needed there */ 2227 PetscCall(MatBindToCPU(J, PETSC_TRUE)); 2228 PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY)); 2229 PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY)); 2230 PetscCall(MatBindToCPU(J, PETSC_FALSE)); 2231 PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE)); 2232 } 2233 PetscCall(PetscFree(cols)); 2234 PetscFunctionReturn(PETSC_SUCCESS); 2235 } 2236