1 #include <petsc/private/dmdaimpl.h> /*I "petscdmda.h" I*/ 2 #include <petscmat.h> 3 #include <petscbt.h> 4 5 extern PetscErrorCode DMCreateColoring_DA_1d_MPIAIJ(DM, ISColoringType, ISColoring *); 6 extern PetscErrorCode DMCreateColoring_DA_2d_MPIAIJ(DM, ISColoringType, ISColoring *); 7 extern PetscErrorCode DMCreateColoring_DA_2d_5pt_MPIAIJ(DM, ISColoringType, ISColoring *); 8 extern PetscErrorCode DMCreateColoring_DA_3d_MPIAIJ(DM, ISColoringType, ISColoring *); 9 10 /* 11 For ghost i that may be negative or greater than the upper bound this 12 maps it into the 0:m-1 range using periodicity 13 */ 14 #define SetInRange(i, m) ((i < 0) ? m + i : ((i >= m) ? i - m : i)) 15 16 static PetscErrorCode DMDASetBlockFills_Private(const PetscInt *dfill, PetscInt w, PetscInt **rfill) 17 { 18 PetscInt i, j, nz, *fill; 19 20 PetscFunctionBegin; 21 if (!dfill) PetscFunctionReturn(PETSC_SUCCESS); 22 23 /* count number nonzeros */ 24 nz = 0; 25 for (i = 0; i < w; i++) { 26 for (j = 0; j < w; j++) { 27 if (dfill[w * i + j]) nz++; 28 } 29 } 30 PetscCall(PetscMalloc1(nz + w + 1, &fill)); 31 /* construct modified CSR storage of nonzero structure */ 32 /* fill[0 -- w] marks starts of each row of column indices (and end of last row) 33 so fill[1] - fill[0] gives number of nonzeros in first row etc */ 34 nz = w + 1; 35 for (i = 0; i < w; i++) { 36 fill[i] = nz; 37 for (j = 0; j < w; j++) { 38 if (dfill[w * i + j]) { 39 fill[nz] = j; 40 nz++; 41 } 42 } 43 } 44 fill[w] = nz; 45 46 *rfill = fill; 47 PetscFunctionReturn(PETSC_SUCCESS); 48 } 49 50 static PetscErrorCode DMDASetBlockFillsSparse_Private(const PetscInt *dfillsparse, PetscInt w, PetscInt **rfill) 51 { 52 PetscInt nz; 53 54 PetscFunctionBegin; 55 if (!dfillsparse) PetscFunctionReturn(PETSC_SUCCESS); 56 57 /* Determine number of non-zeros */ 58 nz = (dfillsparse[w] - w - 1); 59 60 /* Allocate space for our copy of the given sparse matrix representation. 
*/ 61 PetscCall(PetscMalloc1(nz + w + 1, rfill)); 62 PetscCall(PetscArraycpy(*rfill, dfillsparse, nz + w + 1)); 63 PetscFunctionReturn(PETSC_SUCCESS); 64 } 65 66 static PetscErrorCode DMDASetBlockFills_Private2(DM_DA *dd) 67 { 68 PetscInt i, k, cnt = 1; 69 70 PetscFunctionBegin; 71 /* ofillcount tracks the columns of ofill that have any nonzero in thems; the value in each location is the number of 72 columns to the left with any nonzeros in them plus 1 */ 73 PetscCall(PetscCalloc1(dd->w, &dd->ofillcols)); 74 for (i = 0; i < dd->w; i++) { 75 for (k = dd->ofill[i]; k < dd->ofill[i + 1]; k++) dd->ofillcols[dd->ofill[k]] = 1; 76 } 77 for (i = 0; i < dd->w; i++) { 78 if (dd->ofillcols[i]) dd->ofillcols[i] = cnt++; 79 } 80 PetscFunctionReturn(PETSC_SUCCESS); 81 } 82 83 /*@ 84 DMDASetBlockFills - Sets the fill pattern in each block for a multi-component problem 85 of the matrix returned by `DMCreateMatrix()`. 86 87 Logically Collective 88 89 Input Parameters: 90 + da - the `DMDA` 91 . dfill - the fill pattern in the diagonal block (may be `NULL`, means use dense block) 92 - ofill - the fill pattern in the off-diagonal blocks 93 94 Level: developer 95 96 Notes: 97 This only makes sense when you are doing multicomponent problems but using the 98 `MATMPIAIJ` matrix format 99 100 The format for `dfill` and `ofill` is a 2 dimensional dof by dof matrix with 1 entries 101 representing coupling and 0 entries for missing coupling. For example 102 .vb 103 dfill[9] = {1, 0, 0, 104 1, 1, 0, 105 0, 1, 1} 106 .ve 107 means that row 0 is coupled with only itself in the diagonal block, row 1 is coupled with 108 itself and row 0 (in the diagonal block) and row 2 is coupled with itself and row 1 (in the 109 diagonal block). 
/*@
  DMDASetBlockFills - Sets the fill pattern in each block for a multi-component problem
  of the matrix returned by `DMCreateMatrix()`.

  Logically Collective

  Input Parameters:
+ da    - the `DMDA`
. dfill - the fill pattern in the diagonal block (may be `NULL`, means use dense block)
- ofill - the fill pattern in the off-diagonal blocks

  Level: developer

  Notes:
  This only makes sense when you are doing multicomponent problems but using the
  `MATMPIAIJ` matrix format

  The format for `dfill` and `ofill` is a 2 dimensional dof by dof matrix with 1 entries
  representing coupling and 0 entries for missing coupling. For example
.vb
            dfill[9] = {1, 0, 0,
                        1, 1, 0,
                        0, 1, 1}
.ve
  means that row 0 is coupled with only itself in the diagonal block, row 1 is coupled with
  itself and row 0 (in the diagonal block) and row 2 is coupled with itself and row 1 (in the
  diagonal block).

  `DMDASetGetMatrix()` allows you to provide general code for those more complicated nonzero patterns than
  can be represented in the `dfill`, `ofill` format

  Contributed by\:
  Glenn Hammond

.seealso: [](sec_struct), `DM`, `DMDA`, `DMCreateMatrix()`, `DMDASetGetMatrix()`, `DMSetMatrixPreallocateOnly()`, `DMDASetBlockFillsSparse()`
@*/
PetscErrorCode DMDASetBlockFills(DM da, const PetscInt *dfill, const PetscInt *ofill)
{
  DM_DA *dd = (DM_DA *)da->data;

  PetscFunctionBegin;
  /* save the given dfill and ofill information; the private routine converts the
     dense dof-by-dof pattern into a compact CSR-like form stored on the DMDA */
  PetscCall(DMDASetBlockFills_Private(dfill, dd->w, &dd->dfill));
  PetscCall(DMDASetBlockFills_Private(ofill, dd->w, &dd->ofill));

  /* count nonzeros in ofill columns */
  PetscCall(DMDASetBlockFills_Private2(dd));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  DMDASetBlockFillsSparse - Sets the fill pattern in each block for a multi-component problem
  of the matrix returned by `DMCreateMatrix()`, using sparse representations
  of fill patterns.

  Logically Collective

  Input Parameters:
+ da          - the `DMDA`
. dfillsparse - the sparse fill pattern in the diagonal block (may be `NULL`, means use dense block)
- ofillsparse - the sparse fill pattern in the off-diagonal blocks

  Level: developer

  Notes:
  This only makes sense when you are doing multicomponent problems but using the
  `MATMPIAIJ` matrix format

  The format for `dfill` and `ofill` is a sparse representation of a
  dof-by-dof matrix with 1 entries representing coupling and 0 entries
  for missing coupling. The sparse representation is a 1 dimensional
  array of length nz + dof + 1, where nz is the number of non-zeros in
  the matrix. The first dof entries in the array give the
  starting array indices of each row's items in the rest of the array,
  the dof+1st item contains the value nz + dof + 1 (i.e. the entire length of the array)
  and the remaining nz items give the column indices of each of
  the 1s within the logical 2D matrix. Each row's items within
  the array are the column indices of the 1s within that row
  of the 2D matrix. PETSc developers may recognize that this is the
  same format as that computed by the `DMDASetBlockFills_Private()`
  function from a dense 2D matrix representation.

  `DMDASetGetMatrix()` allows you to provide general code for those more complicated nonzero patterns than
  can be represented in the `dfill`, `ofill` format

  Contributed by\:
  Philip C. Roth

.seealso: [](sec_struct), `DM`, `DMDA`, `DMDASetBlockFills()`, `DMCreateMatrix()`, `DMDASetGetMatrix()`, `DMSetMatrixPreallocateOnly()`
@*/
PetscErrorCode DMDASetBlockFillsSparse(DM da, const PetscInt *dfillsparse, const PetscInt *ofillsparse)
{
  DM_DA *dd = (DM_DA *)da->data;

  PetscFunctionBegin;
  /* save the given dfill and ofill information (already in the CSR-like form) */
  PetscCall(DMDASetBlockFillsSparse_Private(dfillsparse, dd->w, &dd->dfill));
  PetscCall(DMDASetBlockFillsSparse_Private(ofillsparse, dd->w, &dd->ofill));

  /* count nonzeros in ofill columns */
  PetscCall(DMDASetBlockFills_Private2(dd));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
   Dispatches creation of an ISColoring for a DMDA to the dimension-specific routine.
   For BAIJ matrix types the coloring is computed per block rather than per matrix
   element, which is done by temporarily telling the DMDA it has 1 dof per grid
   point (and rescaling the ownership ranges), then restoring the values afterwards.
*/
PetscErrorCode DMCreateColoring_DA(DM da, ISColoringType ctype, ISColoring *coloring)
{
  PetscInt       dim, m, n, p, nc;
  DMBoundaryType bx, by, bz;
  MPI_Comm       comm;
  PetscMPIInt    size;
  PetscBool      isBAIJ;
  DM_DA         *dd = (DM_DA *)da->data;

  PetscFunctionBegin;
  /*
                                           m
                ------------------------------------------------------
               |                                                      |
               |                                                      |
               |               ----------------------                 |
               |               |                    |                 |
            n  |           yn  |                    |                 |
               |               |                    |                 |
               |               .---------------------                 |
               |             (xs,ys)     xn                           |
               |            .                                         |
               |         (gxs,gys)                                    |
               |                                                      |
                -----------------------------------------------------
  */

  /*
         nc - number of components per grid point
         col - number of colors needed in one direction for single component problem
  */
  PetscCall(DMDAGetInfo(da, &dim, NULL, NULL, NULL, &m, &n, &p, &nc, NULL, &bx, &by, &bz, NULL));

  PetscCall(PetscObjectGetComm((PetscObject)da, &comm));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  if (ctype == IS_COLORING_LOCAL) {
    /* a single grid point in a periodic direction would make a ghost point coincide with its owner on the same rank */
    PetscCheck(!((m == 1 && bx == DM_BOUNDARY_PERIODIC) || (n == 1 && by == DM_BOUNDARY_PERIODIC) || (p == 1 && bz == DM_BOUNDARY_PERIODIC)), PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "IS_COLORING_LOCAL cannot be used for periodic boundary condition having both sides of the domain on the same process");
  }

  /* Tell the DMDA it has 1 degree of freedom per grid point so that the coloring for BAIJ
     matrices is for the blocks, not the individual matrix elements  */
  PetscCall(PetscStrbeginswith(da->mattype, MATBAIJ, &isBAIJ));
  if (!isBAIJ) PetscCall(PetscStrbeginswith(da->mattype, MATMPIBAIJ, &isBAIJ));
  if (!isBAIJ) PetscCall(PetscStrbeginswith(da->mattype, MATSEQBAIJ, &isBAIJ));
  if (isBAIJ) {
    dd->w  = 1;
    dd->xs = dd->xs / nc;
    dd->xe = dd->xe / nc;
    dd->Xs = dd->Xs / nc;
    dd->Xe = dd->Xe / nc;
  }

  /*
     We do not provide a getcoloring function in the DMDA operations because
     the basic DMDA does not know about matrices. We think of DMDA as being
     more low-level than matrices.
  */
  if (dim == 1) PetscCall(DMCreateColoring_DA_1d_MPIAIJ(da, ctype, coloring));
  else if (dim == 2) PetscCall(DMCreateColoring_DA_2d_MPIAIJ(da, ctype, coloring));
  else if (dim == 3) PetscCall(DMCreateColoring_DA_3d_MPIAIJ(da, ctype, coloring));
  else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "Not done for %" PetscInt_FMT " dimension, send us mail petsc-maint@mcs.anl.gov for code", dim);
  if (isBAIJ) {
    /* restore the true dof and ownership ranges modified above */
    dd->w  = nc;
    dd->xs = dd->xs * nc;
    dd->xe = dd->xe * nc;
    dd->Xs = dd->Xs * nc;
    dd->Xe = dd->Xe * nc;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
/*
   Computes a coloring for a 2d DMDA assuming an AIJ matrix with the full
   (2s+1)x(2s+1) box stencil; colors repeat with period col = 2s+1 in each
   direction so no two grid points within a stencil share a color.
   The coloring is cached on the DMDA (localcoloring/ghostedcoloring) and
   reference-counted on each call.
*/
PetscErrorCode DMCreateColoring_DA_2d_MPIAIJ(DM da, ISColoringType ctype, ISColoring *coloring)
{
  PetscInt         xs, ys, nx, ny, i, j, ii, gxs, gys, gnx, gny, m, n, M, N, dim, s, k, nc, col;
  PetscInt         ncolors = 0;
  MPI_Comm         comm;
  DMBoundaryType   bx, by;
  DMDAStencilType  st;
  ISColoringValue *colors;
  DM_DA           *dd = (DM_DA *)da->data;

  PetscFunctionBegin;
  /*
         nc - number of components per grid point
         col - number of colors needed in one direction for single component problem
  */
  PetscCall(DMDAGetInfo(da, &dim, &m, &n, NULL, &M, &N, NULL, &nc, &s, &bx, &by, NULL, &st));
  col = 2 * s + 1;
  PetscCall(DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL));
  PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL));
  PetscCall(PetscObjectGetComm((PetscObject)da, &comm));

  /* special case as taught to us by Paul Hovland: a 5-point star stencil needs only 5 colors */
  if (st == DMDA_STENCIL_STAR && s == 1) {
    PetscCall(DMCreateColoring_DA_2d_5pt_MPIAIJ(da, ctype, coloring));
  } else {
    if (ctype == IS_COLORING_GLOBAL) {
      if (!dd->localcoloring) {
        PetscCall(PetscMalloc1(nc * nx * ny, &colors));
        ii = 0;
        /* color = component + nc * (tile position within the col x col tile) */
        for (j = ys; j < ys + ny; j++) {
          for (i = xs; i < xs + nx; i++) {
            for (k = 0; k < nc; k++) PetscCall(ISColoringValueCast(k + nc * ((i % col) + col * (j % col)), colors + ii++));
          }
        }
        ncolors = nc + nc * (col - 1 + col * (col - 1));
        PetscCall(ISColoringCreate(comm, ncolors, nc * nx * ny, colors, PETSC_OWN_POINTER, &dd->localcoloring));
      }
      *coloring = dd->localcoloring;
    } else if (ctype == IS_COLORING_LOCAL) {
      if (!dd->ghostedcoloring) {
        PetscCall(PetscMalloc1(nc * gnx * gny, &colors));
        ii = 0;
        for (j = gys; j < gys + gny; j++) {
          for (i = gxs; i < gxs + gnx; i++) {
            for (k = 0; k < nc; k++) {
              /* the complicated stuff is to handle periodic boundaries */
              PetscCall(ISColoringValueCast(k + nc * ((SetInRange(i, m) % col) + col * (SetInRange(j, n) % col)), colors + ii++));
            }
          }
        }
        ncolors = nc + nc * (col - 1 + col * (col - 1));
        PetscCall(ISColoringCreate(comm, ncolors, nc * gnx * gny, colors, PETSC_OWN_POINTER, &dd->ghostedcoloring));
        /* PetscIntView(ncolors,(PetscInt*)colors,0); */

        PetscCall(ISColoringSetType(dd->ghostedcoloring, IS_COLORING_LOCAL));
      }
      *coloring = dd->ghostedcoloring;
    } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_ARG_WRONG, "Unknown ISColoringType %d", (int)ctype);
  }
  /* bump the reference count since the cached coloring is handed out to the caller */
  PetscCall(ISColoringReference(*coloring));
  PetscFunctionReturn(PETSC_SUCCESS);
}
/*
   Computes a coloring for a 3d DMDA assuming an AIJ matrix with the full
   (2s+1)^3 box stencil; colors repeat with period col = 2s+1 in each direction.
   Periodic directions must have a grid size divisible by col so the periodic
   wrap does not place two equal colors inside one stencil.
   The coloring is cached on the DMDA and reference-counted on each call.
*/
PetscErrorCode DMCreateColoring_DA_3d_MPIAIJ(DM da, ISColoringType ctype, ISColoring *coloring)
{
  PetscInt         xs, ys, nx, ny, i, j, gxs, gys, gnx, gny, m, n, p, dim, s, k, nc, col, zs, gzs, ii, l, nz, gnz, M, N, P;
  PetscInt         ncolors;
  MPI_Comm         comm;
  DMBoundaryType   bx, by, bz;
  DMDAStencilType  st;
  ISColoringValue *colors;
  DM_DA           *dd = (DM_DA *)da->data;

  PetscFunctionBegin;
  /*
         nc - number of components per grid point
         col - number of colors needed in one direction for single component problem
  */
  PetscCall(DMDAGetInfo(da, &dim, &m, &n, &p, &M, &N, &P, &nc, &s, &bx, &by, &bz, &st));
  col = 2 * s + 1;
  PetscCheck(bx != DM_BOUNDARY_PERIODIC || (m % col) == 0, PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "For coloring efficiency ensure number of grid points in X is divisible by 2*stencil_width + 1");
  PetscCheck(by != DM_BOUNDARY_PERIODIC || (n % col) == 0, PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "For coloring efficiency ensure number of grid points in Y is divisible by 2*stencil_width + 1");
  PetscCheck(bz != DM_BOUNDARY_PERIODIC || (p % col) == 0, PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "For coloring efficiency ensure number of grid points in Z is divisible by 2*stencil_width + 1");

  PetscCall(DMDAGetCorners(da, &xs, &ys, &zs, &nx, &ny, &nz));
  PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, &gzs, &gnx, &gny, &gnz));
  PetscCall(PetscObjectGetComm((PetscObject)da, &comm));

  /* create the coloring */
  if (ctype == IS_COLORING_GLOBAL) {
    if (!dd->localcoloring) {
      PetscCall(PetscMalloc1(nc * nx * ny * nz, &colors));
      ii = 0;
      /* color = component + nc * (position within the col x col x col tile) */
      for (k = zs; k < zs + nz; k++) {
        for (j = ys; j < ys + ny; j++) {
          for (i = xs; i < xs + nx; i++) {
            for (l = 0; l < nc; l++) PetscCall(ISColoringValueCast(l + nc * ((i % col) + col * (j % col) + col * col * (k % col)), colors + ii++));
          }
        }
      }
      ncolors = nc + nc * (col - 1 + col * (col - 1) + col * col * (col - 1));
      PetscCall(ISColoringCreate(comm, ncolors, nc * nx * ny * nz, colors, PETSC_OWN_POINTER, &dd->localcoloring));
    }
    *coloring = dd->localcoloring;
  } else if (ctype == IS_COLORING_LOCAL) {
    if (!dd->ghostedcoloring) {
      PetscCall(PetscMalloc1(nc * gnx * gny * gnz, &colors));
      ii = 0;
      for (k = gzs; k < gzs + gnz; k++) {
        for (j = gys; j < gys + gny; j++) {
          for (i = gxs; i < gxs + gnx; i++) {
            for (l = 0; l < nc; l++) {
              /* the complicated stuff is to handle periodic boundaries */
              PetscCall(ISColoringValueCast(l + nc * ((SetInRange(i, m) % col) + col * (SetInRange(j, n) % col) + col * col * (SetInRange(k, p) % col)), colors + ii++));
            }
          }
        }
      }
      ncolors = nc + nc * (col - 1 + col * (col - 1) + col * col * (col - 1));
      PetscCall(ISColoringCreate(comm, ncolors, nc * gnx * gny * gnz, colors, PETSC_OWN_POINTER, &dd->ghostedcoloring));
      PetscCall(ISColoringSetType(dd->ghostedcoloring, IS_COLORING_LOCAL));
    }
    *coloring = dd->ghostedcoloring;
  } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_ARG_WRONG, "Unknown ISColoringType %d", (int)ctype);
  /* bump the reference count since the cached coloring is handed out to the caller */
  PetscCall(ISColoringReference(*coloring));
  PetscFunctionReturn(PETSC_SUCCESS);
}
/*
   Computes a coloring for a 1d DMDA assuming an AIJ matrix with a stencil of
   width s (col = 2s+1 colors per component in general). When a block fill
   pattern was set with DMDASetBlockFills() (dd->ofillcols non-NULL), components
   with no off-diagonal coupling can share colors, reducing the color count to
   nc + 2*s*tc, where tc is the number of coupled components.
   The coloring is cached on the DMDA and reference-counted on each call.
*/
PetscErrorCode DMCreateColoring_DA_1d_MPIAIJ(DM da, ISColoringType ctype, ISColoring *coloring)
{
  PetscInt         xs, nx, i, i1, gxs, gnx, l, m, M, dim, s, nc, col;
  PetscInt         ncolors;
  MPI_Comm         comm;
  DMBoundaryType   bx;
  ISColoringValue *colors;
  DM_DA           *dd = (DM_DA *)da->data;

  PetscFunctionBegin;
  /*
         nc - number of components per grid point
         col - number of colors needed in one direction for single component problem
  */
  PetscCall(DMDAGetInfo(da, &dim, &m, NULL, NULL, &M, NULL, NULL, &nc, &s, &bx, NULL, NULL, NULL));
  col = 2 * s + 1;
  PetscCheck(bx != DM_BOUNDARY_PERIODIC || !(m % col), PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "IS_COLORING_GLOBAL can only be used for periodic boundary conditions if the number of grid points %" PetscInt_FMT " is divisible by the number of colors %" PetscInt_FMT, m, col);
  PetscCall(DMDAGetCorners(da, &xs, NULL, NULL, &nx, NULL, NULL));
  PetscCall(DMDAGetGhostCorners(da, &gxs, NULL, NULL, &gnx, NULL, NULL));
  PetscCall(PetscObjectGetComm((PetscObject)da, &comm));

  /* create the coloring */
  if (ctype == IS_COLORING_GLOBAL) {
    if (!dd->localcoloring) {
      PetscCall(PetscMalloc1(nc * nx, &colors));
      if (dd->ofillcols) {
        PetscInt tc = 0;
        /* tc = number of components that couple to neighboring grid points */
        for (i = 0; i < nc; i++) tc += (PetscInt)(dd->ofillcols[i] > 0);
        i1 = 0;
        for (i = xs; i < xs + nx; i++) {
          for (l = 0; l < nc; l++) {
            if (dd->ofillcols[l] && (i % col)) {
              /* coupled component away from a tile origin: give it a distinct color per tile slot */
              PetscCall(ISColoringValueCast(nc - 1 + tc * ((i % col) - 1) + dd->ofillcols[l], colors + i1++));
            } else {
              /* uncoupled component (or tile origin): color by component index only */
              PetscCall(ISColoringValueCast(l, colors + i1++));
            }
          }
        }
        ncolors = nc + 2 * s * tc;
      } else {
        i1 = 0;
        for (i = xs; i < xs + nx; i++) {
          for (l = 0; l < nc; l++) PetscCall(ISColoringValueCast(l + nc * (i % col), colors + i1++));
        }
        ncolors = nc + nc * (col - 1);
      }
      PetscCall(ISColoringCreate(comm, ncolors, nc * nx, colors, PETSC_OWN_POINTER, &dd->localcoloring));
    }
    *coloring = dd->localcoloring;
  } else if (ctype == IS_COLORING_LOCAL) {
    if (!dd->ghostedcoloring) {
      PetscCall(PetscMalloc1(nc * gnx, &colors));
      i1 = 0;
      for (i = gxs; i < gxs + gnx; i++) {
        for (l = 0; l < nc; l++) {
          /* the complicated stuff is to handle periodic boundaries */
          PetscCall(ISColoringValueCast(l + nc * (SetInRange(i, m) % col), colors + i1++));
        }
      }
      ncolors = nc + nc * (col - 1);
      PetscCall(ISColoringCreate(comm, ncolors, nc * gnx, colors, PETSC_OWN_POINTER, &dd->ghostedcoloring));
      PetscCall(ISColoringSetType(dd->ghostedcoloring, IS_COLORING_LOCAL));
    }
    *coloring = dd->ghostedcoloring;
  } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_ARG_WRONG, "Unknown ISColoringType %d", (int)ctype);
  /* bump the reference count since the cached coloring is handed out to the caller */
  PetscCall(ISColoringReference(*coloring));
  PetscFunctionReturn(PETSC_SUCCESS);
}
/*
   Computes a 5-color coloring for a 2d DMDA with a 5-point star stencil of
   width 1: color = component + nc * ((3*j + i) mod 5), which gives distinct
   colors to a point and its four neighbors.
   NOTE(review): unlike the box-stencil routines this one does not call
   ISColoringReference(); the caller DMCreateColoring_DA_2d_MPIAIJ() does the
   reference after this returns — presumably intentional, verify before reuse.
*/
PetscErrorCode DMCreateColoring_DA_2d_5pt_MPIAIJ(DM da, ISColoringType ctype, ISColoring *coloring)
{
  PetscInt         xs, ys, nx, ny, i, j, ii, gxs, gys, gnx, gny, m, n, dim, s, k, nc;
  PetscInt         ncolors;
  MPI_Comm         comm;
  DMBoundaryType   bx, by;
  ISColoringValue *colors;
  DM_DA           *dd = (DM_DA *)da->data;

  PetscFunctionBegin;
  /*
         nc - number of components per grid point
         col - number of colors needed in one direction for single component problem
  */
  PetscCall(DMDAGetInfo(da, &dim, &m, &n, NULL, NULL, NULL, NULL, &nc, &s, &bx, &by, NULL, NULL));
  PetscCall(DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL));
  PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL));
  PetscCall(PetscObjectGetComm((PetscObject)da, &comm));
  /* create the coloring */
  if (ctype == IS_COLORING_GLOBAL) {
    if (!dd->localcoloring) {
      PetscCall(PetscMalloc1(nc * nx * ny, &colors));
      ii = 0;
      for (j = ys; j < ys + ny; j++) {
        for (i = xs; i < xs + nx; i++) {
          for (k = 0; k < nc; k++) PetscCall(ISColoringValueCast(k + nc * ((3 * j + i) % 5), colors + ii++));
        }
      }
      ncolors = 5 * nc;
      PetscCall(ISColoringCreate(comm, ncolors, nc * nx * ny, colors, PETSC_OWN_POINTER, &dd->localcoloring));
    }
    *coloring = dd->localcoloring;
  } else if (ctype == IS_COLORING_LOCAL) {
    if (!dd->ghostedcoloring) {
      PetscCall(PetscMalloc1(nc * gnx * gny, &colors));
      ii = 0;
      for (j = gys; j < gys + gny; j++) {
        for (i = gxs; i < gxs + gnx; i++) {
          /* SetInRange wraps ghost indices for periodic boundaries */
          for (k = 0; k < nc; k++) PetscCall(ISColoringValueCast(k + nc * ((3 * SetInRange(j, n) + SetInRange(i, m)) % 5), colors + ii++));
        }
      }
      ncolors = 5 * nc;
      PetscCall(ISColoringCreate(comm, ncolors, nc * gnx * gny, colors, PETSC_OWN_POINTER, &dd->ghostedcoloring));
      PetscCall(ISColoringSetType(dd->ghostedcoloring, IS_COLORING_LOCAL));
    }
    *coloring = dd->ghostedcoloring;
  } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_ARG_WRONG, "Unknown ISColoringType %d", (int)ctype);
  PetscFunctionReturn(PETSC_SUCCESS);
}
/* =========================================================================== */
extern PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ(DM, Mat);
extern PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ_Fill(DM, Mat);
extern PetscErrorCode DMCreateMatrix_DA_1d_SeqAIJ_NoPreallocation(DM, Mat);
extern PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ(DM, Mat);
extern PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ_Fill(DM, Mat);
extern PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ(DM, Mat);
extern PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ_Fill(DM, Mat);
extern PetscErrorCode DMCreateMatrix_DA_2d_MPIBAIJ(DM, Mat);
extern PetscErrorCode DMCreateMatrix_DA_3d_MPIBAIJ(DM, Mat);
extern PetscErrorCode DMCreateMatrix_DA_2d_MPISBAIJ(DM, Mat);
extern PetscErrorCode DMCreateMatrix_DA_3d_MPISBAIJ(DM, Mat);
extern PetscErrorCode DMCreateMatrix_DA_2d_MPISELL(DM, Mat);
extern PetscErrorCode DMCreateMatrix_DA_3d_MPISELL(DM, Mat);
extern PetscErrorCode DMCreateMatrix_DA_IS(DM, Mat);

/*
   Custom MatView for matrices created from a DMDA: permutes the matrix from
   the PETSc (per-process contiguous) ordering to the application's natural
   grid ordering via the DMDA's AO, then views the permuted copy. MPISELL
   matrices are first converted to AIJ because MatCreateSubMatrix is used for
   the permutation.
*/
static PetscErrorCode MatView_MPI_DA(Mat A, PetscViewer viewer)
{
  DM                da;
  const char       *prefix;
  Mat               AA, Anatural;
  AO                ao;
  PetscInt          rstart, rend, *petsc, i;
  IS                is;
  MPI_Comm          comm;
  PetscViewerFormat format;
  PetscBool         flag;

  PetscFunctionBegin;
  /* Check whether we are just printing info, in which case MatView() already viewed everything we wanted to view */
  PetscCall(PetscViewerGetFormat(viewer, &format));
  if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) PetscFunctionReturn(PETSC_SUCCESS);

  PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
  PetscCall(MatGetDM(A, &da));
  PetscCheck(da, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Matrix not generated from a DMDA");

  /* build the index set of this rank's rows mapped to the natural (application) ordering */
  PetscCall(DMDAGetAO(da, &ao));
  PetscCall(MatGetOwnershipRange(A, &rstart, &rend));
  PetscCall(PetscMalloc1(rend - rstart, &petsc));
  for (i = rstart; i < rend; i++) petsc[i - rstart] = i;
  PetscCall(AOApplicationToPetsc(ao, rend - rstart, petsc));
  PetscCall(ISCreateGeneral(comm, rend - rstart, petsc, PETSC_OWN_POINTER, &is));

  /* call viewer on natural ordering */
  PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATMPISELL, &flag));
  if (flag) {
    /* MatCreateSubMatrix() path below needs AIJ; convert and view the copy instead */
    PetscCall(MatConvert(A, MATAIJ, MAT_INITIAL_MATRIX, &AA));
    A = AA;
  }
  PetscCall(MatCreateSubMatrix(A, is, is, MAT_INITIAL_MATRIX, &Anatural));
  PetscCall(ISDestroy(&is));
  PetscCall(PetscObjectGetOptionsPrefix((PetscObject)A, &prefix));
  PetscCall(PetscObjectSetOptionsPrefix((PetscObject)Anatural, prefix));
  PetscCall(PetscObjectSetName((PetscObject)Anatural, ((PetscObject)A)->name));
  /* suppress the class/name header so the output looks like it came from A itself */
  ((PetscObject)Anatural)->donotPetscObjectPrintClassNamePrefixType = PETSC_TRUE;
  PetscCall(MatView(Anatural, viewer));
  ((PetscObject)Anatural)->donotPetscObjectPrintClassNamePrefixType = PETSC_FALSE;
  PetscCall(MatDestroy(&Anatural));
  if (flag) PetscCall(MatDestroy(&AA));
  PetscFunctionReturn(PETSC_SUCCESS);
}
/*
   Custom MatLoad for matrices created from a DMDA: loads the matrix from the
   viewer in the natural (application) grid ordering, then permutes it into the
   PETSc ordering via the DMDA's AO and replaces A's contents in place with
   MatHeaderReplace().
*/
static PetscErrorCode MatLoad_MPI_DA(Mat A, PetscViewer viewer)
{
  DM       da;
  Mat      Anatural, Aapp;
  AO       ao;
  PetscInt rstart, rend, *app, i, m, n, M, N;
  IS       is;
  MPI_Comm comm;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
  PetscCall(MatGetDM(A, &da));
  PetscCheck(da, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Matrix not generated from a DMDA");

  /* Load the matrix in natural ordering */
  PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &Anatural));
  PetscCall(MatSetType(Anatural, ((PetscObject)A)->type_name));
  PetscCall(MatGetSize(A, &M, &N));
  PetscCall(MatGetLocalSize(A, &m, &n));
  PetscCall(MatSetSizes(Anatural, m, n, M, N));
  PetscCall(MatLoad(Anatural, viewer));

  /* Map natural ordering to application ordering and create IS */
  PetscCall(DMDAGetAO(da, &ao));
  PetscCall(MatGetOwnershipRange(Anatural, &rstart, &rend));
  PetscCall(PetscMalloc1(rend - rstart, &app));
  for (i = rstart; i < rend; i++) app[i - rstart] = i;
  PetscCall(AOPetscToApplication(ao, rend - rstart, app));
  PetscCall(ISCreateGeneral(comm, rend - rstart, app, PETSC_OWN_POINTER, &is));

  /* Do permutation and replace header */
  PetscCall(MatCreateSubMatrix(Anatural, is, is, MAT_INITIAL_MATRIX, &Aapp));
  PetscCall(MatHeaderReplace(A, &Aapp));
  PetscCall(ISDestroy(&is));
  PetscCall(MatDestroy(&Anatural));
  PetscFunctionReturn(PETSC_SUCCESS);
}
/*
   Creates and preallocates a matrix suitable for computing the Jacobian on a
   DMDA. Dispatches on the matrix type's preallocation capability (AIJ, BAIJ,
   SBAIJ, SELL, IS) and the DMDA dimension; falls back to MatSetUp() with a
   local-to-global mapping when no specialized preallocation routine matches or
   da->prealloc_skip is set. On more than one rank, MatView/MatLoad are
   overridden so I/O happens in the natural grid ordering.
*/
PetscErrorCode DMCreateMatrix_DA(DM da, Mat *J)
{
  PetscInt    dim, dof, nx, ny, nz, dims[3], starts[3], M, N, P;
  Mat         A;
  MPI_Comm    comm;
  MatType     Atype;
  MatType     mtype;
  PetscMPIInt size;
  DM_DA      *dd = (DM_DA *)da->data;
  PetscBool   aij = PETSC_FALSE, baij = PETSC_FALSE, sbaij = PETSC_FALSE, sell = PETSC_FALSE, is = PETSC_FALSE;

  PetscFunctionBegin;
  PetscCall(MatInitializePackage());
  mtype = da->mattype;

  /*
                                     m
          ------------------------------------------------------
         |                                                      |
         |                                                      |
         |               ----------------------                 |
         |               |                    |                 |
      n  |           ny  |                    |                 |
         |               |                    |                 |
         |               .---------------------                 |
         |             (xs,ys)     nx                           |
         |            .                                         |
         |         (gxs,gys)                                    |
         |                                                      |
          -----------------------------------------------------
  */

  /*
         nc - number of components per grid point
         col - number of colors needed in one direction for single component problem
  */
  M   = dd->M;
  N   = dd->N;
  P   = dd->P;
  dim = da->dim;
  dof = dd->w;
  /* PetscCall(DMDAGetInfo(da,&dim,&M,&N,&P,NULL,NULL,NULL,&dof,NULL,NULL,NULL,NULL,NULL)); */
  PetscCall(DMDAGetCorners(da, NULL, NULL, NULL, &nx, &ny, &nz));
  PetscCall(PetscObjectGetComm((PetscObject)da, &comm));
  PetscCall(MatCreate(comm, &A));
  PetscCall(MatSetSizes(A, dof * nx * ny * nz, dof * nx * ny * nz, dof * M * N * P, dof * M * N * P));
  PetscCall(MatSetType(A, mtype));
  PetscCall(MatSetFromOptions(A));
  if (dof * nx * ny * nz < da->bind_below) {
    /* small local problems stay on the CPU even for GPU matrix types */
    PetscCall(MatSetBindingPropagates(A, PETSC_TRUE));
    PetscCall(MatBindToCPU(A, PETSC_TRUE));
  }
  PetscCall(MatSetDM(A, da));
  if (da->structure_only) PetscCall(MatSetOption(A, MAT_STRUCTURE_ONLY, PETSC_TRUE));
  PetscCall(MatGetType(A, &Atype));
  /*
     We do not provide a getmatrix function in the DMDA operations because
     the basic DMDA does not know about matrices. We think of DMDA as being more
     more low-level than matrices. This is kind of cheating but, cause sometimes
     we think of DMDA has higher level than matrices.

     We could switch based on Atype (or mtype), but we do not since the
     specialized setting routines depend only on the particular preallocation
     details of the matrix, not the type itself.
  */
  if (!da->prealloc_skip) { // Flag is likely set when user intends to use MatSetPreallocationCOO()
    /* probe which preallocation interface the concrete matrix type implements */
    PetscCall(PetscObjectHasFunction((PetscObject)A, "MatMPIAIJSetPreallocation_C", &aij));
    if (!aij) PetscCall(PetscObjectHasFunction((PetscObject)A, "MatSeqAIJSetPreallocation_C", &aij));
    if (!aij) {
      PetscCall(PetscObjectHasFunction((PetscObject)A, "MatMPIBAIJSetPreallocation_C", &baij));
      if (!baij) PetscCall(PetscObjectHasFunction((PetscObject)A, "MatSeqBAIJSetPreallocation_C", &baij));
      if (!baij) {
        PetscCall(PetscObjectHasFunction((PetscObject)A, "MatMPISBAIJSetPreallocation_C", &sbaij));
        if (!sbaij) PetscCall(PetscObjectHasFunction((PetscObject)A, "MatSeqSBAIJSetPreallocation_C", &sbaij));
        if (!sbaij) {
          PetscCall(PetscObjectHasFunction((PetscObject)A, "MatMPISELLSetPreallocation_C", &sell));
          if (!sell) PetscCall(PetscObjectHasFunction((PetscObject)A, "MatSeqSELLSetPreallocation_C", &sell));
        }
        if (!sell) PetscCall(PetscObjectHasFunction((PetscObject)A, "MatISSetPreallocation_C", &is));
      }
    }
  }
  if (aij) {
    if (dim == 1) {
      if (dd->ofill) {
        PetscCall(DMCreateMatrix_DA_1d_MPIAIJ_Fill(da, A));
      } else {
        DMBoundaryType bx;
        PetscMPIInt    size; /* NOTE(review): shadows the function-scope 'size' declared above */
        PetscCall(DMDAGetInfo(da, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, &bx, NULL, NULL, NULL));
        PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)da), &size));
        if (size == 1 && bx == DM_BOUNDARY_NONE) {
          PetscCall(DMCreateMatrix_DA_1d_SeqAIJ_NoPreallocation(da, A));
        } else {
          PetscCall(DMCreateMatrix_DA_1d_MPIAIJ(da, A));
        }
      }
    } else if (dim == 2) {
      if (dd->ofill) {
        PetscCall(DMCreateMatrix_DA_2d_MPIAIJ_Fill(da, A));
      } else {
        PetscCall(DMCreateMatrix_DA_2d_MPIAIJ(da, A));
      }
    } else if (dim == 3) {
      if (dd->ofill) {
        PetscCall(DMCreateMatrix_DA_3d_MPIAIJ_Fill(da, A));
      } else {
        PetscCall(DMCreateMatrix_DA_3d_MPIAIJ(da, A));
      }
    }
  } else if (baij) {
    if (dim == 2) {
      PetscCall(DMCreateMatrix_DA_2d_MPIBAIJ(da, A));
    } else if (dim == 3) {
      PetscCall(DMCreateMatrix_DA_3d_MPIBAIJ(da, A));
    } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "Not implemented for %" PetscInt_FMT " dimension and Matrix Type: %s in %" PetscInt_FMT " dimension! Send mail to petsc-maint@mcs.anl.gov for code", dim, Atype, dim);
  } else if (sbaij) {
    if (dim == 2) {
      PetscCall(DMCreateMatrix_DA_2d_MPISBAIJ(da, A));
    } else if (dim == 3) {
      PetscCall(DMCreateMatrix_DA_3d_MPISBAIJ(da, A));
    } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "Not implemented for %" PetscInt_FMT " dimension and Matrix Type: %s in %" PetscInt_FMT " dimension! Send mail to petsc-maint@mcs.anl.gov for code", dim, Atype, dim);
  } else if (sell) {
    if (dim == 2) {
      PetscCall(DMCreateMatrix_DA_2d_MPISELL(da, A));
    } else if (dim == 3) {
      PetscCall(DMCreateMatrix_DA_3d_MPISELL(da, A));
    } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "Not implemented for %" PetscInt_FMT " dimension and Matrix Type: %s in %" PetscInt_FMT " dimension! Send mail to petsc-maint@mcs.anl.gov for code", dim, Atype, dim);
  } else if (is) {
    PetscCall(DMCreateMatrix_DA_IS(da, A));
  } else { // unknown type or da->prealloc_skip so structural information may be needed, but no prealloc
    ISLocalToGlobalMapping ltog;

    PetscCall(MatSetBlockSize(A, dof));
    PetscCall(MatSetUp(A));
    PetscCall(DMGetLocalToGlobalMapping(da, &ltog));
    PetscCall(MatSetLocalToGlobalMapping(A, ltog, ltog));
  }
  PetscCall(DMDAGetGhostCorners(da, &starts[0], &starts[1], &starts[2], &dims[0], &dims[1], &dims[2]));
  PetscCall(MatSetStencil(A, dim, dims, starts, dof));
  PetscCall(MatSetDM(A, da));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  if (size > 1) {
    /* change viewer to display matrix in natural ordering */
    PetscCall(MatSetOperation(A, MATOP_VIEW, (void (*)(void))MatView_MPI_DA));
    PetscCall(MatSetOperation(A, MATOP_LOAD, (void (*)(void))MatLoad_MPI_DA));
  }
  *J = A;
  PetscFunctionReturn(PETSC_SUCCESS);
}

PETSC_EXTERN PetscErrorCode MatISSetPreallocation_IS(Mat, PetscInt, const PetscInt[], PetscInt, const PetscInt[]);

/*
   Preallocates a MATIS matrix for a DMDA. Builds a local-to-global mapping
   restricted to the local indices actually touched by the DMDAGetElements()
   decomposition (unused indices are mapped to -1), then uses a MATPREALLOCATOR
   matrix fed with the element connectivity to size the local matrix.
*/
PetscErrorCode DMCreateMatrix_DA_IS(DM dm, Mat J)
{
  DM_DA                 *da = (DM_DA *)dm->data;
  Mat                    lJ, P;
  ISLocalToGlobalMapping ltog;
  IS                     is;
  PetscBT                bt;
  const PetscInt        *e_loc, *idx;
  PetscInt               i, nel, nen, nv, dof, *gidx, n, N;

  /* The l2g map of DMDA has all ghosted nodes, and e_loc is a subset of all the local nodes (including the ghosted)
     We need to filter out the local indices that are not represented through the DMDAGetElements decomposition */
  PetscFunctionBegin;
  dof = da->w;
  PetscCall(MatSetBlockSize(J, dof));
  PetscCall(DMGetLocalToGlobalMapping(dm, &ltog));

  /* flag local elements indices in local DMDA numbering */
  PetscCall(ISLocalToGlobalMappingGetSize(ltog, &nv));
  PetscCall(PetscBTCreate(nv / dof, &bt));
  PetscCall(DMDAGetElements(dm, &nel, &nen, &e_loc)); /* this will throw an error if the stencil type is not DMDA_STENCIL_BOX */
  for (i = 0; i < nel * nen; i++) PetscCall(PetscBTSet(bt, e_loc[i]));

  /* filter out (set to -1) the global indices not used by the local elements */
  PetscCall(PetscMalloc1(nv / dof, &gidx));
  PetscCall(ISLocalToGlobalMappingGetBlockIndices(ltog, &idx));
  PetscCall(PetscArraycpy(gidx, idx, nv / dof));
  PetscCall(ISLocalToGlobalMappingRestoreBlockIndices(ltog, &idx));
  for (i = 0; i < nv / dof; i++)
    if (!PetscBTLookup(bt, i)) gidx[i] = -1;
  PetscCall(PetscBTDestroy(&bt));
  PetscCall(ISCreateBlock(PetscObjectComm((PetscObject)dm), dof, nv / dof, gidx, PETSC_OWN_POINTER, &is));
  PetscCall(ISLocalToGlobalMappingCreateIS(is, &ltog));
  PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));
  PetscCall(ISLocalToGlobalMappingDestroy(&ltog));
  PetscCall(ISDestroy(&is));

  /* Preallocation */
  if (dm->prealloc_skip) {
    PetscCall(MatSetUp(J));
  } else {
    /* size the local matrix with a MATPREALLOCATOR fed the element connectivity */
    PetscCall(MatISGetLocalMat(J, &lJ));
    PetscCall(MatGetLocalToGlobalMapping(lJ, &ltog, NULL));
    PetscCall(MatCreate(PetscObjectComm((PetscObject)lJ), &P));
    PetscCall(MatSetType(P, MATPREALLOCATOR));
    PetscCall(MatSetLocalToGlobalMapping(P, ltog, ltog));
    PetscCall(MatGetSize(lJ, &N, NULL));
    PetscCall(MatGetLocalSize(lJ, &n, NULL));
    PetscCall(MatSetSizes(P, n, n, N, N));
    PetscCall(MatSetBlockSize(P, dof));
    PetscCall(MatSetUp(P));
    for (i = 0; i < nel; i++) PetscCall(MatSetValuesBlockedLocal(P, nen, e_loc + i * nen, nen, e_loc + i * nen, NULL, INSERT_VALUES));
    PetscCall(MatPreallocatorPreallocate(P, (PetscBool)!da->prealloc_only, lJ));
    PetscCall(MatISRestoreLocalMat(J, &lJ));
    PetscCall(DMDARestoreElements(dm, &nel, &nen, &e_loc));
    PetscCall(MatDestroy(&P));

    PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode
DMCreateMatrix_DA_2d_MPISELL(DM da, Mat J)
{
  PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny, m, n, dim, s, *cols = NULL, k, nc, *rows = NULL, col, cnt, l, p;
  PetscInt               lstart, lend, pstart, pend, *dnz, *onz;
  MPI_Comm               comm;
  PetscScalar           *values;
  DMBoundaryType         bx, by;
  ISLocalToGlobalMapping ltog;
  DMDAStencilType        st;

  PetscFunctionBegin;
  /*
         nc - number of components per grid point
         col - number of colors needed in one direction for single component problem
  */
  PetscCall(DMDAGetInfo(da, &dim, &m, &n, NULL, NULL, NULL, NULL, &nc, &s, &bx, &by, NULL, &st));
  col = 2 * s + 1;
  PetscCall(DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL));
  PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL));
  PetscCall(PetscObjectGetComm((PetscObject)da, &comm));

  PetscCall(PetscMalloc2(nc, &rows, col * col * nc * nc, &cols));
  PetscCall(DMGetLocalToGlobalMapping(da, &ltog));

  PetscCall(MatSetBlockSize(J, nc));
  /* determine the matrix preallocation information */
  MatPreallocateBegin(comm, nc * nx * ny, nc * nx * ny, dnz, onz);
  for (i = xs; i < xs + nx; i++) {
    /* stencil extent in x, clipped at physical (non-periodic) boundaries */
    pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
    pend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));

    for (j = ys; j < ys + ny; j++) {
      slot = i - gxs + gnx * (j - gys); /* local (ghosted) index of grid point (i,j) */

      lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
      lend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));

      cnt = 0;
      for (k = 0; k < nc; k++) {
        for (l = lstart; l < lend + 1; l++) {
          for (p = pstart; p < pend + 1; p++) {
            if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star have either l = 0 or p = 0 */
              cols[cnt++] = k + nc * (slot + gnx * l + p);
            }
          }
        }
        rows[k] = k + nc * (slot);
      }
      PetscCall(MatPreallocateSetLocal(ltog, nc, rows, ltog, cnt, cols, dnz, onz));
    }
  }
  PetscCall(MatSetBlockSize(J, nc));
  PetscCall(MatSeqSELLSetPreallocation(J, 0, dnz));
  PetscCall(MatMPISELLSetPreallocation(J, 0, dnz, 0, onz));
  MatPreallocateEnd(dnz, onz);

  PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    PetscCall(PetscCalloc1(col * col * nc * nc, &values));
    for (i = xs; i < xs + nx; i++) {
      pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
      pend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));

      for (j = ys; j < ys + ny; j++) {
        slot = i - gxs + gnx * (j - gys);

        lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
        lend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));

        cnt = 0;
        for (k = 0; k < nc; k++) {
          for (l = lstart; l < lend + 1; l++) {
            for (p = pstart; p < pend + 1; p++) {
              if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star have either l = 0 or p = 0 */
                cols[cnt++] = k + nc * (slot + gnx * l + p);
              }
            }
          }
          rows[k] = k + nc * (slot);
        }
        PetscCall(MatSetValuesLocal(J, nc, rows, cnt, cols, values, INSERT_VALUES));
      }
    }
    PetscCall(PetscFree(values));
    /* do not copy values to GPU since they are all zero and not yet needed there */
    PetscCall(MatBindToCPU(J, PETSC_TRUE));
    PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatBindToCPU(J, PETSC_FALSE));
    PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  }
  PetscCall(PetscFree2(rows, cols));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Preallocates (and, unless da->prealloc_only, fills with zeros) a SELL matrix for a 3d DMDA */
PetscErrorCode DMCreateMatrix_DA_3d_MPISELL(DM da, Mat J)
{
  PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
  PetscInt               m, n, dim, s, *cols = NULL, k, nc, *rows = NULL, col, cnt, l, p, *dnz = NULL, *onz = NULL;
  PetscInt               istart, iend, jstart, jend, kstart, kend, zs, nz, gzs, gnz, ii, jj, kk, M, N, P;
  MPI_Comm               comm;
  PetscScalar           *values;
  DMBoundaryType         bx, by, bz;
  ISLocalToGlobalMapping ltog;
  DMDAStencilType        st;

  PetscFunctionBegin;
  /*
         nc - number of components per grid point
         col - number of colors needed in one direction for single component problem
  */
  PetscCall(DMDAGetInfo(da, &dim, &m, &n, &p, &M, &N, &P, &nc, &s, &bx, &by, &bz, &st));
  col = 2 * s + 1;
  PetscCall(DMDAGetCorners(da, &xs, &ys, &zs, &nx, &ny, &nz));
  PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, &gzs, &gnx, &gny, &gnz));
  PetscCall(PetscObjectGetComm((PetscObject)da, &comm));

  PetscCall(PetscMalloc2(nc, &rows, col * col * col * nc * nc, &cols));
PetscCall(DMGetLocalToGlobalMapping(da, <og)); 949 950 PetscCall(MatSetBlockSize(J, nc)); 951 /* determine the matrix preallocation information */ 952 MatPreallocateBegin(comm, nc * nx * ny * nz, nc * nx * ny * nz, dnz, onz); 953 for (i = xs; i < xs + nx; i++) { 954 istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i)); 955 iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1)); 956 for (j = ys; j < ys + ny; j++) { 957 jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j)); 958 jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1)); 959 for (k = zs; k < zs + nz; k++) { 960 kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k)); 961 kend = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1)); 962 963 slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs); 964 965 cnt = 0; 966 for (l = 0; l < nc; l++) { 967 for (ii = istart; ii < iend + 1; ii++) { 968 for (jj = jstart; jj < jend + 1; jj++) { 969 for (kk = kstart; kk < kend + 1; kk++) { 970 if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/ 971 cols[cnt++] = l + nc * (slot + ii + gnx * jj + gnx * gny * kk); 972 } 973 } 974 } 975 } 976 rows[l] = l + nc * (slot); 977 } 978 PetscCall(MatPreallocateSetLocal(ltog, nc, rows, ltog, cnt, cols, dnz, onz)); 979 } 980 } 981 } 982 PetscCall(MatSetBlockSize(J, nc)); 983 PetscCall(MatSeqSELLSetPreallocation(J, 0, dnz)); 984 PetscCall(MatMPISELLSetPreallocation(J, 0, dnz, 0, onz)); 985 MatPreallocateEnd(dnz, onz); 986 PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog)); 987 988 /* 989 For each node in the grid: we get the neighbors in the local (on processor ordering 990 that includes the ghost points) then MatSetValuesLocal() maps those indices to the global 991 PETSc ordering. 992 */ 993 if (!da->prealloc_only) { 994 PetscCall(PetscCalloc1(col * col * col * nc * nc * nc, &values)); 995 for (i = xs; i < xs + nx; i++) { 996 istart = (bx == DM_BOUNDARY_PERIODIC) ? 
-s : (PetscMax(-s, -i)); 997 iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1)); 998 for (j = ys; j < ys + ny; j++) { 999 jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j)); 1000 jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1)); 1001 for (k = zs; k < zs + nz; k++) { 1002 kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k)); 1003 kend = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1)); 1004 1005 slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs); 1006 1007 cnt = 0; 1008 for (l = 0; l < nc; l++) { 1009 for (ii = istart; ii < iend + 1; ii++) { 1010 for (jj = jstart; jj < jend + 1; jj++) { 1011 for (kk = kstart; kk < kend + 1; kk++) { 1012 if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/ 1013 cols[cnt++] = l + nc * (slot + ii + gnx * jj + gnx * gny * kk); 1014 } 1015 } 1016 } 1017 } 1018 rows[l] = l + nc * (slot); 1019 } 1020 PetscCall(MatSetValuesLocal(J, nc, rows, cnt, cols, values, INSERT_VALUES)); 1021 } 1022 } 1023 } 1024 PetscCall(PetscFree(values)); 1025 /* do not copy values to GPU since they are all zero and not yet needed there */ 1026 PetscCall(MatBindToCPU(J, PETSC_TRUE)); 1027 PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY)); 1028 PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY)); 1029 PetscCall(MatBindToCPU(J, PETSC_FALSE)); 1030 PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE)); 1031 } 1032 PetscCall(PetscFree2(rows, cols)); 1033 PetscFunctionReturn(PETSC_SUCCESS); 1034 } 1035 1036 PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ(DM da, Mat J) 1037 { 1038 PetscInt xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny, m, n, dim, s, *cols = NULL, k, nc, *rows = NULL, col, cnt, l, p, M, N; 1039 PetscInt lstart, lend, pstart, pend, *dnz, *onz; 1040 MPI_Comm comm; 1041 DMBoundaryType bx, by; 1042 ISLocalToGlobalMapping ltog, mltog; 1043 DMDAStencilType st; 1044 PetscBool removedups = PETSC_FALSE, alreadyboundtocpu = 
PETSC_TRUE;

  PetscFunctionBegin;
  /*
         nc - number of components per grid point
         col - number of colors needed in one direction for single component problem
  */
  PetscCall(DMDAGetInfo(da, &dim, &m, &n, NULL, &M, &N, NULL, &nc, &s, &bx, &by, NULL, &st));
  if (bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE) PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_TRUE));
  col = 2 * s + 1;
  /*
    With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times
    because of "wrapping" around the end of the domain hitting an entry already counted in the other direction.
  */
  if (M == 1 && 2 * s >= m) removedups = PETSC_TRUE;
  if (N == 1 && 2 * s >= n) removedups = PETSC_TRUE;
  PetscCall(DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL));
  PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL));
  PetscCall(PetscObjectGetComm((PetscObject)da, &comm));

  PetscCall(PetscMalloc2(nc, &rows, col * col * nc * nc, &cols));
  PetscCall(DMGetLocalToGlobalMapping(da, &ltog));

  PetscCall(MatSetBlockSize(J, nc));
  /* determine the matrix preallocation information */
  MatPreallocateBegin(comm, nc * nx * ny, nc * nx * ny, dnz, onz);
  for (i = xs; i < xs + nx; i++) {
    /* stencil extent in x, clipped at physical (non-periodic) boundaries */
    pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
    pend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));

    for (j = ys; j < ys + ny; j++) {
      slot = i - gxs + gnx * (j - gys); /* local (ghosted) index of grid point (i,j) */

      lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
      lend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));

      cnt = 0;
      for (k = 0; k < nc; k++) {
        for (l = lstart; l < lend + 1; l++) {
          for (p = pstart; p < pend + 1; p++) {
            if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star have either l = 0 or p = 0 */
              cols[cnt++] = k + nc * (slot + gnx * l + p);
            }
          }
        }
        rows[k] = k + nc * (slot);
      }
      if (removedups) PetscCall(MatPreallocateSetLocalRemoveDups(ltog, nc, rows, ltog, cnt, cols, dnz, onz));
      else PetscCall(MatPreallocateSetLocal(ltog, nc, rows, ltog, cnt, cols, dnz, onz));
    }
  }
  PetscCall(MatSetBlockSize(J, nc));
  PetscCall(MatSeqAIJSetPreallocation(J, 0, dnz));
  PetscCall(MatMPIAIJSetPreallocation(J, 0, dnz, 0, onz));
  MatPreallocateEnd(dnz, onz);
  PetscCall(MatGetLocalToGlobalMapping(J, &mltog, NULL));
  if (!mltog) PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    for (i = xs; i < xs + nx; i++) {
      pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
      pend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));

      for (j = ys; j < ys + ny; j++) {
        slot = i - gxs + gnx * (j - gys);

        lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
        lend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));

        cnt = 0;
        for (l = lstart; l < lend + 1; l++) {
          for (p = pstart; p < pend + 1; p++) {
            if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star have either l = 0 or p = 0 */
              /* the nc components of a grid point occupy consecutive column indices */
              cols[cnt++] = nc * (slot + gnx * l + p);
              for (k = 1; k < nc; k++) {
                cols[cnt] = 1 + cols[cnt - 1];
                cnt++;
              }
            }
          }
        }
        for (k = 0; k < nc; k++) rows[k] = k + nc * (slot);
        PetscCall(MatSetValuesLocal(J, nc, rows, cnt, cols, NULL, INSERT_VALUES));
      }
    }
    /* do not copy values to GPU since they are all zero and not yet needed there */
    PetscCall(MatBoundToCPU(J, &alreadyboundtocpu));
    PetscCall(MatBindToCPU(J, PETSC_TRUE));
    PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
    if (!alreadyboundtocpu) PetscCall(MatBindToCPU(J, PETSC_FALSE));
    PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
    if (bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE) PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_FALSE));
  }
  PetscCall(PetscFree2(rows, cols));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Preallocates/assembles an AIJ matrix for a 2d DMDA honoring the user-provided
   dfill (diagonal-block) and ofill (off-diagonal-block) sparsity set by DMDASetBlockFills() */
PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ_Fill(DM da, Mat J)
{
  PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
  PetscInt               m, n, dim, s, *cols, k, nc, row, col, cnt, maxcnt = 0, l, p, M, N;
  PetscInt               lstart, lend, pstart, pend, *dnz, *onz;
  DM_DA                 *dd = (DM_DA *)da->data;
  PetscInt               ifill_col, *ofill = dd->ofill, *dfill = dd->dfill;
  MPI_Comm               comm;
  DMBoundaryType         bx, by;
  ISLocalToGlobalMapping ltog;
  DMDAStencilType        st;
  PetscBool              removedups = PETSC_FALSE;

  PetscFunctionBegin;
  /*
         nc - number of components per grid point
         col - number of colors needed in one direction for single component problem
  */
  PetscCall(DMDAGetInfo(da, &dim, &m, &n, NULL, &M, &N, NULL, &nc, &s, &bx, &by, NULL, &st));
  col =
2 * s + 1;
  /*
    With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times
    because of "wrapping" around the end of the domain hitting an entry already counted in the other direction.
  */
  if (M == 1 && 2 * s >= m) removedups = PETSC_TRUE;
  if (N == 1 && 2 * s >= n) removedups = PETSC_TRUE;
  PetscCall(DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL));
  PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL));
  PetscCall(PetscObjectGetComm((PetscObject)da, &comm));

  PetscCall(PetscMalloc1(col * col * nc, &cols));
  PetscCall(DMGetLocalToGlobalMapping(da, &ltog));

  PetscCall(MatSetBlockSize(J, nc));
  /* determine the matrix preallocation information */
  MatPreallocateBegin(comm, nc * nx * ny, nc * nx * ny, dnz, onz);
  for (i = xs; i < xs + nx; i++) {
    /* stencil extent, clipped at physical (non-periodic) boundaries */
    pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
    pend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));

    for (j = ys; j < ys + ny; j++) {
      slot = i - gxs + gnx * (j - gys);

      lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
      lend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));

      /* one row at a time: component k couples via ofill off the diagonal point and dfill on it */
      for (k = 0; k < nc; k++) {
        cnt = 0;
        for (l = lstart; l < lend + 1; l++) {
          for (p = pstart; p < pend + 1; p++) {
            if (l || p) {
              if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star */
                for (ifill_col = ofill[k]; ifill_col < ofill[k + 1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc * (slot + gnx * l + p);
              }
            } else {
              if (dfill) {
                for (ifill_col = dfill[k]; ifill_col < dfill[k + 1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc * (slot + gnx * l + p);
              } else {
                for (ifill_col = 0; ifill_col < nc; ifill_col++) cols[cnt++] = ifill_col + nc * (slot + gnx * l + p);
              }
            }
          }
        }
        row    = k + nc * (slot);
        maxcnt = PetscMax(maxcnt, cnt);
        if (removedups) PetscCall(MatPreallocateSetLocalRemoveDups(ltog, 1, &row, ltog, cnt, cols, dnz, onz));
        else PetscCall(MatPreallocateSetLocal(ltog, 1, &row, ltog, cnt, cols, dnz, onz));
      }
    }
  }
  PetscCall(MatSeqAIJSetPreallocation(J, 0, dnz));
  PetscCall(MatMPIAIJSetPreallocation(J, 0, dnz, 0, onz));
  MatPreallocateEnd(dnz, onz);
  PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    for (i = xs; i < xs + nx; i++) {
      pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
      pend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));

      for (j = ys; j < ys + ny; j++) {
        slot = i - gxs + gnx * (j - gys);

        lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
        lend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));

        for (k = 0; k < nc; k++) {
          cnt = 0;
          for (l = lstart; l < lend + 1; l++) {
            for (p = pstart; p < pend + 1; p++) {
              if (l || p) {
                if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star */
                  for (ifill_col = ofill[k]; ifill_col < ofill[k + 1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc * (slot + gnx * l + p);
                }
              } else {
                if (dfill) {
                  for (ifill_col = dfill[k]; ifill_col < dfill[k + 1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc * (slot + gnx * l + p);
                } else {
                  for (ifill_col = 0; ifill_col < nc; ifill_col++) cols[cnt++] = ifill_col + nc * (slot + gnx * l + p);
                }
              }
            }
          }
          row = k + nc * (slot);
          PetscCall(MatSetValuesLocal(J, 1, &row, cnt, cols, NULL, INSERT_VALUES));
        }
      }
    }
    /* do not copy values to GPU since they are all zero and not yet needed there */
    PetscCall(MatBindToCPU(J, PETSC_TRUE));
    PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatBindToCPU(J, PETSC_FALSE));
    PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  }
  PetscCall(PetscFree(cols));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Preallocates (and, unless da->prealloc_only, fills with zeros) an AIJ matrix for a 3d DMDA
   with dense coupling between the nc components at each grid point */
PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ(DM da, Mat J)
{
  PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
  PetscInt               m, n, dim, s, *cols = NULL, k, nc, *rows = NULL, col, cnt, l, p, *dnz = NULL, *onz = NULL;
  PetscInt               istart, iend, jstart, jend, kstart, kend, zs, nz, gzs, gnz, ii, jj, kk, M, N, P;
  MPI_Comm               comm;
  DMBoundaryType         bx, by, bz;
  ISLocalToGlobalMapping ltog, mltog;
  DMDAStencilType        st;
  PetscBool              removedups = PETSC_FALSE;

  PetscFunctionBegin;
  /*
         nc - number of components per grid point
         col - number of colors needed in one direction for single component problem
  */
  PetscCall(DMDAGetInfo(da, &dim, &m,
&n, &p, &M, &N, &P, &nc, &s, &bx, &by, &bz, &st));
  if (bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE && bz == DM_BOUNDARY_NONE) PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_TRUE));
  col = 2 * s + 1;

  /*
    With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times
    because of "wrapping" around the end of the domain hitting an entry already counted in the other direction.
  */
  if (M == 1 && 2 * s >= m) removedups = PETSC_TRUE;
  if (N == 1 && 2 * s >= n) removedups = PETSC_TRUE;
  if (P == 1 && 2 * s >= p) removedups = PETSC_TRUE;

  PetscCall(DMDAGetCorners(da, &xs, &ys, &zs, &nx, &ny, &nz));
  PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, &gzs, &gnx, &gny, &gnz));
  PetscCall(PetscObjectGetComm((PetscObject)da, &comm));

  PetscCall(PetscMalloc2(nc, &rows, col * col * col * nc * nc, &cols));
  PetscCall(DMGetLocalToGlobalMapping(da, &ltog));

  PetscCall(MatSetBlockSize(J, nc));
  /* determine the matrix preallocation information */
  MatPreallocateBegin(comm, nc * nx * ny * nz, nc * nx * ny * nz, dnz, onz);
  for (i = xs; i < xs + nx; i++) {
    /* stencil extent in each direction, clipped at physical (non-periodic) boundaries */
    istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
    iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
    for (j = ys; j < ys + ny; j++) {
      jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
      jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
      for (k = zs; k < zs + nz; k++) {
        kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
        kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));

        slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs); /* local (ghosted) index of (i,j,k) */

        cnt = 0;
        for (l = 0; l < nc; l++) {
          for (ii = istart; ii < iend + 1; ii++) {
            for (jj = jstart; jj < jend + 1; jj++) {
              for (kk = kstart; kk < kend + 1; kk++) {
                if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/
                  cols[cnt++] = l + nc * (slot + ii + gnx * jj + gnx * gny * kk);
                }
              }
            }
          }
          rows[l] = l + nc * (slot);
        }
        if (removedups) PetscCall(MatPreallocateSetLocalRemoveDups(ltog, nc, rows, ltog, cnt, cols, dnz, onz));
        else PetscCall(MatPreallocateSetLocal(ltog, nc, rows, ltog, cnt, cols, dnz, onz));
      }
    }
  }
  PetscCall(MatSetBlockSize(J, nc));
  PetscCall(MatSeqAIJSetPreallocation(J, 0, dnz));
  PetscCall(MatMPIAIJSetPreallocation(J, 0, dnz, 0, onz));
  MatPreallocateEnd(dnz, onz);
  PetscCall(MatGetLocalToGlobalMapping(J, &mltog, NULL));
  if (!mltog) PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    for (i = xs; i < xs + nx; i++) {
      istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
      iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
      for (j = ys; j < ys + ny; j++) {
        jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
        jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
        for (k = zs; k < zs + nz; k++) {
          kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
          kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));

          slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);

          /* note: this insertion pass iterates kk/jj/ii outermost-to-innermost, unlike the
             counting pass above; the nc components of each neighbor get consecutive columns */
          cnt = 0;
          for (kk = kstart; kk < kend + 1; kk++) {
            for (jj = jstart; jj < jend + 1; jj++) {
              for (ii = istart; ii < iend + 1; ii++) {
                if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/
                  cols[cnt++] = nc * (slot + ii + gnx * jj + gnx * gny * kk);
                  for (l = 1; l < nc; l++) {
                    cols[cnt] = 1 + cols[cnt - 1];
                    cnt++;
                  }
                }
              }
            }
          }
          rows[0] = nc * (slot);
          for (l = 1; l < nc; l++) rows[l] = 1 + rows[l - 1];
          PetscCall(MatSetValuesLocal(J, nc, rows, cnt, cols, NULL, INSERT_VALUES));
        }
      }
    }
    /* do not copy values to GPU since they are all zero and not yet needed there */
    PetscCall(MatBindToCPU(J, PETSC_TRUE));
    PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
    if (bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE && bz == DM_BOUNDARY_NONE) PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_FALSE));
    PetscCall(MatBindToCPU(J, PETSC_FALSE));
    PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  }
  PetscCall(PetscFree2(rows, cols));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Preallocates/assembles an AIJ matrix for a 1d DMDA honoring the user-provided
   dfill/ofill block sparsity; only supports stencil width s <= 1 (checked below) */
PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ_Fill(DM da, Mat J)
{
  DM_DA                 *dd = (DM_DA *)da->data;
  PetscInt               xs, nx, i, j, gxs, gnx, row, k, l;
  PetscInt               m, dim, s, *cols = NULL, nc, cnt, maxcnt = 0, *ocols;
  PetscInt              *ofill = dd->ofill, *dfill = dd->dfill;
  DMBoundaryType         bx;
  ISLocalToGlobalMapping ltog;
  PetscMPIInt            rank, size;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)da), &rank));
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)da), &size));

  /*
         nc - number of components per grid point
  */
  PetscCall(DMDAGetInfo(da, &dim, &m, NULL,
NULL, NULL, NULL, NULL, &nc, &s, &bx, NULL, NULL, NULL));
  PetscCheck(s <= 1, PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "Matrix creation for 1d not implemented correctly for stencil width larger than 1");
  PetscCall(DMDAGetCorners(da, &xs, NULL, NULL, &nx, NULL, NULL));
  PetscCall(DMDAGetGhostCorners(da, &gxs, NULL, NULL, &gnx, NULL, NULL));

  PetscCall(MatSetBlockSize(J, nc));
  /* cols/ocols hold the per-row diagonal/off-diagonal nonzero counts for preallocation */
  PetscCall(PetscCalloc2(nx * nc, &cols, nx * nc, &ocols));

  /*
    note should be smaller for first and last process with no periodic
    does not handle dfill
  */
  cnt = 0;
  /* coupling with process to the left */
  for (i = 0; i < s; i++) {
    for (j = 0; j < nc; j++) {
      ocols[cnt] = ((rank == 0) ? 0 : (s - i) * (ofill[j + 1] - ofill[j]));
      cols[cnt]  = dfill[j + 1] - dfill[j] + (s + i) * (ofill[j + 1] - ofill[j]);
      if (rank == 0 && (dd->bx == DM_BOUNDARY_PERIODIC)) {
        /* periodic wrap: the left neighbor lives on the last process (or locally when serial) */
        if (size > 1) ocols[cnt] += (s - i) * (ofill[j + 1] - ofill[j]);
        else cols[cnt] += (s - i) * (ofill[j + 1] - ofill[j]);
      }
      maxcnt = PetscMax(maxcnt, ocols[cnt] + cols[cnt]);
      cnt++;
    }
  }
  /* interior rows: full stencil of width 2*s */
  for (i = s; i < nx - s; i++) {
    for (j = 0; j < nc; j++) {
      cols[cnt] = dfill[j + 1] - dfill[j] + 2 * s * (ofill[j + 1] - ofill[j]);
      maxcnt    = PetscMax(maxcnt, ocols[cnt] + cols[cnt]);
      cnt++;
    }
  }
  /* coupling with process to the right */
  for (i = nx - s; i < nx; i++) {
    for (j = 0; j < nc; j++) {
      ocols[cnt] = ((rank == (size - 1)) ? 0 : (i - nx + s + 1) * (ofill[j + 1] - ofill[j]));
      cols[cnt]  = dfill[j + 1] - dfill[j] + (s + nx - i - 1) * (ofill[j + 1] - ofill[j]);
      if ((rank == size - 1) && (dd->bx == DM_BOUNDARY_PERIODIC)) {
        if (size > 1) ocols[cnt] += (i - nx + s + 1) * (ofill[j + 1] - ofill[j]);
        else cols[cnt] += (i - nx + s + 1) * (ofill[j + 1] - ofill[j]);
      }
      maxcnt = PetscMax(maxcnt, ocols[cnt] + cols[cnt]);
      cnt++;
    }
  }

  PetscCall(MatSeqAIJSetPreallocation(J, 0, cols));
  PetscCall(MatMPIAIJSetPreallocation(J, 0, cols, 0, ocols));
  PetscCall(PetscFree2(cols, ocols));

  PetscCall(DMGetLocalToGlobalMapping(da, &ltog));
  PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    PetscCall(PetscMalloc1(maxcnt, &cols));
    row = xs * nc; /* note: the insertion loops below use global row/column indices via MatSetValues() */
    /* coupling with process to the left */
    for (i = xs; i < xs + s; i++) {
      for (j = 0; j < nc; j++) {
        cnt = 0;
        if (rank) {
          for (l = 0; l < s; l++) {
            for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i - s + l) * nc + ofill[k];
          }
        }
        if (rank == 0 && (dd->bx == DM_BOUNDARY_PERIODIC)) {
          for (l = 0; l < s; l++) {
            for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (m + i - s - l) * nc + ofill[k];
          }
        }
        if (dfill) {
          for (k = dfill[j]; k < dfill[j + 1]; k++) cols[cnt++] = i * nc + dfill[k];
        } else {
          for (k = 0; k < nc; k++) cols[cnt++] = i * nc + k;
        }
        for (l = 0; l < s; l++) {
          for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i + s - l) * nc + ofill[k];
        }
        PetscCall(MatSetValues(J, 1, &row, cnt, cols, NULL, INSERT_VALUES));
        row++;
      }
    }
    /* interior rows */
    for (i = xs + s; i < xs + nx - s; i++) {
      for (j = 0; j < nc; j++) {
        cnt = 0;
        for (l = 0; l < s; l++) {
          for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i - s + l) * nc + ofill[k];
        }
        if (dfill) {
          for (k = dfill[j]; k < dfill[j + 1]; k++) cols[cnt++] = i * nc + dfill[k];
        } else {
          for (k = 0; k < nc; k++) cols[cnt++] = i * nc + k;
        }
        for (l = 0; l < s; l++) {
          for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i + s - l) * nc + ofill[k];
        }
        PetscCall(MatSetValues(J, 1, &row, cnt, cols, NULL, INSERT_VALUES));
        row++;
      }
    }
    /* coupling with process to the right */
    for (i = xs + nx - s; i < xs + nx; i++) {
      for (j = 0; j < nc; j++) {
        cnt = 0;
        for (l = 0; l < s; l++) {
          for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i - s + l) * nc + ofill[k];
        }
        if (dfill) {
          for (k = dfill[j]; k < dfill[j + 1]; k++) cols[cnt++] = i * nc + dfill[k];
        } else {
          for (k = 0; k < nc; k++) cols[cnt++] = i * nc + k;
        }
        if (rank < size - 1) {
          for (l = 0; l < s; l++) {
            for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i + s - l) * nc + ofill[k];
          }
        }
        if ((rank == size - 1) && (dd->bx == DM_BOUNDARY_PERIODIC)) {
          /* periodic wrap back to the start of the domain; with s <= 1 enforced above, l is only 0 */
          for (l = 0; l < s; l++) {
            for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i - s - l - m + 2) * nc + ofill[k];
          }
        }
        PetscCall(MatSetValues(J, 1, &row, cnt, cols, NULL, INSERT_VALUES));
        row++;
      }
    }
    PetscCall(PetscFree(cols));
    /* do not copy values to GPU since they are all zero and not yet needed there */
    PetscCall(MatBindToCPU(J, PETSC_TRUE));
    PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatBindToCPU(J, PETSC_FALSE));
    PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Preallocates (and, unless da->prealloc_only, fills with zeros) an AIJ matrix for a 1d DMDA
   with dense coupling between the nc components at each grid point */
PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ(DM da, Mat J)
{
  PetscInt xs, nx, i, i1, slot, gxs, gnx;
  PetscInt m, dim, s, *cols = NULL, nc, *rows = NULL, col, cnt, l;
  PetscInt istart, iend;
  DMBoundaryType bx;
  ISLocalToGlobalMapping ltog, mltog;

  PetscFunctionBegin;
  /*
    nc - number of components per grid point
    col - number of colors needed in one direction for single component problem
  */
  PetscCall(DMDAGetInfo(da, &dim, &m, NULL, NULL, NULL, NULL, NULL, &nc, &s, &bx, NULL, NULL, NULL));
  if (bx == DM_BOUNDARY_NONE) PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_TRUE));
  col = 2 * s + 1;

  PetscCall(DMDAGetCorners(da, &xs, NULL, NULL, &nx, NULL, NULL));
  PetscCall(DMDAGetGhostCorners(da, &gxs, NULL, NULL, &gnx, NULL, NULL));

  /* every row gets at most col*nc nonzeros: full stencil width times components */
  PetscCall(MatSetBlockSize(J, nc));
  PetscCall(MatSeqAIJSetPreallocation(J, col * nc, NULL));
  PetscCall(MatMPIAIJSetPreallocation(J, col * nc, NULL, col * nc, NULL));

  PetscCall(DMGetLocalToGlobalMapping(da, &ltog));
  PetscCall(MatGetLocalToGlobalMapping(J, &mltog, NULL));
  if (!mltog) PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    PetscCall(PetscMalloc2(nc, &rows, col * nc * nc, &cols));
    for (i = xs; i < xs + nx; i++) {
      /* clip the stencil to the local ghosted region */
      istart = PetscMax(-s, gxs - i);
      iend = PetscMin(s, gxs + gnx - i - 1);
      slot = i - gxs;

      /* dense coupling: every component of every stencil neighbor */
      cnt = 0;
      for (i1 = istart; i1 < iend + 1; i1++) {
        cols[cnt++] = nc * (slot + i1);
        for (l = 1; l < nc; l++) {
          cols[cnt] = 1 + cols[cnt - 1];
          cnt++;
        }
      }
      rows[0] = nc * (slot);
      for (l = 1; l < nc; l++) rows[l] = 1 + rows[l - 1];
      PetscCall(MatSetValuesLocal(J, nc, rows, cnt, cols, NULL, INSERT_VALUES));
    }
    /* do not copy values to GPU since they are all zero and not yet needed there */
    PetscCall(MatBindToCPU(J, PETSC_TRUE));
    PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
    if (bx == DM_BOUNDARY_NONE) PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_FALSE));
    PetscCall(MatBindToCPU(J, PETSC_FALSE));
    PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
    PetscCall(PetscFree2(rows, cols));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Variant of DMCreateMatrix_DA_1d_MPIAIJ() that uses a single total-size
   preallocation for the sequential AIJ matrix instead of per-row counts. */
PetscErrorCode DMCreateMatrix_DA_1d_SeqAIJ_NoPreallocation(DM da, Mat J)
{
  PetscInt xs, nx, i, i1, slot, gxs, gnx;
  PetscInt m, dim, s, *cols = NULL, nc, *rows = NULL, col, cnt, l;
  PetscInt istart, iend;
  DMBoundaryType bx;
  ISLocalToGlobalMapping ltog, mltog;

  PetscFunctionBegin;
  /*
    nc - number of components per grid point
    col - number of colors needed in one direction for single component problem
  */
  PetscCall(DMDAGetInfo(da, &dim, &m, NULL, NULL, NULL, NULL, NULL, &nc, &s, &bx, NULL, NULL, NULL));
  col = 2 * s + 1;

  PetscCall(DMDAGetCorners(da, &xs, NULL, NULL, &nx, NULL, NULL));
  PetscCall(DMDAGetGhostCorners(da, &gxs, NULL, NULL, &gnx, NULL, NULL));

  PetscCall(MatSetBlockSize(J, nc));
  /* one aggregate preallocation: rows (nx*nc) times max nonzeros per row (col*nc) */
  PetscCall(MatSeqAIJSetTotalPreallocation(J, nx * nc * col * nc));

  PetscCall(DMGetLocalToGlobalMapping(da, &ltog));
  PetscCall(MatGetLocalToGlobalMapping(J, &mltog, NULL));
  if (!mltog) PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    PetscCall(PetscMalloc2(nc, &rows, col * nc * nc, &cols));
    for (i = xs; i < xs + nx; i++) {
      istart = PetscMax(-s, gxs - i);
      iend = PetscMin(s, gxs + gnx - i - 1);
      slot = i - gxs;

      cnt = 0;
      for (i1 = istart; i1 < iend + 1; i1++) {
        cols[cnt++] = nc * (slot + i1);
        for (l = 1; l < nc; l++) {
          cols[cnt] = 1 + cols[cnt - 1];
          cnt++;
        }
      }
      rows[0] = nc * (slot);
      for (l = 1; l < nc; l++) rows[l] = 1 + rows[l - 1];
      PetscCall(MatSetValuesLocal(J, nc, rows, cnt, cols, NULL, INSERT_VALUES));
    }
    /* do not copy values to GPU since they are all zero and not yet needed there */
    PetscCall(MatBindToCPU(J, PETSC_TRUE));
    PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
    if (bx == DM_BOUNDARY_NONE) PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_FALSE));
    PetscCall(MatBindToCPU(J, PETSC_FALSE));
    PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
    PetscCall(PetscFree2(rows, cols));
  }
  PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_FALSE));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Preallocates (and optionally fills with zeros) a blocked BAIJ matrix for a
   2d DMDA; one nc x nc block per grid-point pair in the stencil. */
PetscErrorCode DMCreateMatrix_DA_2d_MPIBAIJ(DM da, Mat J)
{
  PetscInt xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
  PetscInt m, n, dim, s, *cols, nc, col, cnt, *dnz, *onz;
  PetscInt istart, iend, jstart, jend, ii, jj;
  MPI_Comm comm;
  PetscScalar *values;
  DMBoundaryType bx, by;
  DMDAStencilType st;
  ISLocalToGlobalMapping ltog;

  PetscFunctionBegin;
  /*
    nc - number of components per grid point
    col - number of colors needed in one direction for single component problem
  */
  PetscCall(DMDAGetInfo(da, &dim, &m, &n, NULL, NULL, NULL, NULL, &nc, &s, &bx, &by, NULL, &st));
  col = 2 * s + 1;

  PetscCall(DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL));
  PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL));
  PetscCall(PetscObjectGetComm((PetscObject)da, &comm));

  PetscCall(PetscMalloc1(col * col * nc * nc, &cols));

  PetscCall(DMGetLocalToGlobalMapping(da, &ltog));

  /* determine the matrix preallocation information */
  MatPreallocateBegin(comm, nx * ny, nx * ny, dnz, onz);
  for (i = xs; i < xs + nx; i++) {
    /* periodic boundaries always see the full stencil; otherwise clip at the physical edge */
    istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
    iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
    for (j = ys; j < ys + ny; j++) {
      jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
      jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
      slot = i - gxs + gnx * (j - gys);

      /* Find block columns in block row */
      cnt = 0;
      for (ii = istart; ii < iend + 1; ii++) {
        for (jj = jstart; jj < jend + 1; jj++) {
          if (st == DMDA_STENCIL_BOX || !ii || !jj) { /* BOX or on the STAR */
            cols[cnt++] = slot + ii + gnx * jj;
          }
        }
      }
      PetscCall(MatPreallocateSetLocalBlock(ltog, 1, &slot, ltog, cnt, cols, dnz, onz));
    }
  }
  PetscCall(MatSeqBAIJSetPreallocation(J, nc, 0, dnz));
  PetscCall(MatMPIBAIJSetPreallocation(J, nc, 0, dnz, 0, onz));
  MatPreallocateEnd(dnz, onz);

  PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    /* insert explicit zero blocks to lock in the nonzero structure */
    PetscCall(PetscCalloc1(col * col * nc * nc, &values));
    for (i = xs; i < xs + nx; i++) {
      istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
      iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
      for (j = ys; j < ys + ny; j++) {
        jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
        jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
        slot = i - gxs + gnx * (j - gys);
        cnt = 0;
        for (ii = istart; ii < iend + 1; ii++) {
          for (jj = jstart; jj < jend + 1; jj++) {
            if (st == DMDA_STENCIL_BOX || !ii || !jj) { /* BOX or on the STAR */
              cols[cnt++] = slot + ii + gnx * jj;
            }
          }
        }
        PetscCall(MatSetValuesBlockedLocal(J, 1, &slot, cnt, cols, values, INSERT_VALUES));
      }
    }
    PetscCall(PetscFree(values));
    /* do not copy values to GPU since they are all zero and not yet needed there */
    PetscCall(MatBindToCPU(J, PETSC_TRUE));
    PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatBindToCPU(J, PETSC_FALSE));
    PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  }
  PetscCall(PetscFree(cols));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Preallocates (and optionally fills with zeros) a blocked BAIJ matrix for a
   3d DMDA; one nc x nc block per grid-point pair in the stencil. */
PetscErrorCode DMCreateMatrix_DA_3d_MPIBAIJ(DM da, Mat J)
{
  PetscInt xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
  PetscInt m, n, dim, s, *cols, k, nc, col, cnt, p, *dnz, *onz;
  PetscInt istart, iend, jstart, jend, kstart, kend, zs, nz, gzs, gnz, ii, jj, kk;
  MPI_Comm comm;
  PetscScalar *values;
  DMBoundaryType bx, by, bz;
  DMDAStencilType st;
  ISLocalToGlobalMapping ltog;

  PetscFunctionBegin;
  /*
    nc - number of components per grid point
    col - number of colors needed in one direction for single component problem
  */
  PetscCall(DMDAGetInfo(da, &dim, &m, &n, &p, NULL, NULL, NULL, &nc, &s, &bx, &by, &bz, &st));
  col = 2 * s + 1;

  PetscCall(DMDAGetCorners(da, &xs, &ys, &zs, &nx, &ny, &nz));
  PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, &gzs, &gnx, &gny, &gnz));
  PetscCall(PetscObjectGetComm((PetscObject)da, &comm));

  PetscCall(PetscMalloc1(col * col * col, &cols));

  PetscCall(DMGetLocalToGlobalMapping(da, &ltog));

  /* determine the matrix preallocation
information */
  MatPreallocateBegin(comm, nx * ny * nz, nx * ny * nz, dnz, onz);
  for (i = xs; i < xs + nx; i++) {
    /* periodic boundaries always see the full stencil; otherwise clip at the physical edge */
    istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
    iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
    for (j = ys; j < ys + ny; j++) {
      jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
      jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
      for (k = zs; k < zs + nz; k++) {
        kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
        kend = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));

        slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);

        /* Find block columns in block row */
        cnt = 0;
        for (ii = istart; ii < iend + 1; ii++) {
          for (jj = jstart; jj < jend + 1; jj++) {
            for (kk = kstart; kk < kend + 1; kk++) {
              if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/
                cols[cnt++] = slot + ii + gnx * jj + gnx * gny * kk;
              }
            }
          }
        }
        PetscCall(MatPreallocateSetLocalBlock(ltog, 1, &slot, ltog, cnt, cols, dnz, onz));
      }
    }
  }
  PetscCall(MatSeqBAIJSetPreallocation(J, nc, 0, dnz));
  PetscCall(MatMPIBAIJSetPreallocation(J, nc, 0, dnz, 0, onz));
  MatPreallocateEnd(dnz, onz);

  PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    /* insert explicit zero blocks to lock in the nonzero structure */
    PetscCall(PetscCalloc1(col * col * col * nc * nc, &values));
    for (i = xs; i < xs + nx; i++) {
      istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
      iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
      for (j = ys; j < ys + ny; j++) {
        jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
        jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
        for (k = zs; k < zs + nz; k++) {
          kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
          kend = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));

          slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);

          cnt = 0;
          for (ii = istart; ii < iend + 1; ii++) {
            for (jj = jstart; jj < jend + 1; jj++) {
              for (kk = kstart; kk < kend + 1; kk++) {
                if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/
                  cols[cnt++] = slot + ii + gnx * jj + gnx * gny * kk;
                }
              }
            }
          }
          PetscCall(MatSetValuesBlockedLocal(J, 1, &slot, cnt, cols, values, INSERT_VALUES));
        }
      }
    }
    PetscCall(PetscFree(values));
    /* do not copy values to GPU since they are all zero and not yet needed there */
    PetscCall(MatBindToCPU(J, PETSC_TRUE));
    PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatBindToCPU(J, PETSC_FALSE));
    PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  }
  PetscCall(PetscFree(cols));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  This helper is for SBAIJ preallocation, to discard the lower-triangular values which are difficult to
  identify in the local ordering with periodic domain.
1885 */ 1886 static PetscErrorCode L2GFilterUpperTriangular(ISLocalToGlobalMapping ltog, PetscInt *row, PetscInt *cnt, PetscInt col[]) 1887 { 1888 PetscInt i, n; 1889 1890 PetscFunctionBegin; 1891 PetscCall(ISLocalToGlobalMappingApplyBlock(ltog, 1, row, row)); 1892 PetscCall(ISLocalToGlobalMappingApplyBlock(ltog, *cnt, col, col)); 1893 for (i = 0, n = 0; i < *cnt; i++) { 1894 if (col[i] >= *row) col[n++] = col[i]; 1895 } 1896 *cnt = n; 1897 PetscFunctionReturn(PETSC_SUCCESS); 1898 } 1899 1900 PetscErrorCode DMCreateMatrix_DA_2d_MPISBAIJ(DM da, Mat J) 1901 { 1902 PetscInt xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny; 1903 PetscInt m, n, dim, s, *cols, nc, col, cnt, *dnz, *onz; 1904 PetscInt istart, iend, jstart, jend, ii, jj; 1905 MPI_Comm comm; 1906 PetscScalar *values; 1907 DMBoundaryType bx, by; 1908 DMDAStencilType st; 1909 ISLocalToGlobalMapping ltog; 1910 1911 PetscFunctionBegin; 1912 /* 1913 nc - number of components per grid point 1914 col - number of colors needed in one direction for single component problem 1915 */ 1916 PetscCall(DMDAGetInfo(da, &dim, &m, &n, NULL, NULL, NULL, NULL, &nc, &s, &bx, &by, NULL, &st)); 1917 col = 2 * s + 1; 1918 1919 PetscCall(DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL)); 1920 PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL)); 1921 PetscCall(PetscObjectGetComm((PetscObject)da, &comm)); 1922 1923 PetscCall(PetscMalloc1(col * col * nc * nc, &cols)); 1924 1925 PetscCall(DMGetLocalToGlobalMapping(da, <og)); 1926 1927 /* determine the matrix preallocation information */ 1928 MatPreallocateBegin(comm, nx * ny, nx * ny, dnz, onz); 1929 for (i = xs; i < xs + nx; i++) { 1930 istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i)); 1931 iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1)); 1932 for (j = ys; j < ys + ny; j++) { 1933 jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j)); 1934 jend = (by == DM_BOUNDARY_PERIODIC) ? 
s : (PetscMin(s, n - j - 1));
      slot = i - gxs + gnx * (j - gys);

      /* Find block columns in block row */
      cnt = 0;
      for (ii = istart; ii < iend + 1; ii++) {
        for (jj = jstart; jj < jend + 1; jj++) {
          if (st == DMDA_STENCIL_BOX || !ii || !jj) cols[cnt++] = slot + ii + gnx * jj;
        }
      }
      /* drop lower-triangular columns; slot and cnt come back in global terms */
      PetscCall(L2GFilterUpperTriangular(ltog, &slot, &cnt, cols));
      PetscCall(MatPreallocateSymmetricSetBlock(slot, cnt, cols, dnz, onz));
    }
  }
  PetscCall(MatSeqSBAIJSetPreallocation(J, nc, 0, dnz));
  PetscCall(MatMPISBAIJSetPreallocation(J, nc, 0, dnz, 0, onz));
  MatPreallocateEnd(dnz, onz);

  PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    /* insert explicit zero blocks to lock in the nonzero structure */
    PetscCall(PetscCalloc1(col * col * nc * nc, &values));
    for (i = xs; i < xs + nx; i++) {
      istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
      iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
      for (j = ys; j < ys + ny; j++) {
        jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
        jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
        slot = i - gxs + gnx * (j - gys);

        /* Find block columns in block row */
        cnt = 0;
        for (ii = istart; ii < iend + 1; ii++) {
          for (jj = jstart; jj < jend + 1; jj++) {
            if (st == DMDA_STENCIL_BOX || !ii || !jj) cols[cnt++] = slot + ii + gnx * jj;
          }
        }
        PetscCall(L2GFilterUpperTriangular(ltog, &slot, &cnt, cols));
        /* indices are already global here, hence MatSetValuesBlocked (not ...Local) */
        PetscCall(MatSetValuesBlocked(J, 1, &slot, cnt, cols, values, INSERT_VALUES));
      }
    }
    PetscCall(PetscFree(values));
    /* do not copy values to GPU since they are all zero and not yet needed there */
    PetscCall(MatBindToCPU(J, PETSC_TRUE));
    PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatBindToCPU(J, PETSC_FALSE));
    PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  }
  PetscCall(PetscFree(cols));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Preallocates (and optionally fills with zeros) a symmetric blocked SBAIJ
   matrix for a 3d DMDA; only the upper triangle of block columns is kept. */
PetscErrorCode DMCreateMatrix_DA_3d_MPISBAIJ(DM da, Mat J)
{
  PetscInt xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
  PetscInt m, n, dim, s, *cols, k, nc, col, cnt, p, *dnz, *onz;
  PetscInt istart, iend, jstart, jend, kstart, kend, zs, nz, gzs, gnz, ii, jj, kk;
  MPI_Comm comm;
  PetscScalar *values;
  DMBoundaryType bx, by, bz;
  DMDAStencilType st;
  ISLocalToGlobalMapping ltog;

  PetscFunctionBegin;
  /*
    nc - number of components per grid point
    col - number of colors needed in one direction for single component problem
  */
  PetscCall(DMDAGetInfo(da, &dim, &m, &n, &p, NULL, NULL, NULL, &nc, &s, &bx, &by, &bz, &st));
  col = 2 * s + 1;

  PetscCall(DMDAGetCorners(da, &xs, &ys, &zs, &nx, &ny, &nz));
  PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, &gzs, &gnx, &gny, &gnz));
  PetscCall(PetscObjectGetComm((PetscObject)da, &comm));

  /* create the matrix */
  PetscCall(PetscMalloc1(col * col * col, &cols));

  PetscCall(DMGetLocalToGlobalMapping(da, &ltog));

  /* determine the matrix preallocation information */
  MatPreallocateBegin(comm, nx * ny * nz, nx * ny * nz, dnz, onz);
  for (i = xs; i < xs + nx; i++) {
    /* periodic boundaries always see the full stencil; otherwise clip at the physical edge */
    istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
    iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
    for (j = ys; j < ys + ny; j++) {
      jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
      jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
      for (k = zs; k < zs + nz; k++) {
        kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
        kend = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));

        slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);

        /* Find block columns in block row */
        cnt = 0;
        for (ii = istart; ii < iend + 1; ii++) {
          for (jj = jstart; jj < jend + 1; jj++) {
            for (kk = kstart; kk < kend + 1; kk++) {
              if ((st == DMDA_STENCIL_BOX) || (!ii && !jj) || (!jj && !kk) || (!ii && !kk)) cols[cnt++] = slot + ii + gnx * jj + gnx * gny * kk;
            }
          }
        }
        /* drop lower-triangular columns; slot and cnt come back in global terms */
        PetscCall(L2GFilterUpperTriangular(ltog, &slot, &cnt, cols));
        PetscCall(MatPreallocateSymmetricSetBlock(slot, cnt, cols, dnz, onz));
      }
    }
  }
  PetscCall(MatSeqSBAIJSetPreallocation(J, nc, 0, dnz));
  PetscCall(MatMPISBAIJSetPreallocation(J, nc, 0, dnz, 0, onz));
  MatPreallocateEnd(dnz, onz);

  PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    /* insert explicit zero blocks to lock in the nonzero structure */
    PetscCall(PetscCalloc1(col * col * col * nc * nc, &values));
    for (i = xs; i < xs + nx; i++) {
      istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
      iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
      for (j = ys; j < ys + ny; j++) {
        jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
        jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
        for (k = zs; k < zs + nz; k++) {
          kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
          kend = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));

          slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);

          cnt = 0;
          for (ii = istart; ii < iend + 1; ii++) {
            for (jj = jstart; jj < jend + 1; jj++) {
              for (kk = kstart; kk < kend + 1; kk++) {
                if ((st == DMDA_STENCIL_BOX) || (!ii && !jj) || (!jj && !kk) || (!ii && !kk)) cols[cnt++] = slot + ii + gnx * jj + gnx * gny * kk;
              }
            }
          }
          PetscCall(L2GFilterUpperTriangular(ltog, &slot, &cnt, cols));
          /* indices are already global here, hence MatSetValuesBlocked (not ...Local) */
          PetscCall(MatSetValuesBlocked(J, 1, &slot, cnt, cols, values, INSERT_VALUES));
        }
      }
    }
    PetscCall(PetscFree(values));
    /* do not copy values to GPU since they are all zero and not yet needed there */
    PetscCall(MatBindToCPU(J, PETSC_TRUE));
    PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatBindToCPU(J, PETSC_FALSE));
    PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  }
  PetscCall(PetscFree(cols));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Preallocates (and optionally fills with zeros) an AIJ matrix for a 3d DMDA
   honoring the per-block fill patterns dfill/ofill set with DMDASetBlockFills(). */
PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ_Fill(DM da, Mat J)
{
  PetscInt xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
  PetscInt m, n, dim, s, *cols, k, nc, row, col, cnt, maxcnt = 0, l, p, *dnz, *onz;
  PetscInt istart, iend, jstart, jend, kstart, kend, zs, nz, gzs, gnz, ii, jj, kk, M, N, P;
  DM_DA *dd = (DM_DA *)da->data;
  PetscInt ifill_col, *dfill = dd->dfill, *ofill = dd->ofill;
  MPI_Comm comm;
  PetscScalar *values;
  DMBoundaryType bx, by,
bz;
  ISLocalToGlobalMapping ltog;
  DMDAStencilType st;
  PetscBool removedups = PETSC_FALSE;

  PetscFunctionBegin;
  /*
    nc - number of components per grid point
    col - number of colors needed in one direction for single component problem
  */
  PetscCall(DMDAGetInfo(da, &dim, &m, &n, &p, &M, &N, &P, &nc, &s, &bx, &by, &bz, &st));
  col = 2 * s + 1;

  /*
    With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times
    because of "wrapping" around the end of the domain hitting an entry already counted in the other direction.
  */
  if (M == 1 && 2 * s >= m) removedups = PETSC_TRUE;
  if (N == 1 && 2 * s >= n) removedups = PETSC_TRUE;
  if (P == 1 && 2 * s >= p) removedups = PETSC_TRUE;

  PetscCall(DMDAGetCorners(da, &xs, &ys, &zs, &nx, &ny, &nz));
  PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, &gzs, &gnx, &gny, &gnz));
  PetscCall(PetscObjectGetComm((PetscObject)da, &comm));

  PetscCall(PetscMalloc1(col * col * col * nc, &cols));
  PetscCall(DMGetLocalToGlobalMapping(da, &ltog));

  /* determine the matrix preallocation information */
  MatPreallocateBegin(comm, nc * nx * ny * nz, nc * nx * ny * nz, dnz, onz);

  PetscCall(MatSetBlockSize(J, nc));
  for (i = xs; i < xs + nx; i++) {
    /* periodic boundaries always see the full stencil; otherwise clip at the physical edge */
    istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
    iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
    for (j = ys; j < ys + ny; j++) {
      jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
      jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
      for (k = zs; k < zs + nz; k++) {
        kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
        kend = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));

        slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);

        /* one scalar row per component l of this grid point */
        for (l = 0; l < nc; l++) {
          cnt = 0;
          for (ii = istart; ii < iend + 1; ii++) {
            for (jj = jstart; jj < jend + 1; jj++) {
              for (kk = kstart; kk < kend + 1; kk++) {
                if (ii || jj || kk) {
                  if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/
                    /* off-diagonal grid point: use the sparse off-diagonal fill row ofill[l] */
                    for (ifill_col = ofill[l]; ifill_col < ofill[l + 1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc * (slot + ii + gnx * jj + gnx * gny * kk);
                  }
                } else {
                  /* the grid point itself: use dfill if given, otherwise a dense block */
                  if (dfill) {
                    for (ifill_col = dfill[l]; ifill_col < dfill[l + 1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc * (slot + ii + gnx * jj + gnx * gny * kk);
                  } else {
                    for (ifill_col = 0; ifill_col < nc; ifill_col++) cols[cnt++] = ifill_col + nc * (slot + ii + gnx * jj + gnx * gny * kk);
                  }
                }
              }
            }
          }
          row = l + nc * (slot);
          maxcnt = PetscMax(maxcnt, cnt);
          if (removedups) PetscCall(MatPreallocateSetLocalRemoveDups(ltog, 1, &row, ltog, cnt, cols, dnz, onz));
          else PetscCall(MatPreallocateSetLocal(ltog, 1, &row, ltog, cnt, cols, dnz, onz));
        }
      }
    }
  }
  PetscCall(MatSeqAIJSetPreallocation(J, 0, dnz));
  PetscCall(MatMPIAIJSetPreallocation(J, 0, dnz, 0, onz));
  MatPreallocateEnd(dnz, onz);
  PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    /* maxcnt zeros suffice: each row insertion uses at most maxcnt columns */
    PetscCall(PetscCalloc1(maxcnt, &values));
    for (i = xs; i < xs + nx; i++) {
      istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
      iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
      for (j = ys; j < ys + ny; j++) {
        jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
        jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
        for (k = zs; k < zs + nz; k++) {
          kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
          kend = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));

          slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);

          for (l = 0; l < nc; l++) {
            cnt = 0;
            for (ii = istart; ii < iend + 1; ii++) {
              for (jj = jstart; jj < jend + 1; jj++) {
                for (kk = kstart; kk < kend + 1; kk++) {
                  if (ii || jj || kk) {
                    if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/
                      for (ifill_col = ofill[l]; ifill_col < ofill[l + 1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc * (slot + ii + gnx * jj + gnx * gny * kk);
                    }
                  } else {
                    if (dfill) {
                      for (ifill_col = dfill[l]; ifill_col < dfill[l + 1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc * (slot + ii + gnx * jj + gnx * gny * kk);
                    } else {
                      for (ifill_col = 0; ifill_col < nc; ifill_col++) cols[cnt++] = ifill_col + nc * (slot + ii + gnx * jj + gnx * gny * kk);
                    }
                  }
                }
              }
            }
            row = l + nc * (slot);
            PetscCall(MatSetValuesLocal(J, 1, &row, cnt, cols, values, INSERT_VALUES));
          }
        }
      }
    }
    PetscCall(PetscFree(values));
    /* do not copy values to GPU since they are all zero and not yet needed there */
    PetscCall(MatBindToCPU(J, PETSC_TRUE));
    PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatBindToCPU(J, PETSC_FALSE));
    PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  }
  PetscCall(PetscFree(cols));
  PetscFunctionReturn(PETSC_SUCCESS);
}