1 #include <petsc/private/dmdaimpl.h> /*I "petscdmda.h" I*/ 2 #include <petscmat.h> 3 #include <petscbt.h> 4 5 extern PetscErrorCode DMCreateColoring_DA_1d_MPIAIJ(DM, ISColoringType, ISColoring *); 6 extern PetscErrorCode DMCreateColoring_DA_2d_MPIAIJ(DM, ISColoringType, ISColoring *); 7 extern PetscErrorCode DMCreateColoring_DA_2d_5pt_MPIAIJ(DM, ISColoringType, ISColoring *); 8 extern PetscErrorCode DMCreateColoring_DA_3d_MPIAIJ(DM, ISColoringType, ISColoring *); 9 10 /* 11 For ghost i that may be negative or greater than the upper bound this 12 maps it into the 0:m-1 range using periodicity 13 */ 14 #define SetInRange(i, m) ((i < 0) ? m + i : ((i >= m) ? i - m : i)) 15 16 static PetscErrorCode DMDASetBlockFills_Private(const PetscInt *dfill, PetscInt w, PetscInt **rfill) 17 { 18 PetscInt i, j, nz, *fill; 19 20 PetscFunctionBegin; 21 if (!dfill) PetscFunctionReturn(PETSC_SUCCESS); 22 23 /* count number nonzeros */ 24 nz = 0; 25 for (i = 0; i < w; i++) { 26 for (j = 0; j < w; j++) { 27 if (dfill[w * i + j]) nz++; 28 } 29 } 30 PetscCall(PetscMalloc1(nz + w + 1, &fill)); 31 /* construct modified CSR storage of nonzero structure */ 32 /* fill[0 -- w] marks starts of each row of column indices (and end of last row) 33 so fill[1] - fill[0] gives number of nonzeros in first row etc */ 34 nz = w + 1; 35 for (i = 0; i < w; i++) { 36 fill[i] = nz; 37 for (j = 0; j < w; j++) { 38 if (dfill[w * i + j]) { 39 fill[nz] = j; 40 nz++; 41 } 42 } 43 } 44 fill[w] = nz; 45 46 *rfill = fill; 47 PetscFunctionReturn(PETSC_SUCCESS); 48 } 49 50 static PetscErrorCode DMDASetBlockFillsSparse_Private(const PetscInt *dfillsparse, PetscInt w, PetscInt **rfill) 51 { 52 PetscInt nz; 53 54 PetscFunctionBegin; 55 if (!dfillsparse) PetscFunctionReturn(PETSC_SUCCESS); 56 57 /* Determine number of non-zeros */ 58 nz = (dfillsparse[w] - w - 1); 59 60 /* Allocate space for our copy of the given sparse matrix representation. 
*/ 61 PetscCall(PetscMalloc1(nz + w + 1, rfill)); 62 PetscCall(PetscArraycpy(*rfill, dfillsparse, nz + w + 1)); 63 PetscFunctionReturn(PETSC_SUCCESS); 64 } 65 66 static PetscErrorCode DMDASetBlockFills_Private2(DM_DA *dd) 67 { 68 PetscInt i, k, cnt = 1; 69 70 PetscFunctionBegin; 71 /* ofillcount tracks the columns of ofill that have any nonzero in thems; the value in each location is the number of 72 columns to the left with any nonzeros in them plus 1 */ 73 PetscCall(PetscCalloc1(dd->w, &dd->ofillcols)); 74 for (i = 0; i < dd->w; i++) { 75 for (k = dd->ofill[i]; k < dd->ofill[i + 1]; k++) dd->ofillcols[dd->ofill[k]] = 1; 76 } 77 for (i = 0; i < dd->w; i++) { 78 if (dd->ofillcols[i]) dd->ofillcols[i] = cnt++; 79 } 80 PetscFunctionReturn(PETSC_SUCCESS); 81 } 82 83 /*@ 84 DMDASetBlockFills - Sets the fill pattern in each block for a multi-component problem 85 of the matrix returned by `DMCreateMatrix()`. 86 87 Logically Collective 88 89 Input Parameters: 90 + da - the distributed array 91 . dfill - the fill pattern in the diagonal block (may be `NULL`, means use dense block) 92 - ofill - the fill pattern in the off-diagonal blocks 93 94 Level: developer 95 96 Notes: 97 This only makes sense when you are doing multicomponent problems but using the 98 `MATMPIAIJ` matrix format 99 100 The format for `dfill` and `ofill` is a 2 dimensional dof by dof matrix with 1 entries 101 representing coupling and 0 entries for missing coupling. For example 102 .vb 103 dfill[9] = {1, 0, 0, 104 1, 1, 0, 105 0, 1, 1} 106 .ve 107 means that row 0 is coupled with only itself in the diagonal block, row 1 is coupled with 108 itself and row 0 (in the diagonal block) and row 2 is coupled with itself and row 1 (in the 109 diagonal block). 
110 111 `DMDASetGetMatrix()` allows you to provide general code for those more complicated nonzero patterns then 112 can be represented in the `dfill`, `ofill` format 113 114 Contributed by\: 115 Glenn Hammond 116 117 .seealso: [](sec_struct), `DM`, `DMDA`, `DMCreateMatrix()`, `DMDASetGetMatrix()`, `DMSetMatrixPreallocateOnly()`, `DMDASetBlockFillsSparse()` 118 @*/ 119 PetscErrorCode DMDASetBlockFills(DM da, const PetscInt *dfill, const PetscInt *ofill) 120 { 121 DM_DA *dd = (DM_DA *)da->data; 122 123 PetscFunctionBegin; 124 /* save the given dfill and ofill information */ 125 PetscCall(DMDASetBlockFills_Private(dfill, dd->w, &dd->dfill)); 126 PetscCall(DMDASetBlockFills_Private(ofill, dd->w, &dd->ofill)); 127 128 /* count nonzeros in ofill columns */ 129 PetscCall(DMDASetBlockFills_Private2(dd)); 130 PetscFunctionReturn(PETSC_SUCCESS); 131 } 132 133 /*@ 134 DMDASetBlockFillsSparse - Sets the fill pattern in each block for a multi-component problem 135 of the matrix returned by `DMCreateMatrix()`, using sparse representations 136 of fill patterns. 137 138 Logically Collective 139 140 Input Parameters: 141 + da - the distributed array 142 . dfillsparse - the sparse fill pattern in the diagonal block (may be `NULL`, means use dense block) 143 - ofillsparse - the sparse fill pattern in the off-diagonal blocks 144 145 Level: developer 146 147 Notes: 148 This only makes sense when you are doing multicomponent problems but using the 149 `MATMPIAIJ` matrix format 150 151 The format for `dfill` and `ofill` is a sparse representation of a 152 dof-by-dof matrix with 1 entries representing coupling and 0 entries 153 for missing coupling. The sparse representation is a 1 dimensional 154 array of length nz + dof + 1, where nz is the number of non-zeros in 155 the matrix. The first dof entries in the array give the 156 starting array indices of each row's items in the rest of the array, 157 the dof+1st item contains the value nz + dof + 1 (i.e. 
the entire length of the array) 158 and the remaining nz items give the column indices of each of 159 the 1s within the logical 2D matrix. Each row's items within 160 the array are the column indices of the 1s within that row 161 of the 2D matrix. PETSc developers may recognize that this is the 162 same format as that computed by the `DMDASetBlockFills_Private()` 163 function from a dense 2D matrix representation. 164 165 `DMDASetGetMatrix()` allows you to provide general code for those more complicated nonzero patterns then 166 can be represented in the `dfill`, `ofill` format 167 168 Contributed by\: 169 Philip C. Roth 170 171 .seealso: [](sec_struct), `DM`, `DMDA`, `DMDASetBlockFills()`, `DMCreateMatrix()`, `DMDASetGetMatrix()`, `DMSetMatrixPreallocateOnly()` 172 @*/ 173 PetscErrorCode DMDASetBlockFillsSparse(DM da, const PetscInt *dfillsparse, const PetscInt *ofillsparse) 174 { 175 DM_DA *dd = (DM_DA *)da->data; 176 177 PetscFunctionBegin; 178 /* save the given dfill and ofill information */ 179 PetscCall(DMDASetBlockFillsSparse_Private(dfillsparse, dd->w, &dd->dfill)); 180 PetscCall(DMDASetBlockFillsSparse_Private(ofillsparse, dd->w, &dd->ofill)); 181 182 /* count nonzeros in ofill columns */ 183 PetscCall(DMDASetBlockFills_Private2(dd)); 184 PetscFunctionReturn(PETSC_SUCCESS); 185 } 186 187 PetscErrorCode DMCreateColoring_DA(DM da, ISColoringType ctype, ISColoring *coloring) 188 { 189 PetscInt dim, m, n, p, nc; 190 DMBoundaryType bx, by, bz; 191 MPI_Comm comm; 192 PetscMPIInt size; 193 PetscBool isBAIJ; 194 DM_DA *dd = (DM_DA *)da->data; 195 196 PetscFunctionBegin; 197 /* 198 m 199 ------------------------------------------------------ 200 | | 201 | | 202 | ---------------------- | 203 | | | | 204 n | yn | | | 205 | | | | 206 | .--------------------- | 207 | (xs,ys) xn | 208 | . 
| 209 | (gxs,gys) | 210 | | 211 ----------------------------------------------------- 212 */ 213 214 /* 215 nc - number of components per grid point 216 col - number of colors needed in one direction for single component problem 217 218 */ 219 PetscCall(DMDAGetInfo(da, &dim, NULL, NULL, NULL, &m, &n, &p, &nc, NULL, &bx, &by, &bz, NULL)); 220 221 PetscCall(PetscObjectGetComm((PetscObject)da, &comm)); 222 PetscCallMPI(MPI_Comm_size(comm, &size)); 223 if (ctype == IS_COLORING_LOCAL) { 224 if (size == 1) { 225 ctype = IS_COLORING_GLOBAL; 226 } else { 227 PetscCheck((dim == 1) || !((m == 1 && bx == DM_BOUNDARY_PERIODIC) || (n == 1 && by == DM_BOUNDARY_PERIODIC) || (p == 1 && bz == DM_BOUNDARY_PERIODIC)), PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "IS_COLORING_LOCAL cannot be used for periodic boundary condition having both ends of the domain on the same process"); 228 } 229 } 230 231 /* Tell the DMDA it has 1 degree of freedom per grid point so that the coloring for BAIJ 232 matrices is for the blocks, not the individual matrix elements */ 233 PetscCall(PetscStrbeginswith(da->mattype, MATBAIJ, &isBAIJ)); 234 if (!isBAIJ) PetscCall(PetscStrbeginswith(da->mattype, MATMPIBAIJ, &isBAIJ)); 235 if (!isBAIJ) PetscCall(PetscStrbeginswith(da->mattype, MATSEQBAIJ, &isBAIJ)); 236 if (isBAIJ) { 237 dd->w = 1; 238 dd->xs = dd->xs / nc; 239 dd->xe = dd->xe / nc; 240 dd->Xs = dd->Xs / nc; 241 dd->Xe = dd->Xe / nc; 242 } 243 244 /* 245 We do not provide a getcoloring function in the DMDA operations because 246 the basic DMDA does not know about matrices. We think of DMDA as being 247 more low-level then matrices. 
248 */ 249 if (dim == 1) PetscCall(DMCreateColoring_DA_1d_MPIAIJ(da, ctype, coloring)); 250 else if (dim == 2) PetscCall(DMCreateColoring_DA_2d_MPIAIJ(da, ctype, coloring)); 251 else if (dim == 3) PetscCall(DMCreateColoring_DA_3d_MPIAIJ(da, ctype, coloring)); 252 else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "Not done for %" PetscInt_FMT " dimension, send us mail petsc-maint@mcs.anl.gov for code", dim); 253 if (isBAIJ) { 254 dd->w = nc; 255 dd->xs = dd->xs * nc; 256 dd->xe = dd->xe * nc; 257 dd->Xs = dd->Xs * nc; 258 dd->Xe = dd->Xe * nc; 259 } 260 PetscFunctionReturn(PETSC_SUCCESS); 261 } 262 263 PetscErrorCode DMCreateColoring_DA_2d_MPIAIJ(DM da, ISColoringType ctype, ISColoring *coloring) 264 { 265 PetscInt xs, ys, nx, ny, i, j, ii, gxs, gys, gnx, gny, m, n, M, N, dim, s, k, nc, col; 266 PetscInt ncolors = 0; 267 MPI_Comm comm; 268 DMBoundaryType bx, by; 269 DMDAStencilType st; 270 ISColoringValue *colors; 271 DM_DA *dd = (DM_DA *)da->data; 272 273 PetscFunctionBegin; 274 /* 275 nc - number of components per grid point 276 col - number of colors needed in one direction for single component problem 277 278 */ 279 PetscCall(DMDAGetInfo(da, &dim, &m, &n, NULL, &M, &N, NULL, &nc, &s, &bx, &by, NULL, &st)); 280 col = 2 * s + 1; 281 PetscCall(DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL)); 282 PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL)); 283 PetscCall(PetscObjectGetComm((PetscObject)da, &comm)); 284 285 /* special case as taught to us by Paul Hovland */ 286 if (st == DMDA_STENCIL_STAR && s == 1) { 287 PetscCall(DMCreateColoring_DA_2d_5pt_MPIAIJ(da, ctype, coloring)); 288 } else { 289 if (ctype == IS_COLORING_GLOBAL) { 290 if (!dd->localcoloring) { 291 PetscCall(PetscMalloc1(nc * nx * ny, &colors)); 292 ii = 0; 293 for (j = ys; j < ys + ny; j++) { 294 for (i = xs; i < xs + nx; i++) { 295 for (k = 0; k < nc; k++) colors[ii++] = k + nc * ((i % col) + col * (j % col)); 296 } 297 } 298 ncolors = nc + nc * (col - 1 + col * 
(col - 1)); 299 PetscCall(ISColoringCreate(comm, ncolors, nc * nx * ny, colors, PETSC_OWN_POINTER, &dd->localcoloring)); 300 } 301 *coloring = dd->localcoloring; 302 } else if (ctype == IS_COLORING_LOCAL) { 303 if (!dd->ghostedcoloring) { 304 PetscCall(PetscMalloc1(nc * gnx * gny, &colors)); 305 ii = 0; 306 for (j = gys; j < gys + gny; j++) { 307 for (i = gxs; i < gxs + gnx; i++) { 308 for (k = 0; k < nc; k++) { 309 /* the complicated stuff is to handle periodic boundaries */ 310 colors[ii++] = k + nc * ((SetInRange(i, m) % col) + col * (SetInRange(j, n) % col)); 311 } 312 } 313 } 314 ncolors = nc + nc * (col - 1 + col * (col - 1)); 315 PetscCall(ISColoringCreate(comm, ncolors, nc * gnx * gny, colors, PETSC_OWN_POINTER, &dd->ghostedcoloring)); 316 /* PetscIntView(ncolors,(PetscInt*)colors,0); */ 317 318 PetscCall(ISColoringSetType(dd->ghostedcoloring, IS_COLORING_LOCAL)); 319 } 320 *coloring = dd->ghostedcoloring; 321 } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_ARG_WRONG, "Unknown ISColoringType %d", (int)ctype); 322 } 323 PetscCall(ISColoringReference(*coloring)); 324 PetscFunctionReturn(PETSC_SUCCESS); 325 } 326 327 PetscErrorCode DMCreateColoring_DA_3d_MPIAIJ(DM da, ISColoringType ctype, ISColoring *coloring) 328 { 329 PetscInt xs, ys, nx, ny, i, j, gxs, gys, gnx, gny, m, n, p, dim, s, k, nc, col, zs, gzs, ii, l, nz, gnz, M, N, P; 330 PetscInt ncolors; 331 MPI_Comm comm; 332 DMBoundaryType bx, by, bz; 333 DMDAStencilType st; 334 ISColoringValue *colors; 335 DM_DA *dd = (DM_DA *)da->data; 336 337 PetscFunctionBegin; 338 /* 339 nc - number of components per grid point 340 col - number of colors needed in one direction for single component problem 341 */ 342 PetscCall(DMDAGetInfo(da, &dim, &m, &n, &p, &M, &N, &P, &nc, &s, &bx, &by, &bz, &st)); 343 col = 2 * s + 1; 344 PetscCheck(bx != DM_BOUNDARY_PERIODIC || (m % col) == 0, PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "For coloring efficiency ensure number of grid points in X is divisible\n\ 345 
by 2*stencil_width + 1\n"); 346 PetscCheck(by != DM_BOUNDARY_PERIODIC || (n % col) == 0, PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "For coloring efficiency ensure number of grid points in Y is divisible\n\ 347 by 2*stencil_width + 1\n"); 348 PetscCheck(bz != DM_BOUNDARY_PERIODIC || (p % col) == 0, PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "For coloring efficiency ensure number of grid points in Z is divisible\n\ 349 by 2*stencil_width + 1\n"); 350 351 PetscCall(DMDAGetCorners(da, &xs, &ys, &zs, &nx, &ny, &nz)); 352 PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, &gzs, &gnx, &gny, &gnz)); 353 PetscCall(PetscObjectGetComm((PetscObject)da, &comm)); 354 355 /* create the coloring */ 356 if (ctype == IS_COLORING_GLOBAL) { 357 if (!dd->localcoloring) { 358 PetscCall(PetscMalloc1(nc * nx * ny * nz, &colors)); 359 ii = 0; 360 for (k = zs; k < zs + nz; k++) { 361 for (j = ys; j < ys + ny; j++) { 362 for (i = xs; i < xs + nx; i++) { 363 for (l = 0; l < nc; l++) colors[ii++] = l + nc * ((i % col) + col * (j % col) + col * col * (k % col)); 364 } 365 } 366 } 367 ncolors = nc + nc * (col - 1 + col * (col - 1) + col * col * (col - 1)); 368 PetscCall(ISColoringCreate(comm, ncolors, nc * nx * ny * nz, colors, PETSC_OWN_POINTER, &dd->localcoloring)); 369 } 370 *coloring = dd->localcoloring; 371 } else if (ctype == IS_COLORING_LOCAL) { 372 if (!dd->ghostedcoloring) { 373 PetscCall(PetscMalloc1(nc * gnx * gny * gnz, &colors)); 374 ii = 0; 375 for (k = gzs; k < gzs + gnz; k++) { 376 for (j = gys; j < gys + gny; j++) { 377 for (i = gxs; i < gxs + gnx; i++) { 378 for (l = 0; l < nc; l++) { 379 /* the complicated stuff is to handle periodic boundaries */ 380 colors[ii++] = l + nc * ((SetInRange(i, m) % col) + col * (SetInRange(j, n) % col) + col * col * (SetInRange(k, p) % col)); 381 } 382 } 383 } 384 } 385 ncolors = nc + nc * (col - 1 + col * (col - 1) + col * col * (col - 1)); 386 PetscCall(ISColoringCreate(comm, ncolors, nc * gnx * gny * gnz, colors, PETSC_OWN_POINTER, 
&dd->ghostedcoloring)); 387 PetscCall(ISColoringSetType(dd->ghostedcoloring, IS_COLORING_LOCAL)); 388 } 389 *coloring = dd->ghostedcoloring; 390 } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_ARG_WRONG, "Unknown ISColoringType %d", (int)ctype); 391 PetscCall(ISColoringReference(*coloring)); 392 PetscFunctionReturn(PETSC_SUCCESS); 393 } 394 395 PetscErrorCode DMCreateColoring_DA_1d_MPIAIJ(DM da, ISColoringType ctype, ISColoring *coloring) 396 { 397 PetscInt xs, nx, i, i1, gxs, gnx, l, m, M, dim, s, nc, col; 398 PetscInt ncolors; 399 MPI_Comm comm; 400 DMBoundaryType bx; 401 ISColoringValue *colors; 402 DM_DA *dd = (DM_DA *)da->data; 403 404 PetscFunctionBegin; 405 /* 406 nc - number of components per grid point 407 col - number of colors needed in one direction for single component problem 408 */ 409 PetscCall(DMDAGetInfo(da, &dim, &m, NULL, NULL, &M, NULL, NULL, &nc, &s, &bx, NULL, NULL, NULL)); 410 col = 2 * s + 1; 411 PetscCall(DMDAGetCorners(da, &xs, NULL, NULL, &nx, NULL, NULL)); 412 PetscCall(DMDAGetGhostCorners(da, &gxs, NULL, NULL, &gnx, NULL, NULL)); 413 PetscCall(PetscObjectGetComm((PetscObject)da, &comm)); 414 415 /* create the coloring */ 416 if (ctype == IS_COLORING_GLOBAL) { 417 if (!dd->localcoloring) { 418 PetscCall(PetscMalloc1(nc * nx, &colors)); 419 if (dd->ofillcols) { 420 PetscInt tc = 0; 421 for (i = 0; i < nc; i++) tc += (PetscInt)(dd->ofillcols[i] > 0); 422 i1 = 0; 423 for (i = xs; i < xs + nx; i++) { 424 for (l = 0; l < nc; l++) { 425 if (dd->ofillcols[l] && (i % col)) { 426 colors[i1++] = nc - 1 + tc * ((i % col) - 1) + dd->ofillcols[l]; 427 } else { 428 colors[i1++] = l; 429 } 430 } 431 } 432 ncolors = nc + 2 * s * tc; 433 } else { 434 i1 = 0; 435 for (i = xs; i < xs + nx; i++) { 436 for (l = 0; l < nc; l++) colors[i1++] = l + nc * (i % col); 437 } 438 ncolors = nc + nc * (col - 1); 439 } 440 PetscCall(ISColoringCreate(comm, ncolors, nc * nx, colors, PETSC_OWN_POINTER, &dd->localcoloring)); 441 } 442 *coloring = 
dd->localcoloring; 443 } else if (ctype == IS_COLORING_LOCAL) { 444 if (!dd->ghostedcoloring) { 445 PetscCall(PetscMalloc1(nc * gnx, &colors)); 446 i1 = 0; 447 for (i = gxs; i < gxs + gnx; i++) { 448 for (l = 0; l < nc; l++) { 449 /* the complicated stuff is to handle periodic boundaries */ 450 colors[i1++] = l + nc * (SetInRange(i, m) % col); 451 } 452 } 453 ncolors = nc + nc * (col - 1); 454 PetscCall(ISColoringCreate(comm, ncolors, nc * gnx, colors, PETSC_OWN_POINTER, &dd->ghostedcoloring)); 455 PetscCall(ISColoringSetType(dd->ghostedcoloring, IS_COLORING_LOCAL)); 456 } 457 *coloring = dd->ghostedcoloring; 458 } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_ARG_WRONG, "Unknown ISColoringType %d", (int)ctype); 459 PetscCall(ISColoringReference(*coloring)); 460 PetscFunctionReturn(PETSC_SUCCESS); 461 } 462 463 PetscErrorCode DMCreateColoring_DA_2d_5pt_MPIAIJ(DM da, ISColoringType ctype, ISColoring *coloring) 464 { 465 PetscInt xs, ys, nx, ny, i, j, ii, gxs, gys, gnx, gny, m, n, dim, s, k, nc; 466 PetscInt ncolors; 467 MPI_Comm comm; 468 DMBoundaryType bx, by; 469 ISColoringValue *colors; 470 DM_DA *dd = (DM_DA *)da->data; 471 472 PetscFunctionBegin; 473 /* 474 nc - number of components per grid point 475 col - number of colors needed in one direction for single component problem 476 */ 477 PetscCall(DMDAGetInfo(da, &dim, &m, &n, NULL, NULL, NULL, NULL, &nc, &s, &bx, &by, NULL, NULL)); 478 PetscCall(DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL)); 479 PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL)); 480 PetscCall(PetscObjectGetComm((PetscObject)da, &comm)); 481 /* create the coloring */ 482 if (ctype == IS_COLORING_GLOBAL) { 483 if (!dd->localcoloring) { 484 PetscCall(PetscMalloc1(nc * nx * ny, &colors)); 485 ii = 0; 486 for (j = ys; j < ys + ny; j++) { 487 for (i = xs; i < xs + nx; i++) { 488 for (k = 0; k < nc; k++) colors[ii++] = k + nc * ((3 * j + i) % 5); 489 } 490 } 491 ncolors = 5 * nc; 492 
PetscCall(ISColoringCreate(comm, ncolors, nc * nx * ny, colors, PETSC_OWN_POINTER, &dd->localcoloring)); 493 } 494 *coloring = dd->localcoloring; 495 } else if (ctype == IS_COLORING_LOCAL) { 496 if (!dd->ghostedcoloring) { 497 PetscCall(PetscMalloc1(nc * gnx * gny, &colors)); 498 ii = 0; 499 for (j = gys; j < gys + gny; j++) { 500 for (i = gxs; i < gxs + gnx; i++) { 501 for (k = 0; k < nc; k++) colors[ii++] = k + nc * ((3 * SetInRange(j, n) + SetInRange(i, m)) % 5); 502 } 503 } 504 ncolors = 5 * nc; 505 PetscCall(ISColoringCreate(comm, ncolors, nc * gnx * gny, colors, PETSC_OWN_POINTER, &dd->ghostedcoloring)); 506 PetscCall(ISColoringSetType(dd->ghostedcoloring, IS_COLORING_LOCAL)); 507 } 508 *coloring = dd->ghostedcoloring; 509 } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_ARG_WRONG, "Unknown ISColoringType %d", (int)ctype); 510 PetscFunctionReturn(PETSC_SUCCESS); 511 } 512 513 /* =========================================================================== */ 514 extern PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ(DM, Mat); 515 extern PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ_Fill(DM, Mat); 516 extern PetscErrorCode DMCreateMatrix_DA_1d_SeqAIJ_NoPreallocation(DM, Mat); 517 extern PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ(DM, Mat); 518 extern PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ_Fill(DM, Mat); 519 extern PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ(DM, Mat); 520 extern PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ_Fill(DM, Mat); 521 extern PetscErrorCode DMCreateMatrix_DA_2d_MPIBAIJ(DM, Mat); 522 extern PetscErrorCode DMCreateMatrix_DA_3d_MPIBAIJ(DM, Mat); 523 extern PetscErrorCode DMCreateMatrix_DA_2d_MPISBAIJ(DM, Mat); 524 extern PetscErrorCode DMCreateMatrix_DA_3d_MPISBAIJ(DM, Mat); 525 extern PetscErrorCode DMCreateMatrix_DA_2d_MPISELL(DM, Mat); 526 extern PetscErrorCode DMCreateMatrix_DA_3d_MPISELL(DM, Mat); 527 extern PetscErrorCode DMCreateMatrix_DA_IS(DM, Mat); 528 529 static PetscErrorCode MatView_MPI_DA(Mat A, PetscViewer viewer) 530 { 531 DM 
da; 532 const char *prefix; 533 Mat AA, Anatural; 534 AO ao; 535 PetscInt rstart, rend, *petsc, i; 536 IS is; 537 MPI_Comm comm; 538 PetscViewerFormat format; 539 PetscBool flag; 540 541 PetscFunctionBegin; 542 /* Check whether we are just printing info, in which case MatView() already viewed everything we wanted to view */ 543 PetscCall(PetscViewerGetFormat(viewer, &format)); 544 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) PetscFunctionReturn(PETSC_SUCCESS); 545 546 PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 547 PetscCall(MatGetDM(A, &da)); 548 PetscCheck(da, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Matrix not generated from a DMDA"); 549 550 PetscCall(DMDAGetAO(da, &ao)); 551 PetscCall(MatGetOwnershipRange(A, &rstart, &rend)); 552 PetscCall(PetscMalloc1(rend - rstart, &petsc)); 553 for (i = rstart; i < rend; i++) petsc[i - rstart] = i; 554 PetscCall(AOApplicationToPetsc(ao, rend - rstart, petsc)); 555 PetscCall(ISCreateGeneral(comm, rend - rstart, petsc, PETSC_OWN_POINTER, &is)); 556 557 /* call viewer on natural ordering */ 558 PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATMPISELL, &flag)); 559 if (flag) { 560 PetscCall(MatConvert(A, MATAIJ, MAT_INITIAL_MATRIX, &AA)); 561 A = AA; 562 } 563 PetscCall(MatCreateSubMatrix(A, is, is, MAT_INITIAL_MATRIX, &Anatural)); 564 PetscCall(ISDestroy(&is)); 565 PetscCall(PetscObjectGetOptionsPrefix((PetscObject)A, &prefix)); 566 PetscCall(PetscObjectSetOptionsPrefix((PetscObject)Anatural, prefix)); 567 PetscCall(PetscObjectSetName((PetscObject)Anatural, ((PetscObject)A)->name)); 568 ((PetscObject)Anatural)->donotPetscObjectPrintClassNamePrefixType = PETSC_TRUE; 569 PetscCall(MatView(Anatural, viewer)); 570 ((PetscObject)Anatural)->donotPetscObjectPrintClassNamePrefixType = PETSC_FALSE; 571 PetscCall(MatDestroy(&Anatural)); 572 if (flag) PetscCall(MatDestroy(&AA)); 573 PetscFunctionReturn(PETSC_SUCCESS); 574 } 575 576 static PetscErrorCode 
MatLoad_MPI_DA(Mat A, PetscViewer viewer) 577 { 578 DM da; 579 Mat Anatural, Aapp; 580 AO ao; 581 PetscInt rstart, rend, *app, i, m, n, M, N; 582 IS is; 583 MPI_Comm comm; 584 585 PetscFunctionBegin; 586 PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 587 PetscCall(MatGetDM(A, &da)); 588 PetscCheck(da, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Matrix not generated from a DMDA"); 589 590 /* Load the matrix in natural ordering */ 591 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &Anatural)); 592 PetscCall(MatSetType(Anatural, ((PetscObject)A)->type_name)); 593 PetscCall(MatGetSize(A, &M, &N)); 594 PetscCall(MatGetLocalSize(A, &m, &n)); 595 PetscCall(MatSetSizes(Anatural, m, n, M, N)); 596 PetscCall(MatLoad(Anatural, viewer)); 597 598 /* Map natural ordering to application ordering and create IS */ 599 PetscCall(DMDAGetAO(da, &ao)); 600 PetscCall(MatGetOwnershipRange(Anatural, &rstart, &rend)); 601 PetscCall(PetscMalloc1(rend - rstart, &app)); 602 for (i = rstart; i < rend; i++) app[i - rstart] = i; 603 PetscCall(AOPetscToApplication(ao, rend - rstart, app)); 604 PetscCall(ISCreateGeneral(comm, rend - rstart, app, PETSC_OWN_POINTER, &is)); 605 606 /* Do permutation and replace header */ 607 PetscCall(MatCreateSubMatrix(Anatural, is, is, MAT_INITIAL_MATRIX, &Aapp)); 608 PetscCall(MatHeaderReplace(A, &Aapp)); 609 PetscCall(ISDestroy(&is)); 610 PetscCall(MatDestroy(&Anatural)); 611 PetscFunctionReturn(PETSC_SUCCESS); 612 } 613 614 PetscErrorCode DMCreateMatrix_DA(DM da, Mat *J) 615 { 616 PetscInt dim, dof, nx, ny, nz, dims[3], starts[3], M, N, P; 617 Mat A; 618 MPI_Comm comm; 619 MatType Atype; 620 MatType mtype; 621 PetscMPIInt size; 622 DM_DA *dd = (DM_DA *)da->data; 623 void (*aij)(void) = NULL, (*baij)(void) = NULL, (*sbaij)(void) = NULL, (*sell)(void) = NULL, (*is)(void) = NULL; 624 625 PetscFunctionBegin; 626 PetscCall(MatInitializePackage()); 627 mtype = da->mattype; 628 629 /* 630 m 631 
------------------------------------------------------ 632 | | 633 | | 634 | ---------------------- | 635 | | | | 636 n | ny | | | 637 | | | | 638 | .--------------------- | 639 | (xs,ys) nx | 640 | . | 641 | (gxs,gys) | 642 | | 643 ----------------------------------------------------- 644 */ 645 646 /* 647 nc - number of components per grid point 648 col - number of colors needed in one direction for single component problem 649 */ 650 M = dd->M; 651 N = dd->N; 652 P = dd->P; 653 dim = da->dim; 654 dof = dd->w; 655 /* PetscCall(DMDAGetInfo(da,&dim,&M,&N,&P,NULL,NULL,NULL,&dof,NULL,NULL,NULL,NULL,NULL)); */ 656 PetscCall(DMDAGetCorners(da, NULL, NULL, NULL, &nx, &ny, &nz)); 657 PetscCall(PetscObjectGetComm((PetscObject)da, &comm)); 658 PetscCall(MatCreate(comm, &A)); 659 PetscCall(MatSetSizes(A, dof * nx * ny * nz, dof * nx * ny * nz, dof * M * N * P, dof * M * N * P)); 660 PetscCall(MatSetType(A, mtype)); 661 PetscCall(MatSetFromOptions(A)); 662 if (dof * nx * ny * nz < da->bind_below) { 663 PetscCall(MatSetBindingPropagates(A, PETSC_TRUE)); 664 PetscCall(MatBindToCPU(A, PETSC_TRUE)); 665 } 666 PetscCall(MatSetDM(A, da)); 667 if (da->structure_only) PetscCall(MatSetOption(A, MAT_STRUCTURE_ONLY, PETSC_TRUE)); 668 PetscCall(MatGetType(A, &Atype)); 669 /* 670 We do not provide a getmatrix function in the DMDA operations because 671 the basic DMDA does not know about matrices. We think of DMDA as being more 672 more low-level than matrices. This is kind of cheating but, cause sometimes 673 we think of DMDA has higher level than matrices. 674 675 We could switch based on Atype (or mtype), but we do not since the 676 specialized setting routines depend only on the particular preallocation 677 details of the matrix, not the type itself. 
678 */ 679 if (!da->prealloc_skip) { // Flag is likely set when user intends to use MatSetPreallocationCOO() 680 PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatMPIAIJSetPreallocation_C", &aij)); 681 if (!aij) PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatSeqAIJSetPreallocation_C", &aij)); 682 if (!aij) { 683 PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatMPIBAIJSetPreallocation_C", &baij)); 684 if (!baij) PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatSeqBAIJSetPreallocation_C", &baij)); 685 if (!baij) { 686 PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatMPISBAIJSetPreallocation_C", &sbaij)); 687 if (!sbaij) PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatSeqSBAIJSetPreallocation_C", &sbaij)); 688 if (!sbaij) { 689 PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatMPISELLSetPreallocation_C", &sell)); 690 if (!sell) PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatSeqSELLSetPreallocation_C", &sell)); 691 } 692 if (!sell) PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatISSetPreallocation_C", &is)); 693 } 694 } 695 } 696 if (aij) { 697 if (dim == 1) { 698 if (dd->ofill) { 699 PetscCall(DMCreateMatrix_DA_1d_MPIAIJ_Fill(da, A)); 700 } else { 701 DMBoundaryType bx; 702 PetscMPIInt size; 703 PetscCall(DMDAGetInfo(da, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, &bx, NULL, NULL, NULL)); 704 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)da), &size)); 705 if (size == 1 && bx == DM_BOUNDARY_NONE) { 706 PetscCall(DMCreateMatrix_DA_1d_SeqAIJ_NoPreallocation(da, A)); 707 } else { 708 PetscCall(DMCreateMatrix_DA_1d_MPIAIJ(da, A)); 709 } 710 } 711 } else if (dim == 2) { 712 if (dd->ofill) { 713 PetscCall(DMCreateMatrix_DA_2d_MPIAIJ_Fill(da, A)); 714 } else { 715 PetscCall(DMCreateMatrix_DA_2d_MPIAIJ(da, A)); 716 } 717 } else if (dim == 3) { 718 if (dd->ofill) { 719 PetscCall(DMCreateMatrix_DA_3d_MPIAIJ_Fill(da, A)); 720 } else { 721 PetscCall(DMCreateMatrix_DA_3d_MPIAIJ(da, A)); 722 } 723 } 724 } 
else if (baij) { 725 if (dim == 2) { 726 PetscCall(DMCreateMatrix_DA_2d_MPIBAIJ(da, A)); 727 } else if (dim == 3) { 728 PetscCall(DMCreateMatrix_DA_3d_MPIBAIJ(da, A)); 729 } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "Not implemented for %" PetscInt_FMT " dimension and Matrix Type: %s in %" PetscInt_FMT " dimension! Send mail to petsc-maint@mcs.anl.gov for code", dim, Atype, dim); 730 } else if (sbaij) { 731 if (dim == 2) { 732 PetscCall(DMCreateMatrix_DA_2d_MPISBAIJ(da, A)); 733 } else if (dim == 3) { 734 PetscCall(DMCreateMatrix_DA_3d_MPISBAIJ(da, A)); 735 } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "Not implemented for %" PetscInt_FMT " dimension and Matrix Type: %s in %" PetscInt_FMT " dimension! Send mail to petsc-maint@mcs.anl.gov for code", dim, Atype, dim); 736 } else if (sell) { 737 if (dim == 2) { 738 PetscCall(DMCreateMatrix_DA_2d_MPISELL(da, A)); 739 } else if (dim == 3) { 740 PetscCall(DMCreateMatrix_DA_3d_MPISELL(da, A)); 741 } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "Not implemented for %" PetscInt_FMT " dimension and Matrix Type: %s in %" PetscInt_FMT " dimension! 
Send mail to petsc-maint@mcs.anl.gov for code", dim, Atype, dim); 742 } else if (is) { 743 PetscCall(DMCreateMatrix_DA_IS(da, A)); 744 } else { // unknown type or da->prealloc_skip so structural information may be needed, but no prealloc 745 ISLocalToGlobalMapping ltog; 746 747 PetscCall(MatSetBlockSize(A, dof)); 748 PetscCall(MatSetUp(A)); 749 PetscCall(DMGetLocalToGlobalMapping(da, <og)); 750 PetscCall(MatSetLocalToGlobalMapping(A, ltog, ltog)); 751 } 752 PetscCall(DMDAGetGhostCorners(da, &starts[0], &starts[1], &starts[2], &dims[0], &dims[1], &dims[2])); 753 PetscCall(MatSetStencil(A, dim, dims, starts, dof)); 754 PetscCall(MatSetDM(A, da)); 755 PetscCallMPI(MPI_Comm_size(comm, &size)); 756 if (size > 1) { 757 /* change viewer to display matrix in natural ordering */ 758 PetscCall(MatSetOperation(A, MATOP_VIEW, (void (*)(void))MatView_MPI_DA)); 759 PetscCall(MatSetOperation(A, MATOP_LOAD, (void (*)(void))MatLoad_MPI_DA)); 760 } 761 *J = A; 762 PetscFunctionReturn(PETSC_SUCCESS); 763 } 764 765 PETSC_EXTERN PetscErrorCode MatISSetPreallocation_IS(Mat, PetscInt, const PetscInt[], PetscInt, const PetscInt[]); 766 767 PetscErrorCode DMCreateMatrix_DA_IS(DM dm, Mat J) 768 { 769 DM_DA *da = (DM_DA *)dm->data; 770 Mat lJ, P; 771 ISLocalToGlobalMapping ltog; 772 IS is; 773 PetscBT bt; 774 const PetscInt *e_loc, *idx; 775 PetscInt i, nel, nen, nv, dof, *gidx, n, N; 776 777 /* The l2g map of DMDA has all ghosted nodes, and e_loc is a subset of all the local nodes (including the ghosted) 778 We need to filter out the local indices that are not represented through the DMDAGetElements decomposition */ 779 PetscFunctionBegin; 780 dof = da->w; 781 PetscCall(MatSetBlockSize(J, dof)); 782 PetscCall(DMGetLocalToGlobalMapping(dm, <og)); 783 784 /* flag local elements indices in local DMDA numbering */ 785 PetscCall(ISLocalToGlobalMappingGetSize(ltog, &nv)); 786 PetscCall(PetscBTCreate(nv / dof, &bt)); 787 PetscCall(DMDAGetElements(dm, &nel, &nen, &e_loc)); /* this will throw an 
error if the stencil type is not DMDA_STENCIL_BOX */ 788 for (i = 0; i < nel * nen; i++) PetscCall(PetscBTSet(bt, e_loc[i])); 789 790 /* filter out (set to -1) the global indices not used by the local elements */ 791 PetscCall(PetscMalloc1(nv / dof, &gidx)); 792 PetscCall(ISLocalToGlobalMappingGetBlockIndices(ltog, &idx)); 793 PetscCall(PetscArraycpy(gidx, idx, nv / dof)); 794 PetscCall(ISLocalToGlobalMappingRestoreBlockIndices(ltog, &idx)); 795 for (i = 0; i < nv / dof; i++) 796 if (!PetscBTLookup(bt, i)) gidx[i] = -1; 797 PetscCall(PetscBTDestroy(&bt)); 798 PetscCall(ISCreateBlock(PetscObjectComm((PetscObject)dm), dof, nv / dof, gidx, PETSC_OWN_POINTER, &is)); 799 PetscCall(ISLocalToGlobalMappingCreateIS(is, <og)); 800 PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog)); 801 PetscCall(ISLocalToGlobalMappingDestroy(<og)); 802 PetscCall(ISDestroy(&is)); 803 804 /* Preallocation */ 805 if (dm->prealloc_skip) { 806 PetscCall(MatSetUp(J)); 807 } else { 808 PetscCall(MatISGetLocalMat(J, &lJ)); 809 PetscCall(MatGetLocalToGlobalMapping(lJ, <og, NULL)); 810 PetscCall(MatCreate(PetscObjectComm((PetscObject)lJ), &P)); 811 PetscCall(MatSetType(P, MATPREALLOCATOR)); 812 PetscCall(MatSetLocalToGlobalMapping(P, ltog, ltog)); 813 PetscCall(MatGetSize(lJ, &N, NULL)); 814 PetscCall(MatGetLocalSize(lJ, &n, NULL)); 815 PetscCall(MatSetSizes(P, n, n, N, N)); 816 PetscCall(MatSetBlockSize(P, dof)); 817 PetscCall(MatSetUp(P)); 818 for (i = 0; i < nel; i++) PetscCall(MatSetValuesBlockedLocal(P, nen, e_loc + i * nen, nen, e_loc + i * nen, NULL, INSERT_VALUES)); 819 PetscCall(MatPreallocatorPreallocate(P, (PetscBool)!da->prealloc_only, lJ)); 820 PetscCall(MatISRestoreLocalMat(J, &lJ)); 821 PetscCall(DMDARestoreElements(dm, &nel, &nen, &e_loc)); 822 PetscCall(MatDestroy(&P)); 823 824 PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY)); 825 PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY)); 826 } 827 PetscFunctionReturn(PETSC_SUCCESS); 828 } 829 830 PetscErrorCode 
DMCreateMatrix_DA_2d_MPISELL(DM da, Mat J)
{
  PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny, m, n, dim, s, *cols = NULL, k, nc, *rows = NULL, col, cnt, l, p;
  PetscInt               lstart, lend, pstart, pend, *dnz, *onz;
  MPI_Comm               comm;
  PetscScalar           *values;
  DMBoundaryType         bx, by;
  ISLocalToGlobalMapping ltog;
  DMDAStencilType        st;

  PetscFunctionBegin;
  /*
         nc - number of components per grid point
         col - number of colors needed in one direction for single component problem
  */
  PetscCall(DMDAGetInfo(da, &dim, &m, &n, NULL, NULL, NULL, NULL, &nc, &s, &bx, &by, NULL, &st));
  col = 2 * s + 1;
  PetscCall(DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL));
  PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL));
  PetscCall(PetscObjectGetComm((PetscObject)da, &comm));

  /* rows holds the nc row indices of one grid node; cols holds the worst-case stencil columns */
  PetscCall(PetscMalloc2(nc, &rows, col * col * nc * nc, &cols));
  PetscCall(DMGetLocalToGlobalMapping(da, &ltog));

  PetscCall(MatSetBlockSize(J, nc));
  /* determine the matrix preallocation information */
  MatPreallocateBegin(comm, nc * nx * ny, nc * nx * ny, dnz, onz);
  for (i = xs; i < xs + nx; i++) {
    /* clip the stencil at the physical boundary unless periodic in x */
    pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
    pend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));

    for (j = ys; j < ys + ny; j++) {
      slot = i - gxs + gnx * (j - gys); /* local (ghosted) index of grid node (i,j) */

      lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
      lend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));

      cnt = 0;
      for (k = 0; k < nc; k++) {
        for (l = lstart; l < lend + 1; l++) {
          for (p = pstart; p < pend + 1; p++) {
            if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star have either l = 0 or p = 0 */
              cols[cnt++] = k + nc * (slot + gnx * l + p);
            }
          }
        }
        rows[k] = k + nc * (slot);
      }
      PetscCall(MatPreallocateSetLocal(ltog, nc, rows, ltog, cnt, cols, dnz, onz));
    }
  }
  PetscCall(MatSetBlockSize(J, nc));
  PetscCall(MatSeqSELLSetPreallocation(J, 0, dnz));
  PetscCall(MatMPISELLSetPreallocation(J, 0, dnz, 0, onz));
  MatPreallocateEnd(dnz, onz);

  PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    /* insert explicit zeros so the nonzero pattern is fixed before first use */
    PetscCall(PetscCalloc1(col * col * nc * nc, &values));
    for (i = xs; i < xs + nx; i++) {
      pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
      pend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));

      for (j = ys; j < ys + ny; j++) {
        slot = i - gxs + gnx * (j - gys);

        lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
        lend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));

        cnt = 0;
        for (k = 0; k < nc; k++) {
          for (l = lstart; l < lend + 1; l++) {
            for (p = pstart; p < pend + 1; p++) {
              if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star have either l = 0 or p = 0 */
                cols[cnt++] = k + nc * (slot + gnx * l + p);
              }
            }
          }
          rows[k] = k + nc * (slot);
        }
        PetscCall(MatSetValuesLocal(J, nc, rows, cnt, cols, values, INSERT_VALUES));
      }
    }
    PetscCall(PetscFree(values));
    /* do not copy values to GPU since they are all zero and not yet needed there */
    PetscCall(MatBindToCPU(J, PETSC_TRUE));
    PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatBindToCPU(J, PETSC_FALSE));
    PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  }
  PetscCall(PetscFree2(rows, cols));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  DMCreateMatrix_DA_3d_MPISELL - Preallocates (and optionally zero-fills) a MATMPISELL
  matrix for a 3d DMDA; 3d analogue of DMCreateMatrix_DA_2d_MPISELL.
*/
PetscErrorCode DMCreateMatrix_DA_3d_MPISELL(DM da, Mat J)
{
  PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
  PetscInt               m, n, dim, s, *cols = NULL, k, nc, *rows = NULL, col, cnt, l, p, *dnz = NULL, *onz = NULL;
  PetscInt               istart, iend, jstart, jend, kstart, kend, zs, nz, gzs, gnz, ii, jj, kk, M, N, P;
  MPI_Comm               comm;
  PetscScalar           *values;
  DMBoundaryType         bx, by, bz;
  ISLocalToGlobalMapping ltog;
  DMDAStencilType        st;

  PetscFunctionBegin;
  /*
         nc - number of components per grid point
         col - number of colors needed in one direction for single component problem
  */
  PetscCall(DMDAGetInfo(da, &dim, &m, &n, &p, &M, &N, &P, &nc, &s, &bx, &by, &bz, &st));
  col = 2 * s + 1;
  PetscCall(DMDAGetCorners(da, &xs, &ys, &zs, &nx, &ny, &nz));
  PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, &gzs, &gnx, &gny, &gnz));
  PetscCall(PetscObjectGetComm((PetscObject)da, &comm));

  PetscCall(PetscMalloc2(nc, &rows, col * col * col * nc * nc, &cols));
  PetscCall(DMGetLocalToGlobalMapping(da, &ltog));

  PetscCall(MatSetBlockSize(J, nc));
  /* determine the matrix preallocation information */
  MatPreallocateBegin(comm, nc * nx * ny * nz, nc * nx * ny * nz, dnz, onz);
  for (i = xs; i < xs + nx; i++) {
    /* clip the stencil at each non-periodic boundary */
    istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
    iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
    for (j = ys; j < ys + ny; j++) {
      jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
      jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
      for (k = zs; k < zs + nz; k++) {
        kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
        kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));

        slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs); /* local (ghosted) index of node (i,j,k) */

        cnt = 0;
        for (l = 0; l < nc; l++) {
          for (ii = istart; ii < iend + 1; ii++) {
            for (jj = jstart; jj < jend + 1; jj++) {
              for (kk = kstart; kk < kend + 1; kk++) {
                if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/
                  cols[cnt++] = l + nc * (slot + ii + gnx * jj + gnx * gny * kk);
                }
              }
            }
          }
          rows[l] = l + nc * (slot);
        }
        PetscCall(MatPreallocateSetLocal(ltog, nc, rows, ltog, cnt, cols, dnz, onz));
      }
    }
  }
  PetscCall(MatSetBlockSize(J, nc));
  PetscCall(MatSeqSELLSetPreallocation(J, 0, dnz));
  PetscCall(MatMPISELLSetPreallocation(J, 0, dnz, 0, onz));
  MatPreallocateEnd(dnz, onz);
  PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    /* zero-fill the pattern so the nonzero structure is locked in before first use */
    PetscCall(PetscCalloc1(col * col * col * nc * nc * nc, &values));
    for (i = xs; i < xs + nx; i++) {
      istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
      iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
      for (j = ys; j < ys + ny; j++) {
        jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
        jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
        for (k = zs; k < zs + nz; k++) {
          kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
          kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));

          slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);

          cnt = 0;
          for (l = 0; l < nc; l++) {
            for (ii = istart; ii < iend + 1; ii++) {
              for (jj = jstart; jj < jend + 1; jj++) {
                for (kk = kstart; kk < kend + 1; kk++) {
                  if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/
                    cols[cnt++] = l + nc * (slot + ii + gnx * jj + gnx * gny * kk);
                  }
                }
              }
            }
            rows[l] = l + nc * (slot);
          }
          PetscCall(MatSetValuesLocal(J, nc, rows, cnt, cols, values, INSERT_VALUES));
        }
      }
    }
    PetscCall(PetscFree(values));
    /* do not copy values to GPU since they are all zero and not yet needed there */
    PetscCall(MatBindToCPU(J, PETSC_TRUE));
    PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatBindToCPU(J, PETSC_FALSE));
    PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  }
  PetscCall(PetscFree2(rows, cols));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  DMCreateMatrix_DA_2d_MPIAIJ - Preallocates (and optionally zero-fills) a MATMPIAIJ
  matrix for a 2d DMDA with a dense dof x dof coupling block at each stencil point.
*/
PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ(DM da, Mat J)
{
  PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny, m, n, dim, s, *cols = NULL, k, nc, *rows = NULL, col, cnt, l, p, M, N;
  PetscInt               lstart, lend, pstart, pend, *dnz, *onz;
  MPI_Comm               comm;
  DMBoundaryType         bx, by;
  ISLocalToGlobalMapping ltog, mltog;
  DMDAStencilType        st;
  PetscBool              removedups = PETSC_FALSE, alreadyboundtocpu =
PETSC_TRUE;

  PetscFunctionBegin;
  /*
         nc - number of components per grid point
         col - number of colors needed in one direction for single component problem
  */
  PetscCall(DMDAGetInfo(da, &dim, &m, &n, NULL, &M, &N, NULL, &nc, &s, &bx, &by, NULL, &st));
  if (bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE) PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_TRUE));
  col = 2 * s + 1;
  /*
       With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times
       because of "wrapping" around the end of the domain hitting an entry already counted in the other direction.
  */
  if (M == 1 && 2 * s >= m) removedups = PETSC_TRUE;
  if (N == 1 && 2 * s >= n) removedups = PETSC_TRUE;
  PetscCall(DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL));
  PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL));
  PetscCall(PetscObjectGetComm((PetscObject)da, &comm));

  PetscCall(PetscMalloc2(nc, &rows, col * col * nc * nc, &cols));
  PetscCall(DMGetLocalToGlobalMapping(da, &ltog));

  PetscCall(MatSetBlockSize(J, nc));
  /* determine the matrix preallocation information */
  MatPreallocateBegin(comm, nc * nx * ny, nc * nx * ny, dnz, onz);
  for (i = xs; i < xs + nx; i++) {
    /* clip the stencil at the physical boundary unless periodic */
    pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
    pend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));

    for (j = ys; j < ys + ny; j++) {
      slot = i - gxs + gnx * (j - gys); /* local (ghosted) index of node (i,j) */

      lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
      lend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));

      cnt = 0;
      for (k = 0; k < nc; k++) {
        for (l = lstart; l < lend + 1; l++) {
          for (p = pstart; p < pend + 1; p++) {
            if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star have either l = 0 or p = 0 */
              cols[cnt++] = k + nc * (slot + gnx * l + p);
            }
          }
        }
        rows[k] = k + nc * (slot);
      }
      if (removedups) PetscCall(MatPreallocateSetLocalRemoveDups(ltog, nc, rows, ltog, cnt, cols, dnz, onz));
      else PetscCall(MatPreallocateSetLocal(ltog, nc, rows, ltog, cnt, cols, dnz, onz));
    }
  }
  PetscCall(MatSetBlockSize(J, nc));
  PetscCall(MatSeqAIJSetPreallocation(J, 0, dnz));
  PetscCall(MatMPIAIJSetPreallocation(J, 0, dnz, 0, onz));
  MatPreallocateEnd(dnz, onz);
  PetscCall(MatGetLocalToGlobalMapping(J, &mltog, NULL));
  if (!mltog) PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    for (i = xs; i < xs + nx; i++) {
      pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
      pend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));

      for (j = ys; j < ys + ny; j++) {
        slot = i - gxs + gnx * (j - gys);

        lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
        lend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));

        cnt = 0;
        for (l = lstart; l < lend + 1; l++) {
          for (p = pstart; p < pend + 1; p++) {
            if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star have either l = 0 or p = 0 */
              /* first component of the node, then the remaining nc-1 consecutive components */
              cols[cnt++] = nc * (slot + gnx * l + p);
              for (k = 1; k < nc; k++) {
                cols[cnt] = 1 + cols[cnt - 1];
                cnt++;
              }
            }
          }
        }
        for (k = 0; k < nc; k++) rows[k] = k + nc * (slot);
        PetscCall(MatSetValuesLocal(J, nc, rows, cnt, cols, NULL, INSERT_VALUES));
      }
    }
    /* do not copy values to GPU since they are all zero and not yet needed there */
    PetscCall(MatBoundToCPU(J, &alreadyboundtocpu));
    PetscCall(MatBindToCPU(J, PETSC_TRUE));
    PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
    if (!alreadyboundtocpu) PetscCall(MatBindToCPU(J, PETSC_FALSE));
    PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
    if (bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE) PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_FALSE));
  }
  PetscCall(PetscFree2(rows, cols));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  DMCreateMatrix_DA_2d_MPIAIJ_Fill - Like DMCreateMatrix_DA_2d_MPIAIJ but honors the
  per-block sparsity patterns dd->dfill (diagonal block) and dd->ofill (off-diagonal
  blocks) set with DMDASetBlockFills(), stored in the modified-CSR form built above.
*/
PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ_Fill(DM da, Mat J)
{
  PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
  PetscInt               m, n, dim, s, *cols, k, nc, row, col, cnt, maxcnt = 0, l, p, M, N;
  PetscInt               lstart, lend, pstart, pend, *dnz, *onz;
  DM_DA                 *dd = (DM_DA *)da->data;
  PetscInt               ifill_col, *ofill = dd->ofill, *dfill = dd->dfill;
  MPI_Comm               comm;
  DMBoundaryType         bx, by;
  ISLocalToGlobalMapping ltog;
  DMDAStencilType        st;
  PetscBool              removedups = PETSC_FALSE;

  PetscFunctionBegin;
  /*
         nc - number of components per grid point
         col - number of colors needed in one direction for single component problem
  */
  PetscCall(DMDAGetInfo(da, &dim, &m, &n, NULL, &M, &N, NULL, &nc, &s, &bx, &by, NULL, &st));
  col =
2 * s + 1;
  /*
       With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times
       because of "wrapping" around the end of the domain hitting an entry already counted in the other direction.
  */
  if (M == 1 && 2 * s >= m) removedups = PETSC_TRUE;
  if (N == 1 && 2 * s >= n) removedups = PETSC_TRUE;
  PetscCall(DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL));
  PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL));
  PetscCall(PetscObjectGetComm((PetscObject)da, &comm));

  PetscCall(PetscMalloc1(col * col * nc, &cols));
  PetscCall(DMGetLocalToGlobalMapping(da, &ltog));

  PetscCall(MatSetBlockSize(J, nc));
  /* determine the matrix preallocation information */
  MatPreallocateBegin(comm, nc * nx * ny, nc * nx * ny, dnz, onz);
  for (i = xs; i < xs + nx; i++) {
    pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
    pend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));

    for (j = ys; j < ys + ny; j++) {
      slot = i - gxs + gnx * (j - gys);

      lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
      lend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));

      /* one row at a time: each component k of the node couples per dfill/ofill */
      for (k = 0; k < nc; k++) {
        cnt = 0;
        for (l = lstart; l < lend + 1; l++) {
          for (p = pstart; p < pend + 1; p++) {
            if (l || p) {
              /* off-diagonal (neighbor) block: use ofill pattern */
              if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star */
                for (ifill_col = ofill[k]; ifill_col < ofill[k + 1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc * (slot + gnx * l + p);
              }
            } else {
              /* diagonal block: dfill pattern, or dense if dfill is NULL */
              if (dfill) {
                for (ifill_col = dfill[k]; ifill_col < dfill[k + 1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc * (slot + gnx * l + p);
              } else {
                for (ifill_col = 0; ifill_col < nc; ifill_col++) cols[cnt++] = ifill_col + nc * (slot + gnx * l + p);
              }
            }
          }
        }
        row    = k + nc * (slot);
        maxcnt = PetscMax(maxcnt, cnt);
        if (removedups) PetscCall(MatPreallocateSetLocalRemoveDups(ltog, 1, &row, ltog, cnt, cols, dnz, onz));
        else PetscCall(MatPreallocateSetLocal(ltog, 1, &row, ltog, cnt, cols, dnz, onz));
      }
    }
  }
  PetscCall(MatSeqAIJSetPreallocation(J, 0, dnz));
  PetscCall(MatMPIAIJSetPreallocation(J, 0, dnz, 0, onz));
  MatPreallocateEnd(dnz, onz);
  PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    for (i = xs; i < xs + nx; i++) {
      pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
      pend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));

      for (j = ys; j < ys + ny; j++) {
        slot = i - gxs + gnx * (j - gys);

        lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
        lend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));

        for (k = 0; k < nc; k++) {
          cnt = 0;
          for (l = lstart; l < lend + 1; l++) {
            for (p = pstart; p < pend + 1; p++) {
              if (l || p) {
                if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star */
                  for (ifill_col = ofill[k]; ifill_col < ofill[k + 1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc * (slot + gnx * l + p);
                }
              } else {
                if (dfill) {
                  for (ifill_col = dfill[k]; ifill_col < dfill[k + 1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc * (slot + gnx * l + p);
                } else {
                  for (ifill_col = 0; ifill_col < nc; ifill_col++) cols[cnt++] = ifill_col + nc * (slot + gnx * l + p);
                }
              }
            }
          }
          row = k + nc * (slot);
          PetscCall(MatSetValuesLocal(J, 1, &row, cnt, cols, NULL, INSERT_VALUES));
        }
      }
    }
    /* do not copy values to GPU since they are all zero and not yet needed there */
    PetscCall(MatBindToCPU(J, PETSC_TRUE));
    PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatBindToCPU(J, PETSC_FALSE));
    PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  }
  PetscCall(PetscFree(cols));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  DMCreateMatrix_DA_3d_MPIAIJ - Preallocates (and optionally zero-fills) a MATMPIAIJ
  matrix for a 3d DMDA; 3d analogue of DMCreateMatrix_DA_2d_MPIAIJ.
*/
PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ(DM da, Mat J)
{
  PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
  PetscInt               m, n, dim, s, *cols = NULL, k, nc, *rows = NULL, col, cnt, l, p, *dnz = NULL, *onz = NULL;
  PetscInt               istart, iend, jstart, jend, kstart, kend, zs, nz, gzs, gnz, ii, jj, kk, M, N, P;
  MPI_Comm               comm;
  DMBoundaryType         bx, by, bz;
  ISLocalToGlobalMapping ltog, mltog;
  DMDAStencilType        st;
  PetscBool              removedups = PETSC_FALSE;

  PetscFunctionBegin;
  /*
         nc - number of components per grid point
         col - number of colors needed in one direction for single component problem
  */
  PetscCall(DMDAGetInfo(da, &dim, &m,
&n, &p, &M, &N, &P, &nc, &s, &bx, &by, &bz, &st));
  if (bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE && bz == DM_BOUNDARY_NONE) PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_TRUE));
  col = 2 * s + 1;

  /*
       With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times
       because of "wrapping" around the end of the domain hitting an entry already counted in the other direction.
  */
  if (M == 1 && 2 * s >= m) removedups = PETSC_TRUE;
  if (N == 1 && 2 * s >= n) removedups = PETSC_TRUE;
  if (P == 1 && 2 * s >= p) removedups = PETSC_TRUE;

  PetscCall(DMDAGetCorners(da, &xs, &ys, &zs, &nx, &ny, &nz));
  PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, &gzs, &gnx, &gny, &gnz));
  PetscCall(PetscObjectGetComm((PetscObject)da, &comm));

  PetscCall(PetscMalloc2(nc, &rows, col * col * col * nc * nc, &cols));
  PetscCall(DMGetLocalToGlobalMapping(da, &ltog));

  PetscCall(MatSetBlockSize(J, nc));
  /* determine the matrix preallocation information */
  MatPreallocateBegin(comm, nc * nx * ny * nz, nc * nx * ny * nz, dnz, onz);
  for (i = xs; i < xs + nx; i++) {
    istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
    iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
    for (j = ys; j < ys + ny; j++) {
      jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
      jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
      for (k = zs; k < zs + nz; k++) {
        kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
        kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));

        slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs); /* local (ghosted) index of node (i,j,k) */

        cnt = 0;
        for (l = 0; l < nc; l++) {
          for (ii = istart; ii < iend + 1; ii++) {
            for (jj = jstart; jj < jend + 1; jj++) {
              for (kk = kstart; kk < kend + 1; kk++) {
                if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/
                  cols[cnt++] = l + nc * (slot + ii + gnx * jj + gnx * gny * kk);
                }
              }
            }
          }
          rows[l] = l + nc * (slot);
        }
        if (removedups) PetscCall(MatPreallocateSetLocalRemoveDups(ltog, nc, rows, ltog, cnt, cols, dnz, onz));
        else PetscCall(MatPreallocateSetLocal(ltog, nc, rows, ltog, cnt, cols, dnz, onz));
      }
    }
  }
  PetscCall(MatSetBlockSize(J, nc));
  PetscCall(MatSeqAIJSetPreallocation(J, 0, dnz));
  PetscCall(MatMPIAIJSetPreallocation(J, 0, dnz, 0, onz));
  MatPreallocateEnd(dnz, onz);
  PetscCall(MatGetLocalToGlobalMapping(J, &mltog, NULL));
  if (!mltog) PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    for (i = xs; i < xs + nx; i++) {
      istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
      iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
      for (j = ys; j < ys + ny; j++) {
        jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
        jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
        for (k = zs; k < zs + nz; k++) {
          kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
          kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));

          slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);

          /* note: kk/jj/ii loop order here generates columns in increasing local order */
          cnt = 0;
          for (kk = kstart; kk < kend + 1; kk++) {
            for (jj = jstart; jj < jend + 1; jj++) {
              for (ii = istart; ii < iend + 1; ii++) {
                if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/
                  cols[cnt++] = nc * (slot + ii + gnx * jj + gnx * gny * kk);
                  for (l = 1; l < nc; l++) {
                    cols[cnt] = 1 + cols[cnt - 1];
                    cnt++;
                  }
                }
              }
            }
          }
          rows[0] = nc * (slot);
          for (l = 1; l < nc; l++) rows[l] = 1 + rows[l - 1];
          PetscCall(MatSetValuesLocal(J, nc, rows, cnt, cols, NULL, INSERT_VALUES));
        }
      }
    }
    /* do not copy values to GPU since they are all zero and not yet needed there */
    PetscCall(MatBindToCPU(J, PETSC_TRUE));
    PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
    if (bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE && bz == DM_BOUNDARY_NONE) PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_FALSE));
    PetscCall(MatBindToCPU(J, PETSC_FALSE));
    PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  }
  PetscCall(PetscFree2(rows, cols));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  DMCreateMatrix_DA_1d_MPIAIJ_Fill - Preallocates (and optionally zero-fills) a MATMPIAIJ
  matrix for a 1d DMDA honoring the dfill/ofill block sparsity patterns.
  Only supports stencil width s <= 1 (checked below).
*/
PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ_Fill(DM da, Mat J)
{
  DM_DA                 *dd = (DM_DA *)da->data;
  PetscInt               xs, nx, i, j, gxs, gnx, row, k, l;
  PetscInt               m, dim, s, *cols = NULL, nc, cnt, maxcnt = 0, *ocols;
  PetscInt              *ofill = dd->ofill, *dfill = dd->dfill;
  DMBoundaryType         bx;
  ISLocalToGlobalMapping ltog;
  PetscMPIInt            rank, size;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)da), &rank));
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)da), &size));

  /*
         nc - number of components per grid point
  */
  PetscCall(DMDAGetInfo(da, &dim, &m, NULL,
NULL, NULL, NULL, NULL, &nc, &s, &bx, NULL, NULL, NULL));
  PetscCheck(s <= 1, PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "Matrix creation for 1d not implemented correctly for stencil width larger than 1");
  PetscCall(DMDAGetCorners(da, &xs, NULL, NULL, &nx, NULL, NULL));
  PetscCall(DMDAGetGhostCorners(da, &gxs, NULL, NULL, &gnx, NULL, NULL));

  PetscCall(MatSetBlockSize(J, nc));
  /* cols = per-row diagonal-block counts, ocols = per-row off-process counts */
  PetscCall(PetscCalloc2(nx * nc, &cols, nx * nc, &ocols));

  /*
        note should be smaller for first and last process with no periodic
        does not handle dfill
  */
  cnt = 0;
  /* coupling with process to the left */
  for (i = 0; i < s; i++) {
    for (j = 0; j < nc; j++) {
      ocols[cnt] = ((rank == 0) ? 0 : (s - i) * (ofill[j + 1] - ofill[j]));
      cols[cnt]  = dfill[j + 1] - dfill[j] + (s + i) * (ofill[j + 1] - ofill[j]);
      if (rank == 0 && (dd->bx == DM_BOUNDARY_PERIODIC)) {
        /* periodic wrap: the left neighbors live on the last process (or locally if serial) */
        if (size > 1) ocols[cnt] += (s - i) * (ofill[j + 1] - ofill[j]);
        else cols[cnt] += (s - i) * (ofill[j + 1] - ofill[j]);
      }
      maxcnt = PetscMax(maxcnt, ocols[cnt] + cols[cnt]);
      cnt++;
    }
  }
  /* interior rows: full stencil, all on-process */
  for (i = s; i < nx - s; i++) {
    for (j = 0; j < nc; j++) {
      cols[cnt] = dfill[j + 1] - dfill[j] + 2 * s * (ofill[j + 1] - ofill[j]);
      maxcnt    = PetscMax(maxcnt, ocols[cnt] + cols[cnt]);
      cnt++;
    }
  }
  /* coupling with process to the right */
  for (i = nx - s; i < nx; i++) {
    for (j = 0; j < nc; j++) {
      ocols[cnt] = ((rank == (size - 1)) ? 0 : (i - nx + s + 1) * (ofill[j + 1] - ofill[j]));
      cols[cnt]  = dfill[j + 1] - dfill[j] + (s + nx - i - 1) * (ofill[j + 1] - ofill[j]);
      if ((rank == size - 1) && (dd->bx == DM_BOUNDARY_PERIODIC)) {
        if (size > 1) ocols[cnt] += (i - nx + s + 1) * (ofill[j + 1] - ofill[j]);
        else cols[cnt] += (i - nx + s + 1) * (ofill[j + 1] - ofill[j]);
      }
      maxcnt = PetscMax(maxcnt, ocols[cnt] + cols[cnt]);
      cnt++;
    }
  }

  PetscCall(MatSeqAIJSetPreallocation(J, 0, cols));
  PetscCall(MatMPIAIJSetPreallocation(J, 0, cols, 0, ocols));
  PetscCall(PetscFree2(cols, ocols));

  PetscCall(DMGetLocalToGlobalMapping(da, &ltog));
  PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    PetscCall(PetscMalloc1(maxcnt, &cols));
    /* note: this phase uses global indices via MatSetValues(), not MatSetValuesLocal() */
    row = xs * nc;
    /* coupling with process to the left */
    for (i = xs; i < xs + s; i++) {
      for (j = 0; j < nc; j++) {
        cnt = 0;
        if (rank) {
          for (l = 0; l < s; l++) {
            for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i - s + l) * nc + ofill[k];
          }
        }
        if (rank == 0 && (dd->bx == DM_BOUNDARY_PERIODIC)) {
          /* wrap to the far right end of the domain */
          for (l = 0; l < s; l++) {
            for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (m + i - s - l) * nc + ofill[k];
          }
        }
        if (dfill) {
          for (k = dfill[j]; k < dfill[j + 1]; k++) cols[cnt++] = i * nc + dfill[k];
        } else {
          for (k = 0; k < nc; k++) cols[cnt++] = i * nc + k;
        }
        for (l = 0; l < s; l++) {
          for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i + s - l) * nc + ofill[k];
        }
        PetscCall(MatSetValues(J, 1, &row, cnt, cols, NULL, INSERT_VALUES));
        row++;
      }
    }
    for (i = xs + s; i < xs + nx - s; i++) {
      for (j = 0; j < nc; j++) {
        cnt = 0;
        for (l = 0; l < s; l++) {
          for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i - s + l) * nc + ofill[k];
        }
        if (dfill) {
          for (k = dfill[j]; k < dfill[j + 1]; k++) cols[cnt++] = i * nc + dfill[k];
        } else {
          for (k = 0; k < nc; k++) cols[cnt++] = i * nc + k;
        }
        for (l = 0; l < s; l++) {
          for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i + s - l) * nc + ofill[k];
        }
        PetscCall(MatSetValues(J, 1, &row, cnt, cols, NULL, INSERT_VALUES));
        row++;
      }
    }
    /* coupling with process to the right */
    for (i = xs + nx - s; i < xs + nx; i++) {
      for (j = 0; j < nc; j++) {
        cnt = 0;
        for (l = 0; l < s; l++) {
          for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i - s + l) * nc + ofill[k];
        }
        if (dfill) {
          for (k = dfill[j]; k < dfill[j + 1]; k++) cols[cnt++] = i * nc + dfill[k];
        } else {
          for (k = 0; k < nc; k++) cols[cnt++] = i * nc + k;
        }
        if (rank < size - 1) {
          for (l = 0; l < s; l++) {
            for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i + s - l) * nc + ofill[k];
          }
        }
        if ((rank == size - 1) && (dd->bx == DM_BOUNDARY_PERIODIC)) {
          /* wrap to the far left end of the domain; formula equals (i + s - l - m) for s == 1 */
          for (l = 0; l < s; l++) {
            for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i - s - l - m + 2) * nc + ofill[k];
          }
        }
        PetscCall(MatSetValues(J, 1, &row, cnt, cols, NULL, INSERT_VALUES));
        row++;
      }
    }
    PetscCall(PetscFree(cols));
    /* do not copy values to GPU since they are all zero and not yet needed there */
    PetscCall(MatBindToCPU(J, PETSC_TRUE));
    PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatBindToCPU(J, PETSC_FALSE));
    PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  DMCreateMatrix_DA_1d_MPIAIJ - Preallocates (and optionally zero-fills) a MATMPIAIJ
  matrix for a 1d DMDA with dense dof x dof coupling at each stencil point.
*/
PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ(DM da, Mat J)
{
  PetscInt               xs, nx, i, i1, slot, gxs, gnx;
  PetscInt               m, dim, s, *cols = NULL, nc, *rows = NULL, col, cnt, l;
  PetscInt               istart, iend;
  DMBoundaryType         bx;
  ISLocalToGlobalMapping ltog, mltog;

  PetscFunctionBegin;
  /*
         nc - number of components per grid point
         col - number of colors needed in one direction for single component problem
  */
  PetscCall(DMDAGetInfo(da, &dim, &m, NULL, NULL, NULL, NULL, NULL, &nc, &s, &bx, NULL, NULL, NULL));
  if (bx == DM_BOUNDARY_NONE) PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_TRUE));
  col = 2 * s + 1;

  PetscCall(DMDAGetCorners(da, &xs, NULL, NULL, &nx, NULL, NULL));
  PetscCall(DMDAGetGhostCorners(da, &gxs, NULL, NULL, &gnx, NULL, NULL));

  PetscCall(MatSetBlockSize(J, nc));
  /* uniform upper-bound preallocation: col*nc entries per row on- and off-diagonal */
  PetscCall(MatSeqAIJSetPreallocation(J, col * nc, NULL));
  PetscCall(MatMPIAIJSetPreallocation(J, col * nc, NULL, col * nc, NULL));

  PetscCall(DMGetLocalToGlobalMapping(da, &ltog));
  PetscCall(MatGetLocalToGlobalMapping(J, &mltog, NULL));
  if (!mltog) PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    PetscCall(PetscMalloc2(nc, &rows, col * nc * nc, &cols));
    for (i = xs; i < xs + nx; i++) {
      /* clip stencil to the ghosted region (ghost width already accounts for periodicity) */
      istart = PetscMax(-s, gxs - i);
      iend   = PetscMin(s, gxs + gnx - i - 1);
      slot   = i - gxs;

      cnt = 0;
      for (i1 = istart; i1 < iend + 1; i1++) {
        cols[cnt++] = nc * (slot + i1);
        for (l = 1; l < nc; l++) {
          cols[cnt] = 1 + cols[cnt - 1];
          cnt++;
        }
      }
      rows[0] = nc * (slot);
      for (l = 1; l < nc; l++) rows[l] = 1 + rows[l - 1];
      PetscCall(MatSetValuesLocal(J, nc, rows, cnt, cols, NULL, INSERT_VALUES));
    }
    /* do not copy values to GPU since they are all zero and not yet needed there */
    PetscCall(MatBindToCPU(J, PETSC_TRUE));
    PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
    if (bx == DM_BOUNDARY_NONE) PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_FALSE));
    PetscCall(MatBindToCPU(J, PETSC_FALSE));
    PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
    PetscCall(PetscFree2(rows, cols));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  DMCreateMatrix_DA_1d_SeqAIJ_NoPreallocation - Like DMCreateMatrix_DA_1d_MPIAIJ but
  uses MatSeqAIJSetTotalPreallocation() (a single total nonzero count) instead of
  per-row preallocation; sequential AIJ only.
*/
PetscErrorCode DMCreateMatrix_DA_1d_SeqAIJ_NoPreallocation(DM da, Mat J)
{
  PetscInt               xs, nx, i, i1, slot, gxs, gnx;
  PetscInt               m, dim, s, *cols = NULL, nc, *rows = NULL, col, cnt, l;
  PetscInt               istart, iend;
  DMBoundaryType         bx;
  ISLocalToGlobalMapping ltog, mltog;

  PetscFunctionBegin;
  /*
         nc - number of components per grid point
         col - number of colors needed in one direction for single component problem
  */
  PetscCall(DMDAGetInfo(da, &dim, &m, NULL, NULL, NULL, NULL, NULL, &nc, &s, &bx, NULL, NULL, NULL));
  col = 2 * s + 1;

  PetscCall(DMDAGetCorners(da, &xs, NULL, NULL, &nx, NULL, NULL));
  PetscCall(DMDAGetGhostCorners(da, &gxs, NULL, NULL, &gnx, NULL, NULL));

  PetscCall(MatSetBlockSize(J, nc));
PetscCall(MatSeqAIJSetTotalPreallocation(J, nx * nc * col * nc)); 1647 1648 PetscCall(DMGetLocalToGlobalMapping(da, <og)); 1649 PetscCall(MatGetLocalToGlobalMapping(J, &mltog, NULL)); 1650 if (!mltog) PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog)); 1651 1652 /* 1653 For each node in the grid: we get the neighbors in the local (on processor ordering 1654 that includes the ghost points) then MatSetValuesLocal() maps those indices to the global 1655 PETSc ordering. 1656 */ 1657 if (!da->prealloc_only) { 1658 PetscCall(PetscMalloc2(nc, &rows, col * nc * nc, &cols)); 1659 for (i = xs; i < xs + nx; i++) { 1660 istart = PetscMax(-s, gxs - i); 1661 iend = PetscMin(s, gxs + gnx - i - 1); 1662 slot = i - gxs; 1663 1664 cnt = 0; 1665 for (i1 = istart; i1 < iend + 1; i1++) { 1666 cols[cnt++] = nc * (slot + i1); 1667 for (l = 1; l < nc; l++) { 1668 cols[cnt] = 1 + cols[cnt - 1]; 1669 cnt++; 1670 } 1671 } 1672 rows[0] = nc * (slot); 1673 for (l = 1; l < nc; l++) rows[l] = 1 + rows[l - 1]; 1674 PetscCall(MatSetValuesLocal(J, nc, rows, cnt, cols, NULL, INSERT_VALUES)); 1675 } 1676 /* do not copy values to GPU since they are all zero and not yet needed there */ 1677 PetscCall(MatBindToCPU(J, PETSC_TRUE)); 1678 PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY)); 1679 PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY)); 1680 if (bx == DM_BOUNDARY_NONE) PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_FALSE)); 1681 PetscCall(MatBindToCPU(J, PETSC_FALSE)); 1682 PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE)); 1683 PetscCall(PetscFree2(rows, cols)); 1684 } 1685 PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_FALSE)); 1686 PetscFunctionReturn(PETSC_SUCCESS); 1687 } 1688 1689 PetscErrorCode DMCreateMatrix_DA_2d_MPIBAIJ(DM da, Mat J) 1690 { 1691 PetscInt xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny; 1692 PetscInt m, n, dim, s, *cols, nc, col, cnt, *dnz, *onz; 1693 PetscInt istart, iend, jstart, jend, ii, jj; 1694 MPI_Comm comm; 1695 PetscScalar *values; 1696 
  DMBoundaryType         bx, by;
  DMDAStencilType        st;
  ISLocalToGlobalMapping ltog;

  PetscFunctionBegin;
  /*
     nc - number of components per grid point
     col - number of colors needed in one direction for single component problem
  */
  PetscCall(DMDAGetInfo(da, &dim, &m, &n, NULL, NULL, NULL, NULL, &nc, &s, &bx, &by, NULL, &st));
  col = 2 * s + 1;

  PetscCall(DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL));
  PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL));
  PetscCall(PetscObjectGetComm((PetscObject)da, &comm));

  PetscCall(PetscMalloc1(col * col * nc * nc, &cols));

  PetscCall(DMGetLocalToGlobalMapping(da, &ltog));

  /* determine the matrix preallocation information */
  MatPreallocateBegin(comm, nx * ny, nx * ny, dnz, onz);
  for (i = xs; i < xs + nx; i++) {
    /* a periodic direction keeps the full stencil width; otherwise the stencil is
       clipped at the physical domain boundary */
    istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
    iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
    for (j = ys; j < ys + ny; j++) {
      jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
      jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
      /* local (ghosted) linear index of this grid point */
      slot = i - gxs + gnx * (j - gys);

      /* Find block columns in block row */
      cnt = 0;
      for (ii = istart; ii < iend + 1; ii++) {
        for (jj = jstart; jj < jend + 1; jj++) {
          if (st == DMDA_STENCIL_BOX || !ii || !jj) { /* BOX or on the STAR */
            cols[cnt++] = slot + ii + gnx * jj;
          }
        }
      }
      PetscCall(MatPreallocateSetLocalBlock(ltog, 1, &slot, ltog, cnt, cols, dnz, onz));
    }
  }
  PetscCall(MatSeqBAIJSetPreallocation(J, nc, 0, dnz));
  PetscCall(MatMPIBAIJSetPreallocation(J, nc, 0, dnz, 0, onz));
  MatPreallocateEnd(dnz, onz);

  PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    /* values is all zeros; the loop below only locks in the nonzero structure */
    PetscCall(PetscCalloc1(col * col * nc * nc, &values));
    for (i = xs; i < xs + nx; i++) {
      istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
      iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
      for (j = ys; j < ys + ny; j++) {
        jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
        jend   = (by == DM_BOUNDARY_PERIODIC) ?
          s : (PetscMin(s, n - j - 1));
        slot = i - gxs + gnx * (j - gys);
        cnt  = 0;
        for (ii = istart; ii < iend + 1; ii++) {
          for (jj = jstart; jj < jend + 1; jj++) {
            if (st == DMDA_STENCIL_BOX || !ii || !jj) { /* BOX or on the STAR */
              cols[cnt++] = slot + ii + gnx * jj;
            }
          }
        }
        PetscCall(MatSetValuesBlockedLocal(J, 1, &slot, cnt, cols, values, INSERT_VALUES));
      }
    }
    PetscCall(PetscFree(values));
    /* do not copy values to GPU since they are all zero and not yet needed there */
    PetscCall(MatBindToCPU(J, PETSC_TRUE));
    PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatBindToCPU(J, PETSC_FALSE));
    PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  }
  PetscCall(PetscFree(cols));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Preallocates and zero-fills the 3d DMDA BAIJ matrix; same structure as the 2d
   version above with one additional grid direction */
PetscErrorCode DMCreateMatrix_DA_3d_MPIBAIJ(DM da, Mat J)
{
  PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
  PetscInt               m, n, dim, s, *cols, k, nc, col, cnt, p, *dnz, *onz;
  PetscInt               istart, iend, jstart, jend, kstart, kend, zs, nz, gzs, gnz, ii, jj, kk;
  MPI_Comm               comm;
  PetscScalar           *values;
  DMBoundaryType         bx, by, bz;
  DMDAStencilType        st;
  ISLocalToGlobalMapping ltog;

  PetscFunctionBegin;
  /*
     nc - number of components per grid point
     col - number of colors needed in one direction for single component problem
  */
  PetscCall(DMDAGetInfo(da, &dim, &m, &n, &p, NULL, NULL, NULL, &nc, &s, &bx, &by, &bz, &st));
  col = 2 * s + 1;

  PetscCall(DMDAGetCorners(da, &xs, &ys, &zs, &nx, &ny, &nz));
  PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, &gzs, &gnx, &gny, &gnz));
  PetscCall(PetscObjectGetComm((PetscObject)da, &comm));

  /* block column indices only: at most col^3 stencil neighbors per block row */
  PetscCall(PetscMalloc1(col * col * col, &cols));

  PetscCall(DMGetLocalToGlobalMapping(da, &ltog));

  /* determine the matrix preallocation information */
  MatPreallocateBegin(comm, nx * ny * nz, nx * ny * nz, dnz, onz);
  for (i = xs; i < xs + nx; i++) {
    istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
    iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
    for (j = ys; j < ys + ny; j++) {
      jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
      jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
      for (k = zs; k < zs + nz; k++) {
        kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
        kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));

        slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);

        /* Find block columns in block row */
        cnt = 0;
        for (ii = istart; ii < iend + 1; ii++) {
          for (jj = jstart; jj < jend + 1; jj++) {
            for (kk = kstart; kk < kend + 1; kk++) {
              if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/
                cols[cnt++] = slot + ii + gnx * jj + gnx * gny * kk;
              }
            }
          }
        }
        PetscCall(MatPreallocateSetLocalBlock(ltog, 1, &slot, ltog, cnt, cols, dnz, onz));
      }
    }
  }
  PetscCall(MatSeqBAIJSetPreallocation(J, nc, 0, dnz));
  PetscCall(MatMPIBAIJSetPreallocation(J, nc, 0, dnz, 0, onz));
  MatPreallocateEnd(dnz, onz);

  PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    /* values is all zeros; the loop below only locks in the nonzero structure */
    PetscCall(PetscCalloc1(col * col * col * nc * nc, &values));
    for (i = xs; i < xs + nx; i++) {
      istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
      iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
      for (j = ys; j < ys + ny; j++) {
        jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
        jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
        for (k = zs; k < zs + nz; k++) {
          kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
          kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));

          slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);

          cnt = 0;
          for (ii = istart; ii < iend + 1; ii++) {
            for (jj = jstart; jj < jend + 1; jj++) {
              for (kk = kstart; kk < kend + 1; kk++) {
                if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/
                  cols[cnt++] = slot + ii + gnx * jj + gnx * gny * kk;
                }
              }
            }
          }
          PetscCall(MatSetValuesBlockedLocal(J, 1, &slot, cnt, cols, values, INSERT_VALUES));
        }
      }
    }
    PetscCall(PetscFree(values));
    /* do not copy values to GPU since they are all zero and not yet needed there */
    PetscCall(MatBindToCPU(J, PETSC_TRUE));
    PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatBindToCPU(J, PETSC_FALSE));
    PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  }
  PetscCall(PetscFree(cols));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  This helper is for SBAIJ preallocation, to discard the lower-triangular values which are difficult to
  identify in the local ordering with periodic domain.
1891 */ 1892 static PetscErrorCode L2GFilterUpperTriangular(ISLocalToGlobalMapping ltog, PetscInt *row, PetscInt *cnt, PetscInt col[]) 1893 { 1894 PetscInt i, n; 1895 1896 PetscFunctionBegin; 1897 PetscCall(ISLocalToGlobalMappingApplyBlock(ltog, 1, row, row)); 1898 PetscCall(ISLocalToGlobalMappingApplyBlock(ltog, *cnt, col, col)); 1899 for (i = 0, n = 0; i < *cnt; i++) { 1900 if (col[i] >= *row) col[n++] = col[i]; 1901 } 1902 *cnt = n; 1903 PetscFunctionReturn(PETSC_SUCCESS); 1904 } 1905 1906 PetscErrorCode DMCreateMatrix_DA_2d_MPISBAIJ(DM da, Mat J) 1907 { 1908 PetscInt xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny; 1909 PetscInt m, n, dim, s, *cols, nc, col, cnt, *dnz, *onz; 1910 PetscInt istart, iend, jstart, jend, ii, jj; 1911 MPI_Comm comm; 1912 PetscScalar *values; 1913 DMBoundaryType bx, by; 1914 DMDAStencilType st; 1915 ISLocalToGlobalMapping ltog; 1916 1917 PetscFunctionBegin; 1918 /* 1919 nc - number of components per grid point 1920 col - number of colors needed in one direction for single component problem 1921 */ 1922 PetscCall(DMDAGetInfo(da, &dim, &m, &n, NULL, NULL, NULL, NULL, &nc, &s, &bx, &by, NULL, &st)); 1923 col = 2 * s + 1; 1924 1925 PetscCall(DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL)); 1926 PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL)); 1927 PetscCall(PetscObjectGetComm((PetscObject)da, &comm)); 1928 1929 PetscCall(PetscMalloc1(col * col * nc * nc, &cols)); 1930 1931 PetscCall(DMGetLocalToGlobalMapping(da, <og)); 1932 1933 /* determine the matrix preallocation information */ 1934 MatPreallocateBegin(comm, nx * ny, nx * ny, dnz, onz); 1935 for (i = xs; i < xs + nx; i++) { 1936 istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i)); 1937 iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1)); 1938 for (j = ys; j < ys + ny; j++) { 1939 jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j)); 1940 jend = (by == DM_BOUNDARY_PERIODIC) ? 
        s : (PetscMin(s, n - j - 1));
      slot = i - gxs + gnx * (j - gys);

      /* Find block columns in block row */
      cnt = 0;
      for (ii = istart; ii < iend + 1; ii++) {
        for (jj = jstart; jj < jend + 1; jj++) {
          if (st == DMDA_STENCIL_BOX || !ii || !jj) cols[cnt++] = slot + ii + gnx * jj;
        }
      }
      /* SBAIJ stores only the upper triangle: map to global indices and drop lower-triangular entries */
      PetscCall(L2GFilterUpperTriangular(ltog, &slot, &cnt, cols));
      PetscCall(MatPreallocateSymmetricSetBlock(slot, cnt, cols, dnz, onz));
    }
  }
  PetscCall(MatSeqSBAIJSetPreallocation(J, nc, 0, dnz));
  PetscCall(MatMPISBAIJSetPreallocation(J, nc, 0, dnz, 0, onz));
  MatPreallocateEnd(dnz, onz);

  PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    /* values is all zeros; the loop below only locks in the nonzero structure */
    PetscCall(PetscCalloc1(col * col * nc * nc, &values));
    for (i = xs; i < xs + nx; i++) {
      istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
      iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
      for (j = ys; j < ys + ny; j++) {
        jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
        jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
        slot   = i - gxs + gnx * (j - gys);

        /* Find block columns in block row */
        cnt = 0;
        for (ii = istart; ii < iend + 1; ii++) {
          for (jj = jstart; jj < jend + 1; jj++) {
            if (st == DMDA_STENCIL_BOX || !ii || !jj) cols[cnt++] = slot + ii + gnx * jj;
          }
        }
        PetscCall(L2GFilterUpperTriangular(ltog, &slot, &cnt, cols));
        PetscCall(MatSetValuesBlocked(J, 1, &slot, cnt, cols, values, INSERT_VALUES));
      }
    }
    PetscCall(PetscFree(values));
    /* do not copy values to GPU since they are all zero and not yet needed there */
    PetscCall(MatBindToCPU(J, PETSC_TRUE));
    PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatBindToCPU(J, PETSC_FALSE));
    PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  }
  PetscCall(PetscFree(cols));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode DMCreateMatrix_DA_3d_MPISBAIJ(DM da, Mat J)
{
  PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
  PetscInt               m, n, dim, s, *cols, k, nc, col, cnt, p, *dnz, *onz;
  PetscInt               istart, iend, jstart, jend, kstart, kend, zs, nz, gzs, gnz, ii, jj, kk;
  MPI_Comm               comm;
  PetscScalar           *values;
  DMBoundaryType         bx, by, bz;
  DMDAStencilType        st;
  ISLocalToGlobalMapping ltog;

  PetscFunctionBegin;
  /*
     nc - number of components per grid point
     col - number of colors needed in one direction for single component problem
  */
  PetscCall(DMDAGetInfo(da, &dim, &m, &n, &p, NULL, NULL, NULL, &nc, &s, &bx, &by, &bz, &st));
  col = 2 * s + 1;

  PetscCall(DMDAGetCorners(da, &xs, &ys, &zs, &nx, &ny, &nz));
  PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, &gzs, &gnx, &gny, &gnz));
  PetscCall(PetscObjectGetComm((PetscObject)da, &comm));

  /* create the matrix */
  PetscCall(PetscMalloc1(col * col * col, &cols));

  PetscCall(DMGetLocalToGlobalMapping(da, &ltog));

  /* determine the matrix preallocation information */
  MatPreallocateBegin(comm, nx * ny * nz, nx * ny * nz, dnz, onz);
  for (i = xs; i < xs + nx; i++) {
    /* a periodic direction keeps the full stencil width; otherwise clip at the domain boundary */
    istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
    iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
    for (j = ys; j < ys + ny; j++) {
      jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
      jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
      for (k = zs; k < zs + nz; k++) {
        kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
        kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));

        slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);

        /* Find block columns in block row */
        cnt = 0;
        for (ii = istart; ii < iend + 1; ii++) {
          for (jj = jstart; jj < jend + 1; jj++) {
            for (kk = kstart; kk < kend + 1; kk++) {
              if ((st == DMDA_STENCIL_BOX) || (!ii && !jj) || (!jj && !kk) || (!ii && !kk)) cols[cnt++] = slot + ii + gnx * jj + gnx * gny * kk;
            }
          }
        }
        /* SBAIJ stores only the upper triangle: map to global indices and drop lower-triangular entries */
        PetscCall(L2GFilterUpperTriangular(ltog, &slot, &cnt, cols));
        PetscCall(MatPreallocateSymmetricSetBlock(slot, cnt, cols, dnz, onz));
      }
    }
  }
  PetscCall(MatSeqSBAIJSetPreallocation(J, nc, 0, dnz));
  PetscCall(MatMPISBAIJSetPreallocation(J, nc, 0, dnz, 0, onz));
  MatPreallocateEnd(dnz, onz);

  PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    /* values is all zeros; the loop below only locks in the nonzero structure */
    PetscCall(PetscCalloc1(col * col * col * nc * nc, &values));
    for (i = xs; i < xs + nx; i++) {
      istart = (bx == DM_BOUNDARY_PERIODIC) ?
        -s : (PetscMax(-s, -i));
      iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
      for (j = ys; j < ys + ny; j++) {
        jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
        jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
        for (k = zs; k < zs + nz; k++) {
          kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
          kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));

          slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);

          cnt = 0;
          for (ii = istart; ii < iend + 1; ii++) {
            for (jj = jstart; jj < jend + 1; jj++) {
              for (kk = kstart; kk < kend + 1; kk++) {
                if ((st == DMDA_STENCIL_BOX) || (!ii && !jj) || (!jj && !kk) || (!ii && !kk)) cols[cnt++] = slot + ii + gnx * jj + gnx * gny * kk;
              }
            }
          }
          /* SBAIJ stores only the upper triangle: map to global indices and drop lower-triangular entries */
          PetscCall(L2GFilterUpperTriangular(ltog, &slot, &cnt, cols));
          PetscCall(MatSetValuesBlocked(J, 1, &slot, cnt, cols, values, INSERT_VALUES));
        }
      }
    }
    PetscCall(PetscFree(values));
    /* do not copy values to GPU since they are all zero and not yet needed there */
    PetscCall(MatBindToCPU(J, PETSC_TRUE));
    PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatBindToCPU(J, PETSC_FALSE));
    PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  }
  PetscCall(PetscFree(cols));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Builds the 3d DMDA AIJ matrix honoring the user-provided block fill patterns set with
   DMDASetBlockFills(): dd->dfill (diagonal block) and dd->ofill (off-diagonal blocks),
   both stored in the modified-CSR form built by DMDASetBlockFills_Private(). Only the
   coupled component pairs are preallocated and inserted. */
PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ_Fill(DM da, Mat J)
{
  PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
  PetscInt               m, n, dim, s, *cols, k, nc, row, col, cnt, maxcnt = 0, l, p, *dnz, *onz;
  PetscInt               istart, iend, jstart, jend, kstart, kend, zs, nz, gzs, gnz, ii, jj, kk, M, N, P;
  DM_DA                 *dd = (DM_DA *)da->data;
  PetscInt               ifill_col, *dfill = dd->dfill, *ofill = dd->ofill;
  MPI_Comm               comm;
  PetscScalar           *values;
  DMBoundaryType         bx, by, bz;
  ISLocalToGlobalMapping ltog;
  DMDAStencilType        st;
  PetscBool              removedups = PETSC_FALSE;

  PetscFunctionBegin;
  /*
     nc - number of components per grid point
     col - number of colors needed in one direction for single component problem
  */
  PetscCall(DMDAGetInfo(da, &dim, &m, &n, &p, &M, &N, &P, &nc, &s, &bx, &by, &bz, &st));
  col = 2 * s + 1;

  /*
     With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times
     because of "wrapping" around the end of the domain hitting an entry already counted in the other direction.
  */
  if (M == 1 && 2 * s >= m) removedups = PETSC_TRUE;
  if (N == 1 && 2 * s >= n) removedups = PETSC_TRUE;
  if (P == 1 && 2 * s >= p) removedups = PETSC_TRUE;

  PetscCall(DMDAGetCorners(da, &xs, &ys, &zs, &nx, &ny, &nz));
  PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, &gzs, &gnx, &gny, &gnz));
  PetscCall(PetscObjectGetComm((PetscObject)da, &comm));

  PetscCall(PetscMalloc1(col * col * col * nc, &cols));
  PetscCall(DMGetLocalToGlobalMapping(da, &ltog));

  /* determine the matrix preallocation information */
  MatPreallocateBegin(comm, nc * nx * ny * nz, nc * nx * ny * nz, dnz, onz);

  PetscCall(MatSetBlockSize(J, nc));
  for (i = xs; i < xs + nx; i++) {
    istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
    iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
    for (j = ys; j < ys + ny; j++) {
      jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
      jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
      for (k = zs; k < zs + nz; k++) {
        kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
        kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));

        slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);

        /* one scalar matrix row per component l at this grid point */
        for (l = 0; l < nc; l++) {
          cnt = 0;
          for (ii = istart; ii < iend + 1; ii++) {
            for (jj = jstart; jj < jend + 1; jj++) {
              for (kk = kstart; kk < kend + 1; kk++) {
                if (ii || jj || kk) {
                  /* off-diagonal grid point: couple only components listed in ofill's row l */
                  if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/
                    for (ifill_col = ofill[l]; ifill_col < ofill[l + 1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc * (slot + ii + gnx * jj + gnx * gny * kk);
                  }
                } else {
                  /* diagonal grid point: use dfill's row l if provided, else couple all nc components */
                  if (dfill) {
                    for (ifill_col = dfill[l]; ifill_col < dfill[l + 1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc * (slot + ii + gnx * jj + gnx * gny * kk);
                  } else {
                    for (ifill_col = 0; ifill_col < nc; ifill_col++) cols[cnt++] = ifill_col + nc * (slot + ii + gnx * jj + gnx * gny * kk);
                  }
                }
              }
            }
          }
          row    = l + nc * (slot);
          /* remember the widest row so the zero-value buffer below can be sized once */
          maxcnt = PetscMax(maxcnt, cnt);
          if (removedups) PetscCall(MatPreallocateSetLocalRemoveDups(ltog, 1, &row, ltog, cnt, cols, dnz, onz));
          else PetscCall(MatPreallocateSetLocal(ltog, 1, &row, ltog, cnt, cols, dnz, onz));
        }
      }
    }
  }
  PetscCall(MatSeqAIJSetPreallocation(J, 0, dnz));
  PetscCall(MatMPIAIJSetPreallocation(J, 0, dnz, 0, onz));
  MatPreallocateEnd(dnz, onz);
  PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    /* values is all zeros; the loop below only locks in the nonzero structure */
    PetscCall(PetscCalloc1(maxcnt, &values));
    for (i = xs; i < xs + nx; i++) {
      istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
      iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
      for (j = ys; j < ys + ny; j++) {
        jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
        jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
        for (k = zs; k < zs + nz; k++) {
          kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
          kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));

          slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);

          for (l = 0; l < nc; l++) {
            cnt = 0;
            for (ii = istart; ii < iend + 1; ii++) {
              for (jj = jstart; jj < jend + 1; jj++) {
                for (kk = kstart; kk < kend + 1; kk++) {
                  if (ii || jj || kk) {
                    if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/
                      for (ifill_col = ofill[l]; ifill_col < ofill[l + 1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc * (slot + ii + gnx * jj + gnx * gny * kk);
                    }
                  } else {
                    if (dfill) {
                      for (ifill_col = dfill[l]; ifill_col < dfill[l + 1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc * (slot + ii + gnx * jj + gnx * gny * kk);
                    } else {
                      for (ifill_col = 0; ifill_col < nc; ifill_col++) cols[cnt++] = ifill_col + nc * (slot + ii + gnx * jj + gnx * gny * kk);
                    }
                  }
                }
              }
            }
            row = l + nc * (slot);
            PetscCall(MatSetValuesLocal(J, 1, &row, cnt, cols, values, INSERT_VALUES));
          }
        }
      }
    }
    PetscCall(PetscFree(values));
    /* do not copy values to GPU since they are all zero and not yet needed there */
    PetscCall(MatBindToCPU(J, PETSC_TRUE));
    PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatBindToCPU(J, PETSC_FALSE));
    PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  }
  PetscCall(PetscFree(cols));
  PetscFunctionReturn(PETSC_SUCCESS);
}