1 #include <petsc/private/dmdaimpl.h> /*I "petscdmda.h" I*/ 2 #include <petscmat.h> 3 #include <petscbt.h> 4 5 extern PetscErrorCode DMCreateColoring_DA_1d_MPIAIJ(DM, ISColoringType, ISColoring *); 6 extern PetscErrorCode DMCreateColoring_DA_2d_MPIAIJ(DM, ISColoringType, ISColoring *); 7 extern PetscErrorCode DMCreateColoring_DA_2d_5pt_MPIAIJ(DM, ISColoringType, ISColoring *); 8 extern PetscErrorCode DMCreateColoring_DA_3d_MPIAIJ(DM, ISColoringType, ISColoring *); 9 10 /* 11 For ghost i that may be negative or greater than the upper bound this 12 maps it into the 0:m-1 range using periodicity 13 */ 14 #define SetInRange(i, m) ((i < 0) ? m + i : ((i >= m) ? i - m : i)) 15 16 static PetscErrorCode DMDASetBlockFills_Private(const PetscInt *dfill, PetscInt w, PetscInt **rfill) 17 { 18 PetscInt i, j, nz, *fill; 19 20 PetscFunctionBegin; 21 if (!dfill) PetscFunctionReturn(PETSC_SUCCESS); 22 23 /* count number nonzeros */ 24 nz = 0; 25 for (i = 0; i < w; i++) { 26 for (j = 0; j < w; j++) { 27 if (dfill[w * i + j]) nz++; 28 } 29 } 30 PetscCall(PetscMalloc1(nz + w + 1, &fill)); 31 /* construct modified CSR storage of nonzero structure */ 32 /* fill[0 -- w] marks starts of each row of column indices (and end of last row) 33 so fill[1] - fill[0] gives number of nonzeros in first row etc */ 34 nz = w + 1; 35 for (i = 0; i < w; i++) { 36 fill[i] = nz; 37 for (j = 0; j < w; j++) { 38 if (dfill[w * i + j]) { 39 fill[nz] = j; 40 nz++; 41 } 42 } 43 } 44 fill[w] = nz; 45 46 *rfill = fill; 47 PetscFunctionReturn(PETSC_SUCCESS); 48 } 49 50 static PetscErrorCode DMDASetBlockFillsSparse_Private(const PetscInt *dfillsparse, PetscInt w, PetscInt **rfill) 51 { 52 PetscInt nz; 53 54 PetscFunctionBegin; 55 if (!dfillsparse) PetscFunctionReturn(PETSC_SUCCESS); 56 57 /* Determine number of non-zeros */ 58 nz = (dfillsparse[w] - w - 1); 59 60 /* Allocate space for our copy of the given sparse matrix representation. */ 61 PetscCall(PetscMalloc1(nz + w + 1, rfill)); 62 PetscCall(PetscArraycpy(*rfill, dfillsparse, nz + w + 1)); 63 PetscFunctionReturn(PETSC_SUCCESS); 64 } 65 66 static PetscErrorCode DMDASetBlockFills_Private2(DM_DA *dd) 67 { 68 PetscInt i, k, cnt = 1; 69 70 PetscFunctionBegin; 71 /* ofillcount tracks the columns of ofill that have any nonzero in thems; the value in each location is the number of 72 columns to the left with any nonzeros in them plus 1 */ 73 PetscCall(PetscCalloc1(dd->w, &dd->ofillcols)); 74 for (i = 0; i < dd->w; i++) { 75 for (k = dd->ofill[i]; k < dd->ofill[i + 1]; k++) dd->ofillcols[dd->ofill[k]] = 1; 76 } 77 for (i = 0; i < dd->w; i++) { 78 if (dd->ofillcols[i]) dd->ofillcols[i] = cnt++; 79 } 80 PetscFunctionReturn(PETSC_SUCCESS); 81 } 82 83 /*@ 84 DMDASetBlockFills - Sets the fill pattern in each block for a multi-component problem 85 of the matrix returned by `DMCreateMatrix()`. 86 87 Logically Collective 88 89 Input Parameters: 90 + da - the `DMDA` 91 . dfill - the fill pattern in the diagonal block (may be `NULL`, means use dense block) 92 - ofill - the fill pattern in the off-diagonal blocks 93 94 Level: developer 95 96 Notes: 97 This only makes sense when you are doing multicomponent problems but using the 98 `MATMPIAIJ` matrix format 99 100 The format for `dfill` and `ofill` is a 2 dimensional dof by dof matrix with 1 entries 101 representing coupling and 0 entries for missing coupling. For example 102 .vb 103 dfill[9] = {1, 0, 0, 104 1, 1, 0, 105 0, 1, 1} 106 .ve 107 means that row 0 is coupled with only itself in the diagonal block, row 1 is coupled with 108 itself and row 0 (in the diagonal block) and row 2 is coupled with itself and row 1 (in the 109 diagonal block). 110 111 `DMDASetGetMatrix()` allows you to provide general code for those more complicated nonzero patterns then 112 can be represented in the `dfill`, `ofill` format 113 114 Contributed by\: 115 Glenn Hammond 116 117 .seealso: [](sec_struct), `DM`, `DMDA`, `DMCreateMatrix()`, `DMDASetGetMatrix()`, `DMSetMatrixPreallocateOnly()`, `DMDASetBlockFillsSparse()` 118 @*/ 119 PetscErrorCode DMDASetBlockFills(DM da, const PetscInt *dfill, const PetscInt *ofill) 120 { 121 DM_DA *dd = (DM_DA *)da->data; 122 123 PetscFunctionBegin; 124 /* save the given dfill and ofill information */ 125 PetscCall(DMDASetBlockFills_Private(dfill, dd->w, &dd->dfill)); 126 PetscCall(DMDASetBlockFills_Private(ofill, dd->w, &dd->ofill)); 127 128 /* count nonzeros in ofill columns */ 129 PetscCall(DMDASetBlockFills_Private2(dd)); 130 PetscFunctionReturn(PETSC_SUCCESS); 131 } 132 133 /*@ 134 DMDASetBlockFillsSparse - Sets the fill pattern in each block for a multi-component problem 135 of the matrix returned by `DMCreateMatrix()`, using sparse representations 136 of fill patterns. 137 138 Logically Collective 139 140 Input Parameters: 141 + da - the `DMDA` 142 . dfillsparse - the sparse fill pattern in the diagonal block (may be `NULL`, means use dense block) 143 - ofillsparse - the sparse fill pattern in the off-diagonal blocks 144 145 Level: developer 146 147 Notes: 148 This only makes sense when you are doing multicomponent problems but using the 149 `MATMPIAIJ` matrix format 150 151 The format for `dfill` and `ofill` is a sparse representation of a 152 dof-by-dof matrix with 1 entries representing coupling and 0 entries 153 for missing coupling. The sparse representation is a 1 dimensional 154 array of length nz + dof + 1, where nz is the number of non-zeros in 155 the matrix. The first dof entries in the array give the 156 starting array indices of each row's items in the rest of the array, 157 the dof+1st item contains the value nz + dof + 1 (i.e. the entire length of the array) 158 and the remaining nz items give the column indices of each of 159 the 1s within the logical 2D matrix. Each row's items within 160 the array are the column indices of the 1s within that row 161 of the 2D matrix. PETSc developers may recognize that this is the 162 same format as that computed by the `DMDASetBlockFills_Private()` 163 function from a dense 2D matrix representation. 164 165 `DMDASetGetMatrix()` allows you to provide general code for those more complicated nonzero patterns then 166 can be represented in the `dfill`, `ofill` format 167 168 Contributed by\: 169 Philip C. Roth 170 171 .seealso: [](sec_struct), `DM`, `DMDA`, `DMDASetBlockFills()`, `DMCreateMatrix()`, `DMDASetGetMatrix()`, `DMSetMatrixPreallocateOnly()` 172 @*/ 173 PetscErrorCode DMDASetBlockFillsSparse(DM da, const PetscInt *dfillsparse, const PetscInt *ofillsparse) 174 { 175 DM_DA *dd = (DM_DA *)da->data; 176 177 PetscFunctionBegin; 178 /* save the given dfill and ofill information */ 179 PetscCall(DMDASetBlockFillsSparse_Private(dfillsparse, dd->w, &dd->dfill)); 180 PetscCall(DMDASetBlockFillsSparse_Private(ofillsparse, dd->w, &dd->ofill)); 181 182 /* count nonzeros in ofill columns */ 183 PetscCall(DMDASetBlockFills_Private2(dd)); 184 PetscFunctionReturn(PETSC_SUCCESS); 185 } 186 187 PetscErrorCode DMCreateColoring_DA(DM da, ISColoringType ctype, ISColoring *coloring) 188 { 189 PetscInt dim, m, n, p, nc; 190 DMBoundaryType bx, by, bz; 191 MPI_Comm comm; 192 PetscMPIInt size; 193 PetscBool isBAIJ; 194 DM_DA *dd = (DM_DA *)da->data; 195 196 PetscFunctionBegin; 197 /* 198 m 199 ------------------------------------------------------ 200 | | 201 | | 202 | ---------------------- | 203 | | | | 204 n | yn | | | 205 | | | | 206 | .--------------------- | 207 | (xs,ys) xn | 208 | . | 209 | (gxs,gys) | 210 | | 211 ----------------------------------------------------- 212 */ 213 214 /* 215 nc - number of components per grid point 216 col - number of colors needed in one direction for single component problem 217 218 */ 219 PetscCall(DMDAGetInfo(da, &dim, NULL, NULL, NULL, &m, &n, &p, &nc, NULL, &bx, &by, &bz, NULL)); 220 221 PetscCall(PetscObjectGetComm((PetscObject)da, &comm)); 222 PetscCallMPI(MPI_Comm_size(comm, &size)); 223 if (ctype == IS_COLORING_LOCAL) { 224 if (size == 1) { 225 ctype = IS_COLORING_GLOBAL; 226 } else { 227 PetscCheck((dim == 1) || !((m == 1 && bx == DM_BOUNDARY_PERIODIC) || (n == 1 && by == DM_BOUNDARY_PERIODIC) || (p == 1 && bz == DM_BOUNDARY_PERIODIC)), PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "IS_COLORING_LOCAL cannot be used for periodic boundary condition having both ends of the domain on the same process"); 228 } 229 } 230 231 /* Tell the DMDA it has 1 degree of freedom per grid point so that the coloring for BAIJ 232 matrices is for the blocks, not the individual matrix elements */ 233 PetscCall(PetscStrbeginswith(da->mattype, MATBAIJ, &isBAIJ)); 234 if (!isBAIJ) PetscCall(PetscStrbeginswith(da->mattype, MATMPIBAIJ, &isBAIJ)); 235 if (!isBAIJ) PetscCall(PetscStrbeginswith(da->mattype, MATSEQBAIJ, &isBAIJ)); 236 if (isBAIJ) { 237 dd->w = 1; 238 dd->xs = dd->xs / nc; 239 dd->xe = dd->xe / nc; 240 dd->Xs = dd->Xs / nc; 241 dd->Xe = dd->Xe / nc; 242 } 243 244 /* 245 We do not provide a getcoloring function in the DMDA operations because 246 the basic DMDA does not know about matrices. We think of DMDA as being 247 more low-level then matrices. 248 */ 249 if (dim == 1) PetscCall(DMCreateColoring_DA_1d_MPIAIJ(da, ctype, coloring)); 250 else if (dim == 2) PetscCall(DMCreateColoring_DA_2d_MPIAIJ(da, ctype, coloring)); 251 else if (dim == 3) PetscCall(DMCreateColoring_DA_3d_MPIAIJ(da, ctype, coloring)); 252 else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "Not done for %" PetscInt_FMT " dimension, send us mail petsc-maint@mcs.anl.gov for code", dim); 253 if (isBAIJ) { 254 dd->w = nc; 255 dd->xs = dd->xs * nc; 256 dd->xe = dd->xe * nc; 257 dd->Xs = dd->Xs * nc; 258 dd->Xe = dd->Xe * nc; 259 } 260 PetscFunctionReturn(PETSC_SUCCESS); 261 } 262 263 PetscErrorCode DMCreateColoring_DA_2d_MPIAIJ(DM da, ISColoringType ctype, ISColoring *coloring) 264 { 265 PetscInt xs, ys, nx, ny, i, j, ii, gxs, gys, gnx, gny, m, n, M, N, dim, s, k, nc, col; 266 PetscInt ncolors = 0; 267 MPI_Comm comm; 268 DMBoundaryType bx, by; 269 DMDAStencilType st; 270 ISColoringValue *colors; 271 DM_DA *dd = (DM_DA *)da->data; 272 273 PetscFunctionBegin; 274 /* 275 nc - number of components per grid point 276 col - number of colors needed in one direction for single component problem 277 278 */ 279 PetscCall(DMDAGetInfo(da, &dim, &m, &n, NULL, &M, &N, NULL, &nc, &s, &bx, &by, NULL, &st)); 280 col = 2 * s + 1; 281 PetscCall(DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL)); 282 PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL)); 283 PetscCall(PetscObjectGetComm((PetscObject)da, &comm)); 284 285 /* special case as taught to us by Paul Hovland */ 286 if (st == DMDA_STENCIL_STAR && s == 1) { 287 PetscCall(DMCreateColoring_DA_2d_5pt_MPIAIJ(da, ctype, coloring)); 288 } else { 289 if (ctype == IS_COLORING_GLOBAL) { 290 if (!dd->localcoloring) { 291 PetscCall(PetscMalloc1(nc * nx * ny, &colors)); 292 ii = 0; 293 for (j = ys; j < ys + ny; j++) { 294 for (i = xs; i < xs + nx; i++) { 295 for (k = 0; k < nc; k++) colors[ii++] = k + nc * ((i % col) + col * (j % col)); 296 } 297 } 298 ncolors = nc + nc * (col - 1 + col * (col - 1)); 299 PetscCall(ISColoringCreate(comm, ncolors, nc * nx * ny, colors, PETSC_OWN_POINTER, &dd->localcoloring)); 300 } 301 *coloring = dd->localcoloring; 302 } else if (ctype == IS_COLORING_LOCAL) { 303 if (!dd->ghostedcoloring) { 304 PetscCall(PetscMalloc1(nc * gnx * gny, &colors)); 305 ii = 0; 306 for (j = gys; j < gys + gny; j++) { 307 for (i = gxs; i < gxs + gnx; i++) { 308 for (k = 0; k < nc; k++) { 309 /* the complicated stuff is to handle periodic boundaries */ 310 colors[ii++] = k + nc * ((SetInRange(i, m) % col) + col * (SetInRange(j, n) % col)); 311 } 312 } 313 } 314 ncolors = nc + nc * (col - 1 + col * (col - 1)); 315 PetscCall(ISColoringCreate(comm, ncolors, nc * gnx * gny, colors, PETSC_OWN_POINTER, &dd->ghostedcoloring)); 316 /* PetscIntView(ncolors,(PetscInt*)colors,0); */ 317 318 PetscCall(ISColoringSetType(dd->ghostedcoloring, IS_COLORING_LOCAL)); 319 } 320 *coloring = dd->ghostedcoloring; 321 } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_ARG_WRONG, "Unknown ISColoringType %d", (int)ctype); 322 } 323 PetscCall(ISColoringReference(*coloring)); 324 PetscFunctionReturn(PETSC_SUCCESS); 325 } 326 327 PetscErrorCode DMCreateColoring_DA_3d_MPIAIJ(DM da, ISColoringType ctype, ISColoring *coloring) 328 { 329 PetscInt xs, ys, nx, ny, i, j, gxs, gys, gnx, gny, m, n, p, dim, s, k, nc, col, zs, gzs, ii, l, nz, gnz, M, N, P; 330 PetscInt ncolors; 331 MPI_Comm comm; 332 DMBoundaryType bx, by, bz; 333 DMDAStencilType st; 334 ISColoringValue *colors; 335 DM_DA *dd = (DM_DA *)da->data; 336 337 PetscFunctionBegin; 338 /* 339 nc - number of components per grid point 340 col - number of colors needed in one direction for single component problem 341 */ 342 PetscCall(DMDAGetInfo(da, &dim, &m, &n, &p, &M, &N, &P, &nc, &s, &bx, &by, &bz, &st)); 343 col = 2 * s + 1; 344 PetscCheck(bx != DM_BOUNDARY_PERIODIC || (m % col) == 0, PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "For coloring efficiency ensure number of grid points in X is divisible by 2*stencil_width + 1"); 345 PetscCheck(by != DM_BOUNDARY_PERIODIC || (n % col) == 0, PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "For coloring efficiency ensure number of grid points in Y is divisible by 2*stencil_width + 1"); 346 PetscCheck(bz != DM_BOUNDARY_PERIODIC || (p % col) == 0, PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "For coloring efficiency ensure number of grid points in Z is divisible by 2*stencil_width + 1"); 347 348 PetscCall(DMDAGetCorners(da, &xs, &ys, &zs, &nx, &ny, &nz)); 349 PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, &gzs, &gnx, &gny, &gnz)); 350 PetscCall(PetscObjectGetComm((PetscObject)da, &comm)); 351 352 /* create the coloring */ 353 if (ctype == IS_COLORING_GLOBAL) { 354 if (!dd->localcoloring) { 355 PetscCall(PetscMalloc1(nc * nx * ny * nz, &colors)); 356 ii = 0; 357 for (k = zs; k < zs + nz; k++) { 358 for (j = ys; j < ys + ny; j++) { 359 for (i = xs; i < xs + nx; i++) { 360 for (l = 0; l < nc; l++) colors[ii++] = l + nc * ((i % col) + col * (j % col) + col * col * (k % col)); 361 } 362 } 363 } 364 ncolors = nc + nc * (col - 1 + col * (col - 1) + col * col * (col - 1)); 365 PetscCall(ISColoringCreate(comm, ncolors, nc * nx * ny * nz, colors, PETSC_OWN_POINTER, &dd->localcoloring)); 366 } 367 *coloring = dd->localcoloring; 368 } else if (ctype == IS_COLORING_LOCAL) { 369 if (!dd->ghostedcoloring) { 370 PetscCall(PetscMalloc1(nc * gnx * gny * gnz, &colors)); 371 ii = 0; 372 for (k = gzs; k < gzs + gnz; k++) { 373 for (j = gys; j < gys + gny; j++) { 374 for (i = gxs; i < gxs + gnx; i++) { 375 for (l = 0; l < nc; l++) { 376 /* the complicated stuff is to handle periodic boundaries */ 377 colors[ii++] = l + nc * ((SetInRange(i, m) % col) + col * (SetInRange(j, n) % col) + col * col * (SetInRange(k, p) % col)); 378 } 379 } 380 } 381 } 382 ncolors = nc + nc * (col - 1 + col * (col - 1) + col * col * (col - 1)); 383 PetscCall(ISColoringCreate(comm, ncolors, nc * gnx * gny * gnz, colors, PETSC_OWN_POINTER, &dd->ghostedcoloring)); 384 PetscCall(ISColoringSetType(dd->ghostedcoloring, IS_COLORING_LOCAL)); 385 } 386 *coloring = dd->ghostedcoloring; 387 } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_ARG_WRONG, "Unknown ISColoringType %d", (int)ctype); 388 PetscCall(ISColoringReference(*coloring)); 389 PetscFunctionReturn(PETSC_SUCCESS); 390 } 391 392 PetscErrorCode DMCreateColoring_DA_1d_MPIAIJ(DM da, ISColoringType ctype, ISColoring *coloring) 393 { 394 PetscInt xs, nx, i, i1, gxs, gnx, l, m, M, dim, s, nc, col; 395 PetscInt ncolors; 396 MPI_Comm comm; 397 DMBoundaryType bx; 398 ISColoringValue *colors; 399 DM_DA *dd = (DM_DA *)da->data; 400 401 PetscFunctionBegin; 402 /* 403 nc - number of components per grid point 404 col - number of colors needed in one direction for single component problem 405 */ 406 PetscCall(DMDAGetInfo(da, &dim, &m, NULL, NULL, &M, NULL, NULL, &nc, &s, &bx, NULL, NULL, NULL)); 407 col = 2 * s + 1; 408 PetscCall(DMDAGetCorners(da, &xs, NULL, NULL, &nx, NULL, NULL)); 409 PetscCall(DMDAGetGhostCorners(da, &gxs, NULL, NULL, &gnx, NULL, NULL)); 410 PetscCall(PetscObjectGetComm((PetscObject)da, &comm)); 411 412 /* create the coloring */ 413 if (ctype == IS_COLORING_GLOBAL) { 414 if (!dd->localcoloring) { 415 PetscCall(PetscMalloc1(nc * nx, &colors)); 416 if (dd->ofillcols) { 417 PetscInt tc = 0; 418 for (i = 0; i < nc; i++) tc += (PetscInt)(dd->ofillcols[i] > 0); 419 i1 = 0; 420 for (i = xs; i < xs + nx; i++) { 421 for (l = 0; l < nc; l++) { 422 if (dd->ofillcols[l] && (i % col)) { 423 colors[i1++] = nc - 1 + tc * ((i % col) - 1) + dd->ofillcols[l]; 424 } else { 425 colors[i1++] = l; 426 } 427 } 428 } 429 ncolors = nc + 2 * s * tc; 430 } else { 431 i1 = 0; 432 for (i = xs; i < xs + nx; i++) { 433 for (l = 0; l < nc; l++) colors[i1++] = l + nc * (i % col); 434 } 435 ncolors = nc + nc * (col - 1); 436 } 437 PetscCall(ISColoringCreate(comm, ncolors, nc * nx, colors, PETSC_OWN_POINTER, &dd->localcoloring)); 438 } 439 *coloring = dd->localcoloring; 440 } else if (ctype == IS_COLORING_LOCAL) { 441 if (!dd->ghostedcoloring) { 442 PetscCall(PetscMalloc1(nc * gnx, &colors)); 443 i1 = 0; 444 for (i = gxs; i < gxs + gnx; i++) { 445 for (l = 0; l < nc; l++) { 446 /* the complicated stuff is to handle periodic boundaries */ 447 colors[i1++] = l + nc * (SetInRange(i, m) % col); 448 } 449 } 450 ncolors = nc + nc * (col - 1); 451 PetscCall(ISColoringCreate(comm, ncolors, nc * gnx, colors, PETSC_OWN_POINTER, &dd->ghostedcoloring)); 452 PetscCall(ISColoringSetType(dd->ghostedcoloring, IS_COLORING_LOCAL)); 453 } 454 *coloring = dd->ghostedcoloring; 455 } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_ARG_WRONG, "Unknown ISColoringType %d", (int)ctype); 456 PetscCall(ISColoringReference(*coloring)); 457 PetscFunctionReturn(PETSC_SUCCESS); 458 } 459 460 PetscErrorCode DMCreateColoring_DA_2d_5pt_MPIAIJ(DM da, ISColoringType ctype, ISColoring *coloring) 461 { 462 PetscInt xs, ys, nx, ny, i, j, ii, gxs, gys, gnx, gny, m, n, dim, s, k, nc; 463 PetscInt ncolors; 464 MPI_Comm comm; 465 DMBoundaryType bx, by; 466 ISColoringValue *colors; 467 DM_DA *dd = (DM_DA *)da->data; 468 469 PetscFunctionBegin; 470 /* 471 nc - number of components per grid point 472 col - number of colors needed in one direction for single component problem 473 */ 474 PetscCall(DMDAGetInfo(da, &dim, &m, &n, NULL, NULL, NULL, NULL, &nc, &s, &bx, &by, NULL, NULL)); 475 PetscCall(DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL)); 476 PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL)); 477 PetscCall(PetscObjectGetComm((PetscObject)da, &comm)); 478 /* create the coloring */ 479 if (ctype == IS_COLORING_GLOBAL) { 480 if (!dd->localcoloring) { 481 PetscCall(PetscMalloc1(nc * nx * ny, &colors)); 482 ii = 0; 483 for (j = ys; j < ys + ny; j++) { 484 for (i = xs; i < xs + nx; i++) { 485 for (k = 0; k < nc; k++) colors[ii++] = k + nc * ((3 * j + i) % 5); 486 } 487 } 488 ncolors = 5 * nc; 489 PetscCall(ISColoringCreate(comm, ncolors, nc * nx * ny, colors, PETSC_OWN_POINTER, &dd->localcoloring)); 490 } 491 *coloring = dd->localcoloring; 492 } else if (ctype == IS_COLORING_LOCAL) { 493 if (!dd->ghostedcoloring) { 494 PetscCall(PetscMalloc1(nc * gnx * gny, &colors)); 495 ii = 0; 496 for (j = gys; j < gys + gny; j++) { 497 for (i = gxs; i < gxs + gnx; i++) { 498 for (k = 0; k < nc; k++) colors[ii++] = k + nc * ((3 * SetInRange(j, n) + SetInRange(i, m)) % 5); 499 } 500 } 501 ncolors = 5 * nc; 502 PetscCall(ISColoringCreate(comm, ncolors, nc * gnx * gny, colors, PETSC_OWN_POINTER, &dd->ghostedcoloring)); 503 PetscCall(ISColoringSetType(dd->ghostedcoloring, IS_COLORING_LOCAL)); 504 } 505 *coloring = dd->ghostedcoloring; 506 } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_ARG_WRONG, "Unknown ISColoringType %d", (int)ctype); 507 PetscFunctionReturn(PETSC_SUCCESS); 508 } 509 510 /* =========================================================================== */ 511 extern PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ(DM, Mat); 512 extern PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ_Fill(DM, Mat); 513 extern PetscErrorCode DMCreateMatrix_DA_1d_SeqAIJ_NoPreallocation(DM, Mat); 514 extern PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ(DM, Mat); 515 extern PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ_Fill(DM, Mat); 516 extern PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ(DM, Mat); 517 extern PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ_Fill(DM, Mat); 518 extern PetscErrorCode DMCreateMatrix_DA_2d_MPIBAIJ(DM, Mat); 519 extern PetscErrorCode DMCreateMatrix_DA_3d_MPIBAIJ(DM, Mat); 520 extern PetscErrorCode DMCreateMatrix_DA_2d_MPISBAIJ(DM, Mat); 521 extern PetscErrorCode DMCreateMatrix_DA_3d_MPISBAIJ(DM, Mat); 522 extern PetscErrorCode DMCreateMatrix_DA_2d_MPISELL(DM, Mat); 523 extern PetscErrorCode DMCreateMatrix_DA_3d_MPISELL(DM, Mat); 524 extern PetscErrorCode DMCreateMatrix_DA_IS(DM, Mat); 525 526 static PetscErrorCode MatView_MPI_DA(Mat A, PetscViewer viewer) 527 { 528 DM da; 529 const char *prefix; 530 Mat AA, Anatural; 531 AO ao; 532 PetscInt rstart, rend, *petsc, i; 533 IS is; 534 MPI_Comm comm; 535 PetscViewerFormat format; 536 PetscBool flag; 537 538 PetscFunctionBegin; 539 /* Check whether we are just printing info, in which case MatView() already viewed everything we wanted to view */ 540 PetscCall(PetscViewerGetFormat(viewer, &format)); 541 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) PetscFunctionReturn(PETSC_SUCCESS); 542 543 PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 544 PetscCall(MatGetDM(A, &da)); 545 PetscCheck(da, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Matrix not generated from a DMDA"); 546 547 PetscCall(DMDAGetAO(da, &ao)); 548 PetscCall(MatGetOwnershipRange(A, &rstart, &rend)); 549 PetscCall(PetscMalloc1(rend - rstart, &petsc)); 550 for (i = rstart; i < rend; i++) petsc[i - rstart] = i; 551 PetscCall(AOApplicationToPetsc(ao, rend - rstart, petsc)); 552 PetscCall(ISCreateGeneral(comm, rend - rstart, petsc, PETSC_OWN_POINTER, &is)); 553 554 /* call viewer on natural ordering */ 555 PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATMPISELL, &flag)); 556 if (flag) { 557 PetscCall(MatConvert(A, MATAIJ, MAT_INITIAL_MATRIX, &AA)); 558 A = AA; 559 } 560 PetscCall(MatCreateSubMatrix(A, is, is, MAT_INITIAL_MATRIX, &Anatural)); 561 PetscCall(ISDestroy(&is)); 562 PetscCall(PetscObjectGetOptionsPrefix((PetscObject)A, &prefix)); 563 PetscCall(PetscObjectSetOptionsPrefix((PetscObject)Anatural, prefix)); 564 PetscCall(PetscObjectSetName((PetscObject)Anatural, ((PetscObject)A)->name)); 565 ((PetscObject)Anatural)->donotPetscObjectPrintClassNamePrefixType = PETSC_TRUE; 566 PetscCall(MatView(Anatural, viewer)); 567 ((PetscObject)Anatural)->donotPetscObjectPrintClassNamePrefixType = PETSC_FALSE; 568 PetscCall(MatDestroy(&Anatural)); 569 if (flag) PetscCall(MatDestroy(&AA)); 570 PetscFunctionReturn(PETSC_SUCCESS); 571 } 572 573 static PetscErrorCode MatLoad_MPI_DA(Mat A, PetscViewer viewer) 574 { 575 DM da; 576 Mat Anatural, Aapp; 577 AO ao; 578 PetscInt rstart, rend, *app, i, m, n, M, N; 579 IS is; 580 MPI_Comm comm; 581 582 PetscFunctionBegin; 583 PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 584 PetscCall(MatGetDM(A, &da)); 585 PetscCheck(da, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Matrix not generated from a DMDA"); 586 587 /* Load the matrix in natural ordering */ 588 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &Anatural)); 589 PetscCall(MatSetType(Anatural, ((PetscObject)A)->type_name)); 590 PetscCall(MatGetSize(A, &M, &N)); 591 PetscCall(MatGetLocalSize(A, &m, &n)); 592 PetscCall(MatSetSizes(Anatural, m, n, M, N)); 593 PetscCall(MatLoad(Anatural, viewer)); 594 595 /* Map natural ordering to application ordering and create IS */ 596 PetscCall(DMDAGetAO(da, &ao)); 597 PetscCall(MatGetOwnershipRange(Anatural, &rstart, &rend)); 598 PetscCall(PetscMalloc1(rend - rstart, &app)); 599 for (i = rstart; i < rend; i++) app[i - rstart] = i; 600 PetscCall(AOPetscToApplication(ao, rend - rstart, app)); 601 PetscCall(ISCreateGeneral(comm, rend - rstart, app, PETSC_OWN_POINTER, &is)); 602 603 /* Do permutation and replace header */ 604 PetscCall(MatCreateSubMatrix(Anatural, is, is, MAT_INITIAL_MATRIX, &Aapp)); 605 PetscCall(MatHeaderReplace(A, &Aapp)); 606 PetscCall(ISDestroy(&is)); 607 PetscCall(MatDestroy(&Anatural)); 608 PetscFunctionReturn(PETSC_SUCCESS); 609 } 610 611 PetscErrorCode DMCreateMatrix_DA(DM da, Mat *J) 612 { 613 PetscInt dim, dof, nx, ny, nz, dims[3], starts[3], M, N, P; 614 Mat A; 615 MPI_Comm comm; 616 MatType Atype; 617 MatType mtype; 618 PetscMPIInt size; 619 DM_DA *dd = (DM_DA *)da->data; 620 void (*aij)(void) = NULL, (*baij)(void) = NULL, (*sbaij)(void) = NULL, (*sell)(void) = NULL, (*is)(void) = NULL; 621 622 PetscFunctionBegin; 623 PetscCall(MatInitializePackage()); 624 mtype = da->mattype; 625 626 /* 627 m 628 ------------------------------------------------------ 629 | | 630 | | 631 | ---------------------- | 632 | | | | 633 n | ny | | | 634 | | | | 635 | .--------------------- | 636 | (xs,ys) nx | 637 | . | 638 | (gxs,gys) | 639 | | 640 ----------------------------------------------------- 641 */ 642 643 /* 644 nc - number of components per grid point 645 col - number of colors needed in one direction for single component problem 646 */ 647 M = dd->M; 648 N = dd->N; 649 P = dd->P; 650 dim = da->dim; 651 dof = dd->w; 652 /* PetscCall(DMDAGetInfo(da,&dim,&M,&N,&P,NULL,NULL,NULL,&dof,NULL,NULL,NULL,NULL,NULL)); */ 653 PetscCall(DMDAGetCorners(da, NULL, NULL, NULL, &nx, &ny, &nz)); 654 PetscCall(PetscObjectGetComm((PetscObject)da, &comm)); 655 PetscCall(MatCreate(comm, &A)); 656 PetscCall(MatSetSizes(A, dof * nx * ny * nz, dof * nx * ny * nz, dof * M * N * P, dof * M * N * P)); 657 PetscCall(MatSetType(A, mtype)); 658 PetscCall(MatSetFromOptions(A)); 659 if (dof * nx * ny * nz < da->bind_below) { 660 PetscCall(MatSetBindingPropagates(A, PETSC_TRUE)); 661 PetscCall(MatBindToCPU(A, PETSC_TRUE)); 662 } 663 PetscCall(MatSetDM(A, da)); 664 if (da->structure_only) PetscCall(MatSetOption(A, MAT_STRUCTURE_ONLY, PETSC_TRUE)); 665 PetscCall(MatGetType(A, &Atype)); 666 /* 667 We do not provide a getmatrix function in the DMDA operations because 668 the basic DMDA does not know about matrices. We think of DMDA as being more 669 more low-level than matrices. This is kind of cheating but, cause sometimes 670 we think of DMDA has higher level than matrices. 671 672 We could switch based on Atype (or mtype), but we do not since the 673 specialized setting routines depend only on the particular preallocation 674 details of the matrix, not the type itself. 675 */ 676 if (!da->prealloc_skip) { // Flag is likely set when user intends to use MatSetPreallocationCOO() 677 PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatMPIAIJSetPreallocation_C", &aij)); 678 if (!aij) PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatSeqAIJSetPreallocation_C", &aij)); 679 if (!aij) { 680 PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatMPIBAIJSetPreallocation_C", &baij)); 681 if (!baij) PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatSeqBAIJSetPreallocation_C", &baij)); 682 if (!baij) { 683 PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatMPISBAIJSetPreallocation_C", &sbaij)); 684 if (!sbaij) PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatSeqSBAIJSetPreallocation_C", &sbaij)); 685 if (!sbaij) { 686 PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatMPISELLSetPreallocation_C", &sell)); 687 if (!sell) PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatSeqSELLSetPreallocation_C", &sell)); 688 } 689 if (!sell) PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatISSetPreallocation_C", &is)); 690 } 691 } 692 } 693 if (aij) { 694 if (dim == 1) { 695 if (dd->ofill) { 696 PetscCall(DMCreateMatrix_DA_1d_MPIAIJ_Fill(da, A)); 697 } else { 698 DMBoundaryType bx; 699 PetscMPIInt size; 700 PetscCall(DMDAGetInfo(da, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, &bx, NULL, NULL, NULL)); 701 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)da), &size)); 702 if (size == 1 && bx == DM_BOUNDARY_NONE) { 703 PetscCall(DMCreateMatrix_DA_1d_SeqAIJ_NoPreallocation(da, A)); 704 } else { 705 PetscCall(DMCreateMatrix_DA_1d_MPIAIJ(da, A)); 706 } 707 } 708 } else if (dim == 2) { 709 if (dd->ofill) { 710 PetscCall(DMCreateMatrix_DA_2d_MPIAIJ_Fill(da, A)); 711 } else { 712 PetscCall(DMCreateMatrix_DA_2d_MPIAIJ(da, A)); 713 } 714 } else if (dim == 3) { 715 if (dd->ofill) { 716 PetscCall(DMCreateMatrix_DA_3d_MPIAIJ_Fill(da, A)); 717 } else { 718 PetscCall(DMCreateMatrix_DA_3d_MPIAIJ(da, A)); 719 } 720 } 721 } else if (baij) { 722 if (dim == 2) { 723 PetscCall(DMCreateMatrix_DA_2d_MPIBAIJ(da, A)); 724 } else if (dim == 3) { 725 PetscCall(DMCreateMatrix_DA_3d_MPIBAIJ(da, A)); 726 } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "Not implemented for %" PetscInt_FMT " dimension and Matrix Type: %s in %" PetscInt_FMT " dimension! Send mail to petsc-maint@mcs.anl.gov for code", dim, Atype, dim); 727 } else if (sbaij) { 728 if (dim == 2) { 729 PetscCall(DMCreateMatrix_DA_2d_MPISBAIJ(da, A)); 730 } else if (dim == 3) { 731 PetscCall(DMCreateMatrix_DA_3d_MPISBAIJ(da, A)); 732 } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "Not implemented for %" PetscInt_FMT " dimension and Matrix Type: %s in %" PetscInt_FMT " dimension! Send mail to petsc-maint@mcs.anl.gov for code", dim, Atype, dim); 733 } else if (sell) { 734 if (dim == 2) { 735 PetscCall(DMCreateMatrix_DA_2d_MPISELL(da, A)); 736 } else if (dim == 3) { 737 PetscCall(DMCreateMatrix_DA_3d_MPISELL(da, A)); 738 } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "Not implemented for %" PetscInt_FMT " dimension and Matrix Type: %s in %" PetscInt_FMT " dimension! Send mail to petsc-maint@mcs.anl.gov for code", dim, Atype, dim); 739 } else if (is) { 740 PetscCall(DMCreateMatrix_DA_IS(da, A)); 741 } else { // unknown type or da->prealloc_skip so structural information may be needed, but no prealloc 742 ISLocalToGlobalMapping ltog; 743 744 PetscCall(MatSetBlockSize(A, dof)); 745 PetscCall(MatSetUp(A)); 746 PetscCall(DMGetLocalToGlobalMapping(da, <og)); 747 PetscCall(MatSetLocalToGlobalMapping(A, ltog, ltog)); 748 } 749 PetscCall(DMDAGetGhostCorners(da, &starts[0], &starts[1], &starts[2], &dims[0], &dims[1], &dims[2])); 750 PetscCall(MatSetStencil(A, dim, dims, starts, dof)); 751 PetscCall(MatSetDM(A, da)); 752 PetscCallMPI(MPI_Comm_size(comm, &size)); 753 if (size > 1) { 754 /* change viewer to display matrix in natural ordering */ 755 PetscCall(MatSetOperation(A, MATOP_VIEW, (void (*)(void))MatView_MPI_DA)); 756 PetscCall(MatSetOperation(A, MATOP_LOAD, (void (*)(void))MatLoad_MPI_DA)); 757 } 758 *J = A; 759 PetscFunctionReturn(PETSC_SUCCESS); 760 } 761 762 PETSC_EXTERN PetscErrorCode MatISSetPreallocation_IS(Mat, PetscInt, const PetscInt[], PetscInt, const PetscInt[]); 763 764 PetscErrorCode DMCreateMatrix_DA_IS(DM dm, Mat J) 765 { 766 DM_DA *da = (DM_DA *)dm->data; 767 Mat lJ, P; 768 ISLocalToGlobalMapping ltog; 769 IS is; 770 PetscBT bt; 771 const PetscInt *e_loc, *idx; 772 PetscInt i, nel, nen, nv, dof, *gidx, n, N; 773 774 /* The l2g map of DMDA has all ghosted nodes, and e_loc is a subset of all the local nodes (including the ghosted) 775 We need to filter out the local indices that are not represented through the DMDAGetElements decomposition */ 776 PetscFunctionBegin; 777 dof = da->w; 778 PetscCall(MatSetBlockSize(J, dof)); 779 PetscCall(DMGetLocalToGlobalMapping(dm, <og)); 780 781 /* flag local elements indices in local DMDA numbering */ 782 PetscCall(ISLocalToGlobalMappingGetSize(ltog, &nv)); 783 PetscCall(PetscBTCreate(nv / dof, &bt)); 784 PetscCall(DMDAGetElements(dm, &nel, &nen, &e_loc)); /* this will throw an error if the stencil type is not DMDA_STENCIL_BOX */ 785 for (i = 0; i < nel * nen; i++) PetscCall(PetscBTSet(bt, e_loc[i])); 786 787 /* filter out (set to -1) the global indices not used by the local elements */ 788 PetscCall(PetscMalloc1(nv / dof, &gidx)); 789 PetscCall(ISLocalToGlobalMappingGetBlockIndices(ltog, &idx)); 790 PetscCall(PetscArraycpy(gidx, idx, nv / dof)); 791 PetscCall(ISLocalToGlobalMappingRestoreBlockIndices(ltog, &idx)); 792 for (i = 0; i < nv / dof; i++) 793 if (!PetscBTLookup(bt, i)) gidx[i] = -1; 794 PetscCall(PetscBTDestroy(&bt)); 795 PetscCall(ISCreateBlock(PetscObjectComm((PetscObject)dm), dof, nv / dof, gidx, PETSC_OWN_POINTER, &is)); 796 PetscCall(ISLocalToGlobalMappingCreateIS(is, <og)); 797 PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog)); 798 PetscCall(ISLocalToGlobalMappingDestroy(<og)); 799 PetscCall(ISDestroy(&is)); 800 801 /* Preallocation */ 802 if (dm->prealloc_skip) { 803 PetscCall(MatSetUp(J)); 804 } else { 805 PetscCall(MatISGetLocalMat(J, &lJ)); 806 PetscCall(MatGetLocalToGlobalMapping(lJ, <og, NULL)); 807 PetscCall(MatCreate(PetscObjectComm((PetscObject)lJ), &P)); 808 PetscCall(MatSetType(P, MATPREALLOCATOR)); 809 PetscCall(MatSetLocalToGlobalMapping(P, ltog, ltog)); 810 PetscCall(MatGetSize(lJ, &N, NULL)); 811 PetscCall(MatGetLocalSize(lJ, &n, NULL)); 812 PetscCall(MatSetSizes(P, n, n, N, N)); 813 PetscCall(MatSetBlockSize(P, dof)); 814 PetscCall(MatSetUp(P)); 815 for (i = 0; i < nel; i++) PetscCall(MatSetValuesBlockedLocal(P, nen, e_loc + i * nen, nen, e_loc + i * nen, NULL, INSERT_VALUES)); 816 PetscCall(MatPreallocatorPreallocate(P, (PetscBool)!da->prealloc_only, lJ)); 817 PetscCall(MatISRestoreLocalMat(J, &lJ)); 818 PetscCall(DMDARestoreElements(dm, &nel, &nen, &e_loc)); 819 PetscCall(MatDestroy(&P)); 820 821 PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY)); 822 PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY)); 823 } 824 PetscFunctionReturn(PETSC_SUCCESS); 825 } 826 827 PetscErrorCode DMCreateMatrix_DA_2d_MPISELL(DM da, Mat J) 828 { 829 PetscInt xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny, m, n, dim, s, *cols = NULL, k, nc, *rows = NULL, col, cnt, l, p; 830 PetscInt lstart, lend, pstart, pend, *dnz, *onz; 831 MPI_Comm comm; 832 PetscScalar *values; 833 DMBoundaryType bx, by; 834 ISLocalToGlobalMapping ltog; 835 DMDAStencilType st; 836 837 PetscFunctionBegin; 838 /* 839 nc - number of components per grid point 840 col - number of colors needed in one direction for single component problem 841 */ 842 PetscCall(DMDAGetInfo(da, &dim, &m, &n, NULL, NULL, NULL, NULL, &nc, &s, &bx, &by, NULL, &st)); 843 col = 2 * s + 1; 844 PetscCall(DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL)); 845 PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL)); 846 PetscCall(PetscObjectGetComm((PetscObject)da, &comm)); 847 848 PetscCall(PetscMalloc2(nc, &rows, col * col * nc * nc, &cols)); 849 PetscCall(DMGetLocalToGlobalMapping(da, <og)); 850 851 PetscCall(MatSetBlockSize(J, nc)); 852 /* determine the matrix preallocation information */ 853 MatPreallocateBegin(comm, nc * nx * ny, nc * nx * ny, dnz, onz); 854 for (i = xs; i < xs + nx; i++) { 855 pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i)); 856 pend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1)); 857 858 for (j = ys; j < ys + ny; j++) { 859 slot = i - gxs + gnx * (j - gys); 860 861 lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j)); 862 lend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1)); 863 864 cnt = 0; 865 for (k = 0; k < nc; k++) { 866 for (l = lstart; l < lend + 1; l++) { 867 for (p = pstart; p < pend + 1; p++) { 868 if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star have either l = 0 or p = 0 */ 869 cols[cnt++] = k + nc * (slot + gnx * l + p); 870 } 871 } 872 } 873 rows[k] = k + nc * (slot); 874 } 875 PetscCall(MatPreallocateSetLocal(ltog, nc, rows, ltog, cnt, cols, dnz, onz)); 876 } 877 } 878 PetscCall(MatSetBlockSize(J, nc)); 879 PetscCall(MatSeqSELLSetPreallocation(J, 0, dnz)); 880 PetscCall(MatMPISELLSetPreallocation(J, 0, dnz, 0, onz)); 881 MatPreallocateEnd(dnz, onz); 882 883 PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog)); 884 885 /* 886 For each node in the grid: we get the neighbors in the local (on processor ordering 887 that includes the ghost points) then MatSetValuesLocal() maps those indices to the global 888 PETSc ordering. 889 */ 890 if (!da->prealloc_only) { 891 PetscCall(PetscCalloc1(col * col * nc * nc, &values)); 892 for (i = xs; i < xs + nx; i++) { 893 pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i)); 894 pend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1)); 895 896 for (j = ys; j < ys + ny; j++) { 897 slot = i - gxs + gnx * (j - gys); 898 899 lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j)); 900 lend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1)); 901 902 cnt = 0; 903 for (k = 0; k < nc; k++) { 904 for (l = lstart; l < lend + 1; l++) { 905 for (p = pstart; p < pend + 1; p++) { 906 if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star have either l = 0 or p = 0 */ 907 cols[cnt++] = k + nc * (slot + gnx * l + p); 908 } 909 } 910 } 911 rows[k] = k + nc * (slot); 912 } 913 PetscCall(MatSetValuesLocal(J, nc, rows, cnt, cols, values, INSERT_VALUES)); 914 } 915 } 916 PetscCall(PetscFree(values)); 917 /* do not copy values to GPU since they are all zero and not yet needed there */ 918 PetscCall(MatBindToCPU(J, PETSC_TRUE)); 919 PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY)); 920 PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY)); 921 PetscCall(MatBindToCPU(J, PETSC_FALSE)); 922 PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE)); 923 } 924 PetscCall(PetscFree2(rows, cols)); 925 PetscFunctionReturn(PETSC_SUCCESS); 926 } 927 928 PetscErrorCode DMCreateMatrix_DA_3d_MPISELL(DM da, Mat J) 929 { 930 PetscInt xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny; 931 PetscInt m, n, dim, s, *cols = NULL, k, nc, *rows = NULL, col, cnt, l, p, *dnz = NULL, *onz = NULL; 932 PetscInt istart, iend, jstart, jend, kstart, kend, zs, nz, gzs, gnz, ii, jj, kk, M, N, P; 933 MPI_Comm comm; 934 PetscScalar *values; 935 DMBoundaryType bx, by, bz; 936 ISLocalToGlobalMapping ltog; 937 DMDAStencilType st; 938 939 PetscFunctionBegin; 940 /* 941 nc - number of components per grid point 942 col - number of colors needed in one direction for single component problem 943 */ 944 PetscCall(DMDAGetInfo(da, &dim, &m, &n, &p, &M, &N, &P, &nc, &s, &bx, &by, &bz, &st)); 945 col = 2 * s + 1; 946 PetscCall(DMDAGetCorners(da, &xs, &ys, &zs, &nx, &ny, &nz)); 947 PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, &gzs, &gnx, &gny, &gnz)); 948 PetscCall(PetscObjectGetComm((PetscObject)da, &comm)); 949 950 PetscCall(PetscMalloc2(nc, &rows, col * col * col * nc * nc, &cols)); 951 PetscCall(DMGetLocalToGlobalMapping(da, <og)); 952 953 PetscCall(MatSetBlockSize(J, nc)); 954 /* determine the matrix preallocation information */ 955 MatPreallocateBegin(comm, nc * nx * ny * nz, nc * nx * ny * nz, dnz, onz); 956 for (i = xs; i < xs + nx; i++) { 957 istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i)); 958 iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1)); 959 for (j = ys; j < ys + ny; j++) { 960 jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j)); 961 jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1)); 962 for (k = zs; k < zs + nz; k++) { 963 kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k)); 964 kend = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1)); 965 966 slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs); 967 968 cnt = 0; 969 for (l = 0; l < nc; l++) { 970 for (ii = istart; ii < iend + 1; ii++) { 971 for (jj = jstart; jj < jend + 1; jj++) { 972 for (kk = kstart; kk < kend + 1; kk++) { 973 if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/ 974 cols[cnt++] = l + nc * (slot + ii + gnx * jj + gnx * gny * kk); 975 } 976 } 977 } 978 } 979 rows[l] = l + nc * (slot); 980 } 981 PetscCall(MatPreallocateSetLocal(ltog, nc, rows, ltog, cnt, cols, dnz, onz)); 982 } 983 } 984 } 985 PetscCall(MatSetBlockSize(J, nc)); 986 PetscCall(MatSeqSELLSetPreallocation(J, 0, dnz)); 987 PetscCall(MatMPISELLSetPreallocation(J, 0, dnz, 0, onz)); 988 MatPreallocateEnd(dnz, onz); 989 PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog)); 990 991 /* 992 For each node in the grid: we get the neighbors in the local (on processor ordering 993 that includes the ghost points) then MatSetValuesLocal() maps those indices to the global 994 PETSc ordering. 995 */ 996 if (!da->prealloc_only) { 997 PetscCall(PetscCalloc1(col * col * col * nc * nc * nc, &values)); 998 for (i = xs; i < xs + nx; i++) { 999 istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i)); 1000 iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1)); 1001 for (j = ys; j < ys + ny; j++) { 1002 jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j)); 1003 jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1)); 1004 for (k = zs; k < zs + nz; k++) { 1005 kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k)); 1006 kend = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1)); 1007 1008 slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs); 1009 1010 cnt = 0; 1011 for (l = 0; l < nc; l++) { 1012 for (ii = istart; ii < iend + 1; ii++) { 1013 for (jj = jstart; jj < jend + 1; jj++) { 1014 for (kk = kstart; kk < kend + 1; kk++) { 1015 if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/ 1016 cols[cnt++] = l + nc * (slot + ii + gnx * jj + gnx * gny * kk); 1017 } 1018 } 1019 } 1020 } 1021 rows[l] = l + nc * (slot); 1022 } 1023 PetscCall(MatSetValuesLocal(J, nc, rows, cnt, cols, values, INSERT_VALUES)); 1024 } 1025 } 1026 } 1027 PetscCall(PetscFree(values)); 1028 /* do not copy values to GPU since they are all zero and not yet needed there */ 1029 PetscCall(MatBindToCPU(J, PETSC_TRUE)); 1030 PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY)); 1031 PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY)); 1032 PetscCall(MatBindToCPU(J, PETSC_FALSE)); 1033 PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE)); 1034 } 1035 PetscCall(PetscFree2(rows, cols)); 1036 PetscFunctionReturn(PETSC_SUCCESS); 1037 } 1038 1039 PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ(DM da, Mat J) 1040 { 1041 PetscInt xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny, m, n, dim, s, *cols = NULL, k, nc, *rows = NULL, col, cnt, l, p, M, N; 1042 PetscInt lstart, lend, pstart, pend, *dnz, *onz; 1043 MPI_Comm comm; 1044 DMBoundaryType bx, by; 1045 ISLocalToGlobalMapping ltog, mltog; 1046 DMDAStencilType st; 1047 PetscBool removedups = PETSC_FALSE, alreadyboundtocpu = PETSC_TRUE; 1048 1049 PetscFunctionBegin; 1050 /* 1051 nc - number of components per grid point 1052 col - number of colors needed in one direction for single component problem 1053 */ 1054 PetscCall(DMDAGetInfo(da, &dim, &m, &n, NULL, &M, &N, NULL, &nc, &s, &bx, &by, NULL, &st)); 1055 if (bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE) PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_TRUE)); 1056 col = 2 * s + 1; 1057 /* 1058 With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times 1059 because of "wrapping" around the end of the domain hitting an entry already counted in the other direction. 1060 */ 1061 if (M == 1 && 2 * s >= m) removedups = PETSC_TRUE; 1062 if (N == 1 && 2 * s >= n) removedups = PETSC_TRUE; 1063 PetscCall(DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL)); 1064 PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL)); 1065 PetscCall(PetscObjectGetComm((PetscObject)da, &comm)); 1066 1067 PetscCall(PetscMalloc2(nc, &rows, col * col * nc * nc, &cols)); 1068 PetscCall(DMGetLocalToGlobalMapping(da, <og)); 1069 1070 PetscCall(MatSetBlockSize(J, nc)); 1071 /* determine the matrix preallocation information */ 1072 MatPreallocateBegin(comm, nc * nx * ny, nc * nx * ny, dnz, onz); 1073 for (i = xs; i < xs + nx; i++) { 1074 pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i)); 1075 pend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1)); 1076 1077 for (j = ys; j < ys + ny; j++) { 1078 slot = i - gxs + gnx * (j - gys); 1079 1080 lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j)); 1081 lend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1)); 1082 1083 cnt = 0; 1084 for (k = 0; k < nc; k++) { 1085 for (l = lstart; l < lend + 1; l++) { 1086 for (p = pstart; p < pend + 1; p++) { 1087 if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star have either l = 0 or p = 0 */ 1088 cols[cnt++] = k + nc * (slot + gnx * l + p); 1089 } 1090 } 1091 } 1092 rows[k] = k + nc * (slot); 1093 } 1094 if (removedups) PetscCall(MatPreallocateSetLocalRemoveDups(ltog, nc, rows, ltog, cnt, cols, dnz, onz)); 1095 else PetscCall(MatPreallocateSetLocal(ltog, nc, rows, ltog, cnt, cols, dnz, onz)); 1096 } 1097 } 1098 PetscCall(MatSetBlockSize(J, nc)); 1099 PetscCall(MatSeqAIJSetPreallocation(J, 0, dnz)); 1100 PetscCall(MatMPIAIJSetPreallocation(J, 0, dnz, 0, onz)); 1101 MatPreallocateEnd(dnz, onz); 1102 PetscCall(MatGetLocalToGlobalMapping(J, &mltog, NULL)); 1103 if (!mltog) PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog)); 1104 1105 /* 1106 For each node in the grid: we get the neighbors in the local (on processor ordering 1107 that includes the ghost points) then MatSetValuesLocal() maps those indices to the global 1108 PETSc ordering. 1109 */ 1110 if (!da->prealloc_only) { 1111 for (i = xs; i < xs + nx; i++) { 1112 pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i)); 1113 pend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1)); 1114 1115 for (j = ys; j < ys + ny; j++) { 1116 slot = i - gxs + gnx * (j - gys); 1117 1118 lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j)); 1119 lend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1)); 1120 1121 cnt = 0; 1122 for (l = lstart; l < lend + 1; l++) { 1123 for (p = pstart; p < pend + 1; p++) { 1124 if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star have either l = 0 or p = 0 */ 1125 cols[cnt++] = nc * (slot + gnx * l + p); 1126 for (k = 1; k < nc; k++) { 1127 cols[cnt] = 1 + cols[cnt - 1]; 1128 cnt++; 1129 } 1130 } 1131 } 1132 } 1133 for (k = 0; k < nc; k++) rows[k] = k + nc * (slot); 1134 PetscCall(MatSetValuesLocal(J, nc, rows, cnt, cols, NULL, INSERT_VALUES)); 1135 } 1136 } 1137 /* do not copy values to GPU since they are all zero and not yet needed there */ 1138 PetscCall(MatBoundToCPU(J, &alreadyboundtocpu)); 1139 PetscCall(MatBindToCPU(J, PETSC_TRUE)); 1140 PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY)); 1141 PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY)); 1142 if (!alreadyboundtocpu) PetscCall(MatBindToCPU(J, PETSC_FALSE)); 1143 PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE)); 1144 if (bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE) PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_FALSE)); 1145 } 1146 PetscCall(PetscFree2(rows, cols)); 1147 PetscFunctionReturn(PETSC_SUCCESS); 1148 } 1149 1150 PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ_Fill(DM da, Mat J) 1151 { 1152 PetscInt xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny; 1153 PetscInt m, n, dim, s, *cols, k, nc, row, col, cnt, maxcnt = 0, l, p, M, N; 1154 PetscInt lstart, lend, pstart, pend, *dnz, *onz; 1155 DM_DA *dd = (DM_DA *)da->data; 1156 PetscInt ifill_col, *ofill = dd->ofill, *dfill = dd->dfill; 1157 MPI_Comm comm; 1158 DMBoundaryType bx, by; 1159 ISLocalToGlobalMapping ltog; 1160 DMDAStencilType st; 1161 PetscBool removedups = PETSC_FALSE; 1162 1163 PetscFunctionBegin; 1164 /* 1165 nc - number of components per grid point 1166 col - number of colors needed in one direction for single component problem 1167 */ 1168 PetscCall(DMDAGetInfo(da, &dim, &m, &n, NULL, &M, &N, NULL, &nc, &s, &bx, &by, NULL, &st)); 1169 col = 2 * s + 1; 1170 /* 1171 With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times 1172 because of "wrapping" around the end of the domain hitting an entry already counted in the other direction. 1173 */ 1174 if (M == 1 && 2 * s >= m) removedups = PETSC_TRUE; 1175 if (N == 1 && 2 * s >= n) removedups = PETSC_TRUE; 1176 PetscCall(DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL)); 1177 PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL)); 1178 PetscCall(PetscObjectGetComm((PetscObject)da, &comm)); 1179 1180 PetscCall(PetscMalloc1(col * col * nc, &cols)); 1181 PetscCall(DMGetLocalToGlobalMapping(da, <og)); 1182 1183 PetscCall(MatSetBlockSize(J, nc)); 1184 /* determine the matrix preallocation information */ 1185 MatPreallocateBegin(comm, nc * nx * ny, nc * nx * ny, dnz, onz); 1186 for (i = xs; i < xs + nx; i++) { 1187 pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i)); 1188 pend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1)); 1189 1190 for (j = ys; j < ys + ny; j++) { 1191 slot = i - gxs + gnx * (j - gys); 1192 1193 lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j)); 1194 lend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1)); 1195 1196 for (k = 0; k < nc; k++) { 1197 cnt = 0; 1198 for (l = lstart; l < lend + 1; l++) { 1199 for (p = pstart; p < pend + 1; p++) { 1200 if (l || p) { 1201 if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star */ 1202 for (ifill_col = ofill[k]; ifill_col < ofill[k + 1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc * (slot + gnx * l + p); 1203 } 1204 } else { 1205 if (dfill) { 1206 for (ifill_col = dfill[k]; ifill_col < dfill[k + 1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc * (slot + gnx * l + p); 1207 } else { 1208 for (ifill_col = 0; ifill_col < nc; ifill_col++) cols[cnt++] = ifill_col + nc * (slot + gnx * l + p); 1209 } 1210 } 1211 } 1212 } 1213 row = k + nc * (slot); 1214 maxcnt = PetscMax(maxcnt, cnt); 1215 if (removedups) PetscCall(MatPreallocateSetLocalRemoveDups(ltog, 1, &row, ltog, cnt, cols, dnz, onz)); 1216 else PetscCall(MatPreallocateSetLocal(ltog, 1, &row, ltog, cnt, cols, dnz, onz)); 1217 } 1218 } 1219 } 1220 PetscCall(MatSeqAIJSetPreallocation(J, 0, dnz)); 1221 PetscCall(MatMPIAIJSetPreallocation(J, 0, dnz, 0, onz)); 1222 MatPreallocateEnd(dnz, onz); 1223 PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog)); 1224 1225 /* 1226 For each node in the grid: we get the neighbors in the local (on processor ordering 1227 that includes the ghost points) then MatSetValuesLocal() maps those indices to the global 1228 PETSc ordering. 1229 */ 1230 if (!da->prealloc_only) { 1231 for (i = xs; i < xs + nx; i++) { 1232 pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i)); 1233 pend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1)); 1234 1235 for (j = ys; j < ys + ny; j++) { 1236 slot = i - gxs + gnx * (j - gys); 1237 1238 lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j)); 1239 lend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1)); 1240 1241 for (k = 0; k < nc; k++) { 1242 cnt = 0; 1243 for (l = lstart; l < lend + 1; l++) { 1244 for (p = pstart; p < pend + 1; p++) { 1245 if (l || p) { 1246 if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star */ 1247 for (ifill_col = ofill[k]; ifill_col < ofill[k + 1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc * (slot + gnx * l + p); 1248 } 1249 } else { 1250 if (dfill) { 1251 for (ifill_col = dfill[k]; ifill_col < dfill[k + 1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc * (slot + gnx * l + p); 1252 } else { 1253 for (ifill_col = 0; ifill_col < nc; ifill_col++) cols[cnt++] = ifill_col + nc * (slot + gnx * l + p); 1254 } 1255 } 1256 } 1257 } 1258 row = k + nc * (slot); 1259 PetscCall(MatSetValuesLocal(J, 1, &row, cnt, cols, NULL, INSERT_VALUES)); 1260 } 1261 } 1262 } 1263 /* do not copy values to GPU since they are all zero and not yet needed there */ 1264 PetscCall(MatBindToCPU(J, PETSC_TRUE)); 1265 PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY)); 1266 PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY)); 1267 PetscCall(MatBindToCPU(J, PETSC_FALSE)); 1268 PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE)); 1269 } 1270 PetscCall(PetscFree(cols)); 1271 PetscFunctionReturn(PETSC_SUCCESS); 1272 } 1273 1274 PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ(DM da, Mat J) 1275 { 1276 PetscInt xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny; 1277 PetscInt m, n, dim, s, *cols = NULL, k, nc, *rows = NULL, col, cnt, l, p, *dnz = NULL, *onz = NULL; 1278 PetscInt istart, iend, jstart, jend, kstart, kend, zs, nz, gzs, gnz, ii, jj, kk, M, N, P; 1279 MPI_Comm comm; 1280 DMBoundaryType bx, by, bz; 1281 ISLocalToGlobalMapping ltog, mltog; 1282 DMDAStencilType st; 1283 PetscBool removedups = PETSC_FALSE; 1284 1285 PetscFunctionBegin; 1286 /* 1287 nc - number of components per grid point 1288 col - number of colors needed in one direction for single component problem 1289 */ 1290 PetscCall(DMDAGetInfo(da, &dim, &m, &n, &p, &M, &N, &P, &nc, &s, &bx, &by, &bz, &st)); 1291 if (bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE && bz == DM_BOUNDARY_NONE) PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_TRUE)); 1292 col = 2 * s + 1; 1293 1294 /* 1295 With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times 1296 because of "wrapping" around the end of the domain hitting an entry already counted in the other direction. 1297 */ 1298 if (M == 1 && 2 * s >= m) removedups = PETSC_TRUE; 1299 if (N == 1 && 2 * s >= n) removedups = PETSC_TRUE; 1300 if (P == 1 && 2 * s >= p) removedups = PETSC_TRUE; 1301 1302 PetscCall(DMDAGetCorners(da, &xs, &ys, &zs, &nx, &ny, &nz)); 1303 PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, &gzs, &gnx, &gny, &gnz)); 1304 PetscCall(PetscObjectGetComm((PetscObject)da, &comm)); 1305 1306 PetscCall(PetscMalloc2(nc, &rows, col * col * col * nc * nc, &cols)); 1307 PetscCall(DMGetLocalToGlobalMapping(da, <og)); 1308 1309 PetscCall(MatSetBlockSize(J, nc)); 1310 /* determine the matrix preallocation information */ 1311 MatPreallocateBegin(comm, nc * nx * ny * nz, nc * nx * ny * nz, dnz, onz); 1312 for (i = xs; i < xs + nx; i++) { 1313 istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i)); 1314 iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1)); 1315 for (j = ys; j < ys + ny; j++) { 1316 jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j)); 1317 jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1)); 1318 for (k = zs; k < zs + nz; k++) { 1319 kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k)); 1320 kend = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1)); 1321 1322 slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs); 1323 1324 cnt = 0; 1325 for (l = 0; l < nc; l++) { 1326 for (ii = istart; ii < iend + 1; ii++) { 1327 for (jj = jstart; jj < jend + 1; jj++) { 1328 for (kk = kstart; kk < kend + 1; kk++) { 1329 if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/ 1330 cols[cnt++] = l + nc * (slot + ii + gnx * jj + gnx * gny * kk); 1331 } 1332 } 1333 } 1334 } 1335 rows[l] = l + nc * (slot); 1336 } 1337 if (removedups) PetscCall(MatPreallocateSetLocalRemoveDups(ltog, nc, rows, ltog, cnt, cols, dnz, onz)); 1338 else PetscCall(MatPreallocateSetLocal(ltog, nc, rows, ltog, cnt, cols, dnz, onz)); 1339 } 1340 } 1341 } 1342 PetscCall(MatSetBlockSize(J, nc)); 1343 PetscCall(MatSeqAIJSetPreallocation(J, 0, dnz)); 1344 PetscCall(MatMPIAIJSetPreallocation(J, 0, dnz, 0, onz)); 1345 MatPreallocateEnd(dnz, onz); 1346 PetscCall(MatGetLocalToGlobalMapping(J, &mltog, NULL)); 1347 if (!mltog) PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog)); 1348 1349 /* 1350 For each node in the grid: we get the neighbors in the local (on processor ordering 1351 that includes the ghost points) then MatSetValuesLocal() maps those indices to the global 1352 PETSc ordering. 1353 */ 1354 if (!da->prealloc_only) { 1355 for (i = xs; i < xs + nx; i++) { 1356 istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i)); 1357 iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1)); 1358 for (j = ys; j < ys + ny; j++) { 1359 jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j)); 1360 jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1)); 1361 for (k = zs; k < zs + nz; k++) { 1362 kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k)); 1363 kend = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1)); 1364 1365 slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs); 1366 1367 cnt = 0; 1368 for (kk = kstart; kk < kend + 1; kk++) { 1369 for (jj = jstart; jj < jend + 1; jj++) { 1370 for (ii = istart; ii < iend + 1; ii++) { 1371 if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/ 1372 cols[cnt++] = nc * (slot + ii + gnx * jj + gnx * gny * kk); 1373 for (l = 1; l < nc; l++) { 1374 cols[cnt] = 1 + cols[cnt - 1]; 1375 cnt++; 1376 } 1377 } 1378 } 1379 } 1380 } 1381 rows[0] = nc * (slot); 1382 for (l = 1; l < nc; l++) rows[l] = 1 + rows[l - 1]; 1383 PetscCall(MatSetValuesLocal(J, nc, rows, cnt, cols, NULL, INSERT_VALUES)); 1384 } 1385 } 1386 } 1387 /* do not copy values to GPU since they are all zero and not yet needed there */ 1388 PetscCall(MatBindToCPU(J, PETSC_TRUE)); 1389 PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY)); 1390 PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY)); 1391 if (bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE && bz == DM_BOUNDARY_NONE) PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_FALSE)); 1392 PetscCall(MatBindToCPU(J, PETSC_FALSE)); 1393 PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE)); 1394 } 1395 PetscCall(PetscFree2(rows, cols)); 1396 PetscFunctionReturn(PETSC_SUCCESS); 1397 } 1398 1399 PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ_Fill(DM da, Mat J) 1400 { 1401 DM_DA *dd = (DM_DA *)da->data; 1402 PetscInt xs, nx, i, j, gxs, gnx, row, k, l; 1403 PetscInt m, dim, s, *cols = NULL, nc, cnt, maxcnt = 0, *ocols; 1404 PetscInt *ofill = dd->ofill, *dfill = dd->dfill; 1405 DMBoundaryType bx; 1406 ISLocalToGlobalMapping ltog; 1407 PetscMPIInt rank, size; 1408 1409 PetscFunctionBegin; 1410 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)da), &rank)); 1411 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)da), &size)); 1412 1413 /* 1414 nc - number of components per grid point 1415 */ 1416 PetscCall(DMDAGetInfo(da, &dim, &m, NULL, NULL, NULL, NULL, NULL, &nc, &s, &bx, NULL, NULL, NULL)); 1417 PetscCheck(s <= 1, PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "Matrix creation for 1d not implemented correctly for stencil width larger than 1"); 1418 PetscCall(DMDAGetCorners(da, &xs, NULL, NULL, &nx, NULL, NULL)); 1419 PetscCall(DMDAGetGhostCorners(da, &gxs, NULL, NULL, &gnx, NULL, NULL)); 1420 1421 PetscCall(MatSetBlockSize(J, nc)); 1422 PetscCall(PetscCalloc2(nx * nc, &cols, nx * nc, &ocols)); 1423 1424 /* 1425 note should be smaller for first and last process with no periodic 1426 does not handle dfill 1427 */ 1428 cnt = 0; 1429 /* coupling with process to the left */ 1430 for (i = 0; i < s; i++) { 1431 for (j = 0; j < nc; j++) { 1432 ocols[cnt] = ((rank == 0) ? 0 : (s - i) * (ofill[j + 1] - ofill[j])); 1433 cols[cnt] = dfill[j + 1] - dfill[j] + (s + i) * (ofill[j + 1] - ofill[j]); 1434 if (rank == 0 && (dd->bx == DM_BOUNDARY_PERIODIC)) { 1435 if (size > 1) ocols[cnt] += (s - i) * (ofill[j + 1] - ofill[j]); 1436 else cols[cnt] += (s - i) * (ofill[j + 1] - ofill[j]); 1437 } 1438 maxcnt = PetscMax(maxcnt, ocols[cnt] + cols[cnt]); 1439 cnt++; 1440 } 1441 } 1442 for (i = s; i < nx - s; i++) { 1443 for (j = 0; j < nc; j++) { 1444 cols[cnt] = dfill[j + 1] - dfill[j] + 2 * s * (ofill[j + 1] - ofill[j]); 1445 maxcnt = PetscMax(maxcnt, ocols[cnt] + cols[cnt]); 1446 cnt++; 1447 } 1448 } 1449 /* coupling with process to the right */ 1450 for (i = nx - s; i < nx; i++) { 1451 for (j = 0; j < nc; j++) { 1452 ocols[cnt] = ((rank == (size - 1)) ? 0 : (i - nx + s + 1) * (ofill[j + 1] - ofill[j])); 1453 cols[cnt] = dfill[j + 1] - dfill[j] + (s + nx - i - 1) * (ofill[j + 1] - ofill[j]); 1454 if ((rank == size - 1) && (dd->bx == DM_BOUNDARY_PERIODIC)) { 1455 if (size > 1) ocols[cnt] += (i - nx + s + 1) * (ofill[j + 1] - ofill[j]); 1456 else cols[cnt] += (i - nx + s + 1) * (ofill[j + 1] - ofill[j]); 1457 } 1458 maxcnt = PetscMax(maxcnt, ocols[cnt] + cols[cnt]); 1459 cnt++; 1460 } 1461 } 1462 1463 PetscCall(MatSeqAIJSetPreallocation(J, 0, cols)); 1464 PetscCall(MatMPIAIJSetPreallocation(J, 0, cols, 0, ocols)); 1465 PetscCall(PetscFree2(cols, ocols)); 1466 1467 PetscCall(DMGetLocalToGlobalMapping(da, <og)); 1468 PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog)); 1469 1470 /* 1471 For each node in the grid: we get the neighbors in the local (on processor ordering 1472 that includes the ghost points) then MatSetValuesLocal() maps those indices to the global 1473 PETSc ordering. 1474 */ 1475 if (!da->prealloc_only) { 1476 PetscCall(PetscMalloc1(maxcnt, &cols)); 1477 row = xs * nc; 1478 /* coupling with process to the left */ 1479 for (i = xs; i < xs + s; i++) { 1480 for (j = 0; j < nc; j++) { 1481 cnt = 0; 1482 if (rank) { 1483 for (l = 0; l < s; l++) { 1484 for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i - s + l) * nc + ofill[k]; 1485 } 1486 } 1487 if (rank == 0 && (dd->bx == DM_BOUNDARY_PERIODIC)) { 1488 for (l = 0; l < s; l++) { 1489 for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (m + i - s - l) * nc + ofill[k]; 1490 } 1491 } 1492 if (dfill) { 1493 for (k = dfill[j]; k < dfill[j + 1]; k++) cols[cnt++] = i * nc + dfill[k]; 1494 } else { 1495 for (k = 0; k < nc; k++) cols[cnt++] = i * nc + k; 1496 } 1497 for (l = 0; l < s; l++) { 1498 for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i + s - l) * nc + ofill[k]; 1499 } 1500 PetscCall(MatSetValues(J, 1, &row, cnt, cols, NULL, INSERT_VALUES)); 1501 row++; 1502 } 1503 } 1504 for (i = xs + s; i < xs + nx - s; i++) { 1505 for (j = 0; j < nc; j++) { 1506 cnt = 0; 1507 for (l = 0; l < s; l++) { 1508 for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i - s + l) * nc + ofill[k]; 1509 } 1510 if (dfill) { 1511 for (k = dfill[j]; k < dfill[j + 1]; k++) cols[cnt++] = i * nc + dfill[k]; 1512 } else { 1513 for (k = 0; k < nc; k++) cols[cnt++] = i * nc + k; 1514 } 1515 for (l = 0; l < s; l++) { 1516 for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i + s - l) * nc + ofill[k]; 1517 } 1518 PetscCall(MatSetValues(J, 1, &row, cnt, cols, NULL, INSERT_VALUES)); 1519 row++; 1520 } 1521 } 1522 /* coupling with process to the right */ 1523 for (i = xs + nx - s; i < xs + nx; i++) { 1524 for (j = 0; j < nc; j++) { 1525 cnt = 0; 1526 for (l = 0; l < s; l++) { 1527 for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i - s + l) * nc + ofill[k]; 1528 } 1529 if (dfill) { 1530 for (k = dfill[j]; k < dfill[j + 1]; k++) cols[cnt++] = i * nc + dfill[k]; 1531 } else { 1532 for (k = 0; k < nc; k++) cols[cnt++] = i * nc + k; 1533 } 1534 if (rank < size - 1) { 1535 for (l = 0; l < s; l++) { 1536 for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i + s - l) * nc + ofill[k]; 1537 } 1538 } 1539 if ((rank == size - 1) && (dd->bx == DM_BOUNDARY_PERIODIC)) { 1540 for (l = 0; l < s; l++) { 1541 for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i - s - l - m + 2) * nc + ofill[k]; 1542 } 1543 } 1544 PetscCall(MatSetValues(J, 1, &row, cnt, cols, NULL, INSERT_VALUES)); 1545 row++; 1546 } 1547 } 1548 PetscCall(PetscFree(cols)); 1549 /* do not copy values to GPU since they are all zero and not yet needed there */ 1550 PetscCall(MatBindToCPU(J, PETSC_TRUE)); 1551 PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY)); 1552 PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY)); 1553 PetscCall(MatBindToCPU(J, PETSC_FALSE)); 1554 PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE)); 1555 } 1556 PetscFunctionReturn(PETSC_SUCCESS); 1557 } 1558 1559 PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ(DM da, Mat J) 1560 { 1561 PetscInt xs, nx, i, i1, slot, gxs, gnx; 1562 PetscInt m, dim, s, *cols = NULL, nc, *rows = NULL, col, cnt, l; 1563 PetscInt istart, iend; 1564 DMBoundaryType bx; 1565 ISLocalToGlobalMapping ltog, mltog; 1566 1567 PetscFunctionBegin; 1568 /* 1569 nc - number of components per grid point 1570 col - number of colors needed in one direction for single component problem 1571 */ 1572 PetscCall(DMDAGetInfo(da, &dim, &m, NULL, NULL, NULL, NULL, NULL, &nc, &s, &bx, NULL, NULL, NULL)); 1573 if (bx == DM_BOUNDARY_NONE) PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_TRUE)); 1574 col = 2 * s + 1; 1575 1576 PetscCall(DMDAGetCorners(da, &xs, NULL, NULL, &nx, NULL, NULL)); 1577 PetscCall(DMDAGetGhostCorners(da, &gxs, NULL, NULL, &gnx, NULL, NULL)); 1578 1579 PetscCall(MatSetBlockSize(J, nc)); 1580 PetscCall(MatSeqAIJSetPreallocation(J, col * nc, NULL)); 1581 PetscCall(MatMPIAIJSetPreallocation(J, col * nc, NULL, col * nc, NULL)); 1582 1583 PetscCall(DMGetLocalToGlobalMapping(da, <og)); 1584 PetscCall(MatGetLocalToGlobalMapping(J, &mltog, NULL)); 1585 if (!mltog) PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog)); 1586 1587 /* 1588 For each node in the grid: we get the neighbors in the local (on processor ordering 1589 that includes the ghost points) then MatSetValuesLocal() maps those indices to the global 1590 PETSc ordering. 1591 */ 1592 if (!da->prealloc_only) { 1593 PetscCall(PetscMalloc2(nc, &rows, col * nc * nc, &cols)); 1594 for (i = xs; i < xs + nx; i++) { 1595 istart = PetscMax(-s, gxs - i); 1596 iend = PetscMin(s, gxs + gnx - i - 1); 1597 slot = i - gxs; 1598 1599 cnt = 0; 1600 for (i1 = istart; i1 < iend + 1; i1++) { 1601 cols[cnt++] = nc * (slot + i1); 1602 for (l = 1; l < nc; l++) { 1603 cols[cnt] = 1 + cols[cnt - 1]; 1604 cnt++; 1605 } 1606 } 1607 rows[0] = nc * (slot); 1608 for (l = 1; l < nc; l++) rows[l] = 1 + rows[l - 1]; 1609 PetscCall(MatSetValuesLocal(J, nc, rows, cnt, cols, NULL, INSERT_VALUES)); 1610 } 1611 /* do not copy values to GPU since they are all zero and not yet needed there */ 1612 PetscCall(MatBindToCPU(J, PETSC_TRUE)); 1613 PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY)); 1614 PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY)); 1615 if (bx == DM_BOUNDARY_NONE) PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_FALSE)); 1616 PetscCall(MatBindToCPU(J, PETSC_FALSE)); 1617 PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE)); 1618 PetscCall(PetscFree2(rows, cols)); 1619 } 1620 PetscFunctionReturn(PETSC_SUCCESS); 1621 } 1622 1623 PetscErrorCode DMCreateMatrix_DA_1d_SeqAIJ_NoPreallocation(DM da, Mat J) 1624 { 1625 PetscInt xs, nx, i, i1, slot, gxs, gnx; 1626 PetscInt m, dim, s, *cols = NULL, nc, *rows = NULL, col, cnt, l; 1627 PetscInt istart, iend; 1628 DMBoundaryType bx; 1629 ISLocalToGlobalMapping ltog, mltog; 1630 1631 PetscFunctionBegin; 1632 /* 1633 nc - number of components per grid point 1634 col - number of colors needed in one direction for single component problem 1635 */ 1636 PetscCall(DMDAGetInfo(da, &dim, &m, NULL, NULL, NULL, NULL, NULL, &nc, &s, &bx, NULL, NULL, NULL)); 1637 col = 2 * s + 1; 1638 1639 PetscCall(DMDAGetCorners(da, &xs, NULL, NULL, &nx, NULL, NULL)); 1640 PetscCall(DMDAGetGhostCorners(da, &gxs, NULL, NULL, &gnx, NULL, NULL)); 1641 1642 PetscCall(MatSetBlockSize(J, nc)); 1643 PetscCall(MatSeqAIJSetTotalPreallocation(J, nx * nc * col * nc)); 1644 1645 PetscCall(DMGetLocalToGlobalMapping(da, <og)); 1646 PetscCall(MatGetLocalToGlobalMapping(J, &mltog, NULL)); 1647 if (!mltog) PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog)); 1648 1649 /* 1650 For each node in the grid: we get the neighbors in the local (on processor ordering 1651 that includes the ghost points) then MatSetValuesLocal() maps those indices to the global 1652 PETSc ordering. 1653 */ 1654 if (!da->prealloc_only) { 1655 PetscCall(PetscMalloc2(nc, &rows, col * nc * nc, &cols)); 1656 for (i = xs; i < xs + nx; i++) { 1657 istart = PetscMax(-s, gxs - i); 1658 iend = PetscMin(s, gxs + gnx - i - 1); 1659 slot = i - gxs; 1660 1661 cnt = 0; 1662 for (i1 = istart; i1 < iend + 1; i1++) { 1663 cols[cnt++] = nc * (slot + i1); 1664 for (l = 1; l < nc; l++) { 1665 cols[cnt] = 1 + cols[cnt - 1]; 1666 cnt++; 1667 } 1668 } 1669 rows[0] = nc * (slot); 1670 for (l = 1; l < nc; l++) rows[l] = 1 + rows[l - 1]; 1671 PetscCall(MatSetValuesLocal(J, nc, rows, cnt, cols, NULL, INSERT_VALUES)); 1672 } 1673 /* do not copy values to GPU since they are all zero and not yet needed there */ 1674 PetscCall(MatBindToCPU(J, PETSC_TRUE)); 1675 PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY)); 1676 PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY)); 1677 if (bx == DM_BOUNDARY_NONE) PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_FALSE)); 1678 PetscCall(MatBindToCPU(J, PETSC_FALSE)); 1679 PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE)); 1680 PetscCall(PetscFree2(rows, cols)); 1681 } 1682 PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_FALSE)); 1683 PetscFunctionReturn(PETSC_SUCCESS); 1684 } 1685 1686 PetscErrorCode DMCreateMatrix_DA_2d_MPIBAIJ(DM da, Mat J) 1687 { 1688 PetscInt xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny; 1689 PetscInt m, n, dim, s, *cols, nc, col, cnt, *dnz, *onz; 1690 PetscInt istart, iend, jstart, jend, ii, jj; 1691 MPI_Comm comm; 1692 PetscScalar *values; 1693 DMBoundaryType bx, by; 1694 DMDAStencilType st; 1695 ISLocalToGlobalMapping ltog; 1696 1697 PetscFunctionBegin; 1698 /* 1699 nc - number of components per grid point 1700 col - number of colors needed in one direction for single component problem 1701 */ 1702 PetscCall(DMDAGetInfo(da, &dim, &m, &n, NULL, NULL, NULL, NULL, &nc, &s, &bx, &by, NULL, &st)); 1703 col = 2 * s + 1; 1704 1705 PetscCall(DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL)); 1706 PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL)); 1707 PetscCall(PetscObjectGetComm((PetscObject)da, &comm)); 1708 1709 PetscCall(PetscMalloc1(col * col * nc * nc, &cols)); 1710 1711 PetscCall(DMGetLocalToGlobalMapping(da, <og)); 1712 1713 /* determine the matrix preallocation information */ 1714 MatPreallocateBegin(comm, nx * ny, nx * ny, dnz, onz); 1715 for (i = xs; i < xs + nx; i++) { 1716 istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i)); 1717 iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1)); 1718 for (j = ys; j < ys + ny; j++) { 1719 jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j)); 1720 jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1)); 1721 slot = i - gxs + gnx * (j - gys); 1722 1723 /* Find block columns in block row */ 1724 cnt = 0; 1725 for (ii = istart; ii < iend + 1; ii++) { 1726 for (jj = jstart; jj < jend + 1; jj++) { 1727 if (st == DMDA_STENCIL_BOX || !ii || !jj) { /* BOX or on the STAR */ 1728 cols[cnt++] = slot + ii + gnx * jj; 1729 } 1730 } 1731 } 1732 PetscCall(MatPreallocateSetLocalBlock(ltog, 1, &slot, ltog, cnt, cols, dnz, onz)); 1733 } 1734 } 1735 PetscCall(MatSeqBAIJSetPreallocation(J, nc, 0, dnz)); 1736 PetscCall(MatMPIBAIJSetPreallocation(J, nc, 0, dnz, 0, onz)); 1737 MatPreallocateEnd(dnz, onz); 1738 1739 PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog)); 1740 1741 /* 1742 For each node in the grid: we get the neighbors in the local (on processor ordering 1743 that includes the ghost points) then MatSetValuesLocal() maps those indices to the global 1744 PETSc ordering. 1745 */ 1746 if (!da->prealloc_only) { 1747 PetscCall(PetscCalloc1(col * col * nc * nc, &values)); 1748 for (i = xs; i < xs + nx; i++) { 1749 istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i)); 1750 iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1)); 1751 for (j = ys; j < ys + ny; j++) { 1752 jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j)); 1753 jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1)); 1754 slot = i - gxs + gnx * (j - gys); 1755 cnt = 0; 1756 for (ii = istart; ii < iend + 1; ii++) { 1757 for (jj = jstart; jj < jend + 1; jj++) { 1758 if (st == DMDA_STENCIL_BOX || !ii || !jj) { /* BOX or on the STAR */ 1759 cols[cnt++] = slot + ii + gnx * jj; 1760 } 1761 } 1762 } 1763 PetscCall(MatSetValuesBlockedLocal(J, 1, &slot, cnt, cols, values, INSERT_VALUES)); 1764 } 1765 } 1766 PetscCall(PetscFree(values)); 1767 /* do not copy values to GPU since they are all zero and not yet needed there */ 1768 PetscCall(MatBindToCPU(J, PETSC_TRUE)); 1769 PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY)); 1770 PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY)); 1771 PetscCall(MatBindToCPU(J, PETSC_FALSE)); 1772 PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE)); 1773 } 1774 PetscCall(PetscFree(cols)); 1775 PetscFunctionReturn(PETSC_SUCCESS); 1776 } 1777 1778 PetscErrorCode DMCreateMatrix_DA_3d_MPIBAIJ(DM da, Mat J) 1779 { 1780 PetscInt xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny; 1781 PetscInt m, n, dim, s, *cols, k, nc, col, cnt, p, *dnz, *onz; 1782 PetscInt istart, iend, jstart, jend, kstart, kend, zs, nz, gzs, gnz, ii, jj, kk; 1783 MPI_Comm comm; 1784 PetscScalar *values; 1785 DMBoundaryType bx, by, bz; 1786 DMDAStencilType st; 1787 ISLocalToGlobalMapping ltog; 1788 1789 PetscFunctionBegin; 1790 /* 1791 nc - number of components per grid point 1792 col - number of colors needed in one direction for single component problem 1793 */ 1794 PetscCall(DMDAGetInfo(da, &dim, &m, &n, &p, NULL, NULL, NULL, &nc, &s, &bx, &by, &bz, &st)); 1795 col = 2 * s + 1; 1796 1797 PetscCall(DMDAGetCorners(da, &xs, &ys, &zs, &nx, &ny, &nz)); 1798 PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, &gzs, &gnx, &gny, &gnz)); 1799 PetscCall(PetscObjectGetComm((PetscObject)da, &comm)); 1800 1801 PetscCall(PetscMalloc1(col * col * col, &cols)); 1802 1803 PetscCall(DMGetLocalToGlobalMapping(da, <og)); 1804 1805 /* determine the matrix preallocation information */ 1806 MatPreallocateBegin(comm, nx * ny * nz, nx * ny * nz, dnz, onz); 1807 for (i = xs; i < xs + nx; i++) { 1808 istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i)); 1809 iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1)); 1810 for (j = ys; j < ys + ny; j++) { 1811 jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j)); 1812 jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1)); 1813 for (k = zs; k < zs + nz; k++) { 1814 kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k)); 1815 kend = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1)); 1816 1817 slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs); 1818 1819 /* Find block columns in block row */ 1820 cnt = 0; 1821 for (ii = istart; ii < iend + 1; ii++) { 1822 for (jj = jstart; jj < jend + 1; jj++) { 1823 for (kk = kstart; kk < kend + 1; kk++) { 1824 if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/ 1825 cols[cnt++] = slot + ii + gnx * jj + gnx * gny * kk; 1826 } 1827 } 1828 } 1829 } 1830 PetscCall(MatPreallocateSetLocalBlock(ltog, 1, &slot, ltog, cnt, cols, dnz, onz)); 1831 } 1832 } 1833 } 1834 PetscCall(MatSeqBAIJSetPreallocation(J, nc, 0, dnz)); 1835 PetscCall(MatMPIBAIJSetPreallocation(J, nc, 0, dnz, 0, onz)); 1836 MatPreallocateEnd(dnz, onz); 1837 1838 PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog)); 1839 1840 /* 1841 For each node in the grid: we get the neighbors in the local (on processor ordering 1842 that includes the ghost points) then MatSetValuesLocal() maps those indices to the global 1843 PETSc ordering. 1844 */ 1845 if (!da->prealloc_only) { 1846 PetscCall(PetscCalloc1(col * col * col * nc * nc, &values)); 1847 for (i = xs; i < xs + nx; i++) { 1848 istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i)); 1849 iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1)); 1850 for (j = ys; j < ys + ny; j++) { 1851 jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j)); 1852 jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1)); 1853 for (k = zs; k < zs + nz; k++) { 1854 kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k)); 1855 kend = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1)); 1856 1857 slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs); 1858 1859 cnt = 0; 1860 for (ii = istart; ii < iend + 1; ii++) { 1861 for (jj = jstart; jj < jend + 1; jj++) { 1862 for (kk = kstart; kk < kend + 1; kk++) { 1863 if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/ 1864 cols[cnt++] = slot + ii + gnx * jj + gnx * gny * kk; 1865 } 1866 } 1867 } 1868 } 1869 PetscCall(MatSetValuesBlockedLocal(J, 1, &slot, cnt, cols, values, INSERT_VALUES)); 1870 } 1871 } 1872 } 1873 PetscCall(PetscFree(values)); 1874 /* do not copy values to GPU since they are all zero and not yet needed there */ 1875 PetscCall(MatBindToCPU(J, PETSC_TRUE)); 1876 PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY)); 1877 PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY)); 1878 PetscCall(MatBindToCPU(J, PETSC_FALSE)); 1879 PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE)); 1880 } 1881 PetscCall(PetscFree(cols)); 1882 PetscFunctionReturn(PETSC_SUCCESS); 1883 } 1884 1885 /* 1886 This helper is for of SBAIJ preallocation, to discard the lower-triangular values which are difficult to 1887 identify in the local ordering with periodic domain. 1888 */ 1889 static PetscErrorCode L2GFilterUpperTriangular(ISLocalToGlobalMapping ltog, PetscInt *row, PetscInt *cnt, PetscInt col[]) 1890 { 1891 PetscInt i, n; 1892 1893 PetscFunctionBegin; 1894 PetscCall(ISLocalToGlobalMappingApplyBlock(ltog, 1, row, row)); 1895 PetscCall(ISLocalToGlobalMappingApplyBlock(ltog, *cnt, col, col)); 1896 for (i = 0, n = 0; i < *cnt; i++) { 1897 if (col[i] >= *row) col[n++] = col[i]; 1898 } 1899 *cnt = n; 1900 PetscFunctionReturn(PETSC_SUCCESS); 1901 } 1902 1903 PetscErrorCode DMCreateMatrix_DA_2d_MPISBAIJ(DM da, Mat J) 1904 { 1905 PetscInt xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny; 1906 PetscInt m, n, dim, s, *cols, nc, col, cnt, *dnz, *onz; 1907 PetscInt istart, iend, jstart, jend, ii, jj; 1908 MPI_Comm comm; 1909 PetscScalar *values; 1910 DMBoundaryType bx, by; 1911 DMDAStencilType st; 1912 ISLocalToGlobalMapping ltog; 1913 1914 PetscFunctionBegin; 1915 /* 1916 nc - number of components per grid point 1917 col - number of colors needed in one direction for single component problem 1918 */ 1919 PetscCall(DMDAGetInfo(da, &dim, &m, &n, NULL, NULL, NULL, NULL, &nc, &s, &bx, &by, NULL, &st)); 1920 col = 2 * s + 1; 1921 1922 PetscCall(DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL)); 1923 PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL)); 1924 PetscCall(PetscObjectGetComm((PetscObject)da, &comm)); 1925 1926 PetscCall(PetscMalloc1(col * col * nc * nc, &cols)); 1927 1928 PetscCall(DMGetLocalToGlobalMapping(da, <og)); 1929 1930 /* determine the matrix preallocation information */ 1931 MatPreallocateBegin(comm, nx * ny, nx * ny, dnz, onz); 1932 for (i = xs; i < xs + nx; i++) { 1933 istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i)); 1934 iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1)); 1935 for (j = ys; j < ys + ny; j++) { 1936 jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j)); 1937 jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1)); 1938 slot = i - gxs + gnx * (j - gys); 1939 1940 /* Find block columns in block row */ 1941 cnt = 0; 1942 for (ii = istart; ii < iend + 1; ii++) { 1943 for (jj = jstart; jj < jend + 1; jj++) { 1944 if (st == DMDA_STENCIL_BOX || !ii || !jj) cols[cnt++] = slot + ii + gnx * jj; 1945 } 1946 } 1947 PetscCall(L2GFilterUpperTriangular(ltog, &slot, &cnt, cols)); 1948 PetscCall(MatPreallocateSymmetricSetBlock(slot, cnt, cols, dnz, onz)); 1949 } 1950 } 1951 PetscCall(MatSeqSBAIJSetPreallocation(J, nc, 0, dnz)); 1952 PetscCall(MatMPISBAIJSetPreallocation(J, nc, 0, dnz, 0, onz)); 1953 MatPreallocateEnd(dnz, onz); 1954 1955 PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog)); 1956 1957 /* 1958 For each node in the grid: we get the neighbors in the local (on processor ordering 1959 that includes the ghost points) then MatSetValuesLocal() maps those indices to the global 1960 PETSc ordering. 1961 */ 1962 if (!da->prealloc_only) { 1963 PetscCall(PetscCalloc1(col * col * nc * nc, &values)); 1964 for (i = xs; i < xs + nx; i++) { 1965 istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i)); 1966 iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1)); 1967 for (j = ys; j < ys + ny; j++) { 1968 jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j)); 1969 jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1)); 1970 slot = i - gxs + gnx * (j - gys); 1971 1972 /* Find block columns in block row */ 1973 cnt = 0; 1974 for (ii = istart; ii < iend + 1; ii++) { 1975 for (jj = jstart; jj < jend + 1; jj++) { 1976 if (st == DMDA_STENCIL_BOX || !ii || !jj) cols[cnt++] = slot + ii + gnx * jj; 1977 } 1978 } 1979 PetscCall(L2GFilterUpperTriangular(ltog, &slot, &cnt, cols)); 1980 PetscCall(MatSetValuesBlocked(J, 1, &slot, cnt, cols, values, INSERT_VALUES)); 1981 } 1982 } 1983 PetscCall(PetscFree(values)); 1984 /* do not copy values to GPU since they are all zero and not yet needed there */ 1985 PetscCall(MatBindToCPU(J, PETSC_TRUE)); 1986 PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY)); 1987 PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY)); 1988 PetscCall(MatBindToCPU(J, PETSC_FALSE)); 1989 PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE)); 1990 } 1991 PetscCall(PetscFree(cols)); 1992 PetscFunctionReturn(PETSC_SUCCESS); 1993 } 1994 1995 PetscErrorCode DMCreateMatrix_DA_3d_MPISBAIJ(DM da, Mat J) 1996 { 1997 PetscInt xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny; 1998 PetscInt m, n, dim, s, *cols, k, nc, col, cnt, p, *dnz, *onz; 1999 PetscInt istart, iend, jstart, jend, kstart, kend, zs, nz, gzs, gnz, ii, jj, kk; 2000 MPI_Comm comm; 2001 PetscScalar *values; 2002 DMBoundaryType bx, by, bz; 2003 DMDAStencilType st; 2004 ISLocalToGlobalMapping ltog; 2005 2006 PetscFunctionBegin; 2007 /* 2008 nc - number of components per grid point 2009 col - number of colors needed in one direction for single component problem 2010 */ 2011 PetscCall(DMDAGetInfo(da, &dim, &m, &n, &p, NULL, NULL, NULL, &nc, &s, &bx, &by, &bz, &st)); 2012 col = 2 * s + 1; 2013 2014 PetscCall(DMDAGetCorners(da, &xs, &ys, &zs, &nx, &ny, &nz)); 2015 PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, &gzs, &gnx, &gny, &gnz)); 2016 PetscCall(PetscObjectGetComm((PetscObject)da, &comm)); 2017 2018 /* create the matrix */ 2019 PetscCall(PetscMalloc1(col * col * col, &cols)); 2020 2021 PetscCall(DMGetLocalToGlobalMapping(da, <og)); 2022 2023 /* determine the matrix preallocation information */ 2024 MatPreallocateBegin(comm, nx * ny * nz, nx * ny * nz, dnz, onz); 2025 for (i = xs; i < xs + nx; i++) { 2026 istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i)); 2027 iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1)); 2028 for (j = ys; j < ys + ny; j++) { 2029 jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j)); 2030 jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1)); 2031 for (k = zs; k < zs + nz; k++) { 2032 kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k)); 2033 kend = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1)); 2034 2035 slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs); 2036 2037 /* Find block columns in block row */ 2038 cnt = 0; 2039 for (ii = istart; ii < iend + 1; ii++) { 2040 for (jj = jstart; jj < jend + 1; jj++) { 2041 for (kk = kstart; kk < kend + 1; kk++) { 2042 if ((st == DMDA_STENCIL_BOX) || (!ii && !jj) || (!jj && !kk) || (!ii && !kk)) cols[cnt++] = slot + ii + gnx * jj + gnx * gny * kk; 2043 } 2044 } 2045 } 2046 PetscCall(L2GFilterUpperTriangular(ltog, &slot, &cnt, cols)); 2047 PetscCall(MatPreallocateSymmetricSetBlock(slot, cnt, cols, dnz, onz)); 2048 } 2049 } 2050 } 2051 PetscCall(MatSeqSBAIJSetPreallocation(J, nc, 0, dnz)); 2052 PetscCall(MatMPISBAIJSetPreallocation(J, nc, 0, dnz, 0, onz)); 2053 MatPreallocateEnd(dnz, onz); 2054 2055 PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog)); 2056 2057 /* 2058 For each node in the grid: we get the neighbors in the local (on processor ordering 2059 that includes the ghost points) then MatSetValuesLocal() maps those indices to the global 2060 PETSc ordering. 2061 */ 2062 if (!da->prealloc_only) { 2063 PetscCall(PetscCalloc1(col * col * col * nc * nc, &values)); 2064 for (i = xs; i < xs + nx; i++) { 2065 istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i)); 2066 iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1)); 2067 for (j = ys; j < ys + ny; j++) { 2068 jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j)); 2069 jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1)); 2070 for (k = zs; k < zs + nz; k++) { 2071 kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k)); 2072 kend = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1)); 2073 2074 slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs); 2075 2076 cnt = 0; 2077 for (ii = istart; ii < iend + 1; ii++) { 2078 for (jj = jstart; jj < jend + 1; jj++) { 2079 for (kk = kstart; kk < kend + 1; kk++) { 2080 if ((st == DMDA_STENCIL_BOX) || (!ii && !jj) || (!jj && !kk) || (!ii && !kk)) cols[cnt++] = slot + ii + gnx * jj + gnx * gny * kk; 2081 } 2082 } 2083 } 2084 PetscCall(L2GFilterUpperTriangular(ltog, &slot, &cnt, cols)); 2085 PetscCall(MatSetValuesBlocked(J, 1, &slot, cnt, cols, values, INSERT_VALUES)); 2086 } 2087 } 2088 } 2089 PetscCall(PetscFree(values)); 2090 /* do not copy values to GPU since they are all zero and not yet needed there */ 2091 PetscCall(MatBindToCPU(J, PETSC_TRUE)); 2092 PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY)); 2093 PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY)); 2094 PetscCall(MatBindToCPU(J, PETSC_FALSE)); 2095 PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE)); 2096 } 2097 PetscCall(PetscFree(cols)); 2098 PetscFunctionReturn(PETSC_SUCCESS); 2099 } 2100 2101 PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ_Fill(DM da, Mat J) 2102 { 2103 PetscInt xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny; 2104 PetscInt m, n, dim, s, *cols, k, nc, row, col, cnt, maxcnt = 0, l, p, *dnz, *onz; 2105 PetscInt istart, iend, jstart, jend, kstart, kend, zs, nz, gzs, gnz, ii, jj, kk, M, N, P; 2106 DM_DA *dd = (DM_DA *)da->data; 2107 PetscInt ifill_col, *dfill = dd->dfill, *ofill = dd->ofill; 2108 MPI_Comm comm; 2109 PetscScalar *values; 2110 DMBoundaryType bx, by, bz; 2111 ISLocalToGlobalMapping ltog; 2112 DMDAStencilType st; 2113 PetscBool removedups = PETSC_FALSE; 2114 2115 PetscFunctionBegin; 2116 /* 2117 nc - number of components per grid point 2118 col - number of colors needed in one direction for single component problem 2119 */ 2120 PetscCall(DMDAGetInfo(da, &dim, &m, &n, &p, &M, &N, &P, &nc, &s, &bx, &by, &bz, &st)); 2121 col = 2 * s + 1; 2122 2123 /* 2124 With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times 2125 because of "wrapping" around the end of the domain hitting an entry already counted in the other direction. 2126 */ 2127 if (M == 1 && 2 * s >= m) removedups = PETSC_TRUE; 2128 if (N == 1 && 2 * s >= n) removedups = PETSC_TRUE; 2129 if (P == 1 && 2 * s >= p) removedups = PETSC_TRUE; 2130 2131 PetscCall(DMDAGetCorners(da, &xs, &ys, &zs, &nx, &ny, &nz)); 2132 PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, &gzs, &gnx, &gny, &gnz)); 2133 PetscCall(PetscObjectGetComm((PetscObject)da, &comm)); 2134 2135 PetscCall(PetscMalloc1(col * col * col * nc, &cols)); 2136 PetscCall(DMGetLocalToGlobalMapping(da, <og)); 2137 2138 /* determine the matrix preallocation information */ 2139 MatPreallocateBegin(comm, nc * nx * ny * nz, nc * nx * ny * nz, dnz, onz); 2140 2141 PetscCall(MatSetBlockSize(J, nc)); 2142 for (i = xs; i < xs + nx; i++) { 2143 istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i)); 2144 iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1)); 2145 for (j = ys; j < ys + ny; j++) { 2146 jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j)); 2147 jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1)); 2148 for (k = zs; k < zs + nz; k++) { 2149 kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k)); 2150 kend = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1)); 2151 2152 slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs); 2153 2154 for (l = 0; l < nc; l++) { 2155 cnt = 0; 2156 for (ii = istart; ii < iend + 1; ii++) { 2157 for (jj = jstart; jj < jend + 1; jj++) { 2158 for (kk = kstart; kk < kend + 1; kk++) { 2159 if (ii || jj || kk) { 2160 if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/ 2161 for (ifill_col = ofill[l]; ifill_col < ofill[l + 1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc * (slot + ii + gnx * jj + gnx * gny * kk); 2162 } 2163 } else { 2164 if (dfill) { 2165 for (ifill_col = dfill[l]; ifill_col < dfill[l + 1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc * (slot + ii + gnx * jj + gnx * gny * kk); 2166 } else { 2167 for (ifill_col = 0; ifill_col < nc; ifill_col++) cols[cnt++] = ifill_col + nc * (slot + ii + gnx * jj + gnx * gny * kk); 2168 } 2169 } 2170 } 2171 } 2172 } 2173 row = l + nc * (slot); 2174 maxcnt = PetscMax(maxcnt, cnt); 2175 if (removedups) PetscCall(MatPreallocateSetLocalRemoveDups(ltog, 1, &row, ltog, cnt, cols, dnz, onz)); 2176 else PetscCall(MatPreallocateSetLocal(ltog, 1, &row, ltog, cnt, cols, dnz, onz)); 2177 } 2178 } 2179 } 2180 } 2181 PetscCall(MatSeqAIJSetPreallocation(J, 0, dnz)); 2182 PetscCall(MatMPIAIJSetPreallocation(J, 0, dnz, 0, onz)); 2183 MatPreallocateEnd(dnz, onz); 2184 PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog)); 2185 2186 /* 2187 For each node in the grid: we get the neighbors in the local (on processor ordering 2188 that includes the ghost points) then MatSetValuesLocal() maps those indices to the global 2189 PETSc ordering. 2190 */ 2191 if (!da->prealloc_only) { 2192 PetscCall(PetscCalloc1(maxcnt, &values)); 2193 for (i = xs; i < xs + nx; i++) { 2194 istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i)); 2195 iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1)); 2196 for (j = ys; j < ys + ny; j++) { 2197 jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j)); 2198 jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1)); 2199 for (k = zs; k < zs + nz; k++) { 2200 kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k)); 2201 kend = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1)); 2202 2203 slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs); 2204 2205 for (l = 0; l < nc; l++) { 2206 cnt = 0; 2207 for (ii = istart; ii < iend + 1; ii++) { 2208 for (jj = jstart; jj < jend + 1; jj++) { 2209 for (kk = kstart; kk < kend + 1; kk++) { 2210 if (ii || jj || kk) { 2211 if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/ 2212 for (ifill_col = ofill[l]; ifill_col < ofill[l + 1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc * (slot + ii + gnx * jj + gnx * gny * kk); 2213 } 2214 } else { 2215 if (dfill) { 2216 for (ifill_col = dfill[l]; ifill_col < dfill[l + 1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc * (slot + ii + gnx * jj + gnx * gny * kk); 2217 } else { 2218 for (ifill_col = 0; ifill_col < nc; ifill_col++) cols[cnt++] = ifill_col + nc * (slot + ii + gnx * jj + gnx * gny * kk); 2219 } 2220 } 2221 } 2222 } 2223 } 2224 row = l + nc * (slot); 2225 PetscCall(MatSetValuesLocal(J, 1, &row, cnt, cols, values, INSERT_VALUES)); 2226 } 2227 } 2228 } 2229 } 2230 PetscCall(PetscFree(values)); 2231 /* do not copy values to GPU since they are all zero and not yet needed there */ 2232 PetscCall(MatBindToCPU(J, PETSC_TRUE)); 2233 PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY)); 2234 PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY)); 2235 PetscCall(MatBindToCPU(J, PETSC_FALSE)); 2236 PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE)); 2237 } 2238 PetscCall(PetscFree(cols)); 2239 PetscFunctionReturn(PETSC_SUCCESS); 2240 } 2241