1 2 #include <petsc/private/dmdaimpl.h> /*I "petscdmda.h" I*/ 3 #include <petscmat.h> 4 #include <petscbt.h> 5 6 extern PetscErrorCode DMCreateColoring_DA_1d_MPIAIJ(DM, ISColoringType, ISColoring *); 7 extern PetscErrorCode DMCreateColoring_DA_2d_MPIAIJ(DM, ISColoringType, ISColoring *); 8 extern PetscErrorCode DMCreateColoring_DA_2d_5pt_MPIAIJ(DM, ISColoringType, ISColoring *); 9 extern PetscErrorCode DMCreateColoring_DA_3d_MPIAIJ(DM, ISColoringType, ISColoring *); 10 11 /* 12 For ghost i that may be negative or greater than the upper bound this 13 maps it into the 0:m-1 range using periodicity 14 */ 15 #define SetInRange(i, m) ((i < 0) ? m + i : ((i >= m) ? i - m : i)) 16 17 static PetscErrorCode DMDASetBlockFills_Private(const PetscInt *dfill, PetscInt w, PetscInt **rfill) 18 { 19 PetscInt i, j, nz, *fill; 20 21 PetscFunctionBegin; 22 if (!dfill) PetscFunctionReturn(0); 23 24 /* count number nonzeros */ 25 nz = 0; 26 for (i = 0; i < w; i++) { 27 for (j = 0; j < w; j++) { 28 if (dfill[w * i + j]) nz++; 29 } 30 } 31 PetscCall(PetscMalloc1(nz + w + 1, &fill)); 32 /* construct modified CSR storage of nonzero structure */ 33 /* fill[0 -- w] marks starts of each row of column indices (and end of last row) 34 so fill[1] - fill[0] gives number of nonzeros in first row etc */ 35 nz = w + 1; 36 for (i = 0; i < w; i++) { 37 fill[i] = nz; 38 for (j = 0; j < w; j++) { 39 if (dfill[w * i + j]) { 40 fill[nz] = j; 41 nz++; 42 } 43 } 44 } 45 fill[w] = nz; 46 47 *rfill = fill; 48 PetscFunctionReturn(0); 49 } 50 51 static PetscErrorCode DMDASetBlockFillsSparse_Private(const PetscInt *dfillsparse, PetscInt w, PetscInt **rfill) 52 { 53 PetscInt nz; 54 55 PetscFunctionBegin; 56 if (!dfillsparse) PetscFunctionReturn(0); 57 58 /* Determine number of non-zeros */ 59 nz = (dfillsparse[w] - w - 1); 60 61 /* Allocate space for our copy of the given sparse matrix representation. 
*/ 62 PetscCall(PetscMalloc1(nz + w + 1, rfill)); 63 PetscCall(PetscArraycpy(*rfill, dfillsparse, nz + w + 1)); 64 PetscFunctionReturn(0); 65 } 66 67 static PetscErrorCode DMDASetBlockFills_Private2(DM_DA *dd) 68 { 69 PetscInt i, k, cnt = 1; 70 71 PetscFunctionBegin; 72 73 /* ofillcount tracks the columns of ofill that have any nonzero in thems; the value in each location is the number of 74 columns to the left with any nonzeros in them plus 1 */ 75 PetscCall(PetscCalloc1(dd->w, &dd->ofillcols)); 76 for (i = 0; i < dd->w; i++) { 77 for (k = dd->ofill[i]; k < dd->ofill[i + 1]; k++) dd->ofillcols[dd->ofill[k]] = 1; 78 } 79 for (i = 0; i < dd->w; i++) { 80 if (dd->ofillcols[i]) dd->ofillcols[i] = cnt++; 81 } 82 PetscFunctionReturn(0); 83 } 84 85 /*@ 86 DMDASetBlockFills - Sets the fill pattern in each block for a multi-component problem 87 of the matrix returned by `DMCreateMatrix()`. 88 89 Logically Collective on da 90 91 Input Parameters: 92 + da - the distributed array 93 . dfill - the fill pattern in the diagonal block (may be NULL, means use dense block) 94 - ofill - the fill pattern in the off-diagonal blocks 95 96 Level: developer 97 98 Notes: 99 This only makes sense when you are doing multicomponent problems but using the 100 `MATMPIAIJ` matrix format 101 102 The format for dfill and ofill is a 2 dimensional dof by dof matrix with 1 entries 103 representing coupling and 0 entries for missing coupling. For example 104 .vb 105 dfill[9] = {1, 0, 0, 106 1, 1, 0, 107 0, 1, 1} 108 .ve 109 means that row 0 is coupled with only itself in the diagonal block, row 1 is coupled with 110 itself and row 0 (in the diagonal block) and row 2 is coupled with itself and row 1 (in the 111 diagonal block). 
  `DMDASetGetMatrix()` allows you to provide general code for those more complicated nonzero patterns than
  can be represented in the dfill, ofill format

  Contributed by Glenn Hammond

.seealso: `DM`, `DMDA`, `DMCreateMatrix()`, `DMDASetGetMatrix()`, `DMSetMatrixPreallocateOnly()`
@*/
PetscErrorCode DMDASetBlockFills(DM da, const PetscInt *dfill, const PetscInt *ofill)
{
  DM_DA *dd = (DM_DA *)da->data;

  PetscFunctionBegin;
  /* save the given dfill and ofill information */
  PetscCall(DMDASetBlockFills_Private(dfill, dd->w, &dd->dfill));
  PetscCall(DMDASetBlockFills_Private(ofill, dd->w, &dd->ofill));

  /* count nonzeros in ofill columns */
  PetscCall(DMDASetBlockFills_Private2(dd));
  PetscFunctionReturn(0);
}

/*@
  DMDASetBlockFillsSparse - Sets the fill pattern in each block for a multi-component problem
  of the matrix returned by `DMCreateMatrix()`, using sparse representations
  of fill patterns.

  Logically Collective on da

  Input Parameters:
+ da - the distributed array
. dfill - the sparse fill pattern in the diagonal block (may be NULL, means use dense block)
- ofill - the sparse fill pattern in the off-diagonal blocks

  Level: developer

  Notes:
  This only makes sense when you are doing multicomponent problems but using the
  `MATMPIAIJ` matrix format

  The format for dfill and ofill is a sparse representation of a
  dof-by-dof matrix with 1 entries representing coupling and 0 entries
  for missing coupling. The sparse representation is a 1 dimensional
  array of length nz + dof + 1, where nz is the number of non-zeros in
  the matrix. The first dof entries in the array give the
  starting array indices of each row's items in the rest of the array,
  the dof+1st item contains the value nz + dof + 1 (i.e. the entire length of the array)
  and the remaining nz items give the column indices of each of
  the 1s within the logical 2D matrix. Each row's items within
  the array are the column indices of the 1s within that row
  of the 2D matrix. PETSc developers may recognize that this is the
  same format as that computed by the `DMDASetBlockFills_Private()`
  function from a dense 2D matrix representation.

  `DMDASetGetMatrix()` allows you to provide general code for those more complicated nonzero patterns than
  can be represented in the dfill, ofill format

  Contributed by Philip C. Roth

.seealso: `DM`, `DMDA`, `DMDASetBlockFills()`, `DMCreateMatrix()`, `DMDASetGetMatrix()`, `DMSetMatrixPreallocateOnly()`
@*/
PetscErrorCode DMDASetBlockFillsSparse(DM da, const PetscInt *dfillsparse, const PetscInt *ofillsparse)
{
  DM_DA *dd = (DM_DA *)da->data;

  PetscFunctionBegin;
  /* save the given dfill and ofill information */
  PetscCall(DMDASetBlockFillsSparse_Private(dfillsparse, dd->w, &dd->dfill));
  PetscCall(DMDASetBlockFillsSparse_Private(ofillsparse, dd->w, &dd->ofill));

  /* count nonzeros in ofill columns */
  PetscCall(DMDASetBlockFills_Private2(dd));
  PetscFunctionReturn(0);
}

/* Dispatch coloring creation to the dimension-specific routine; temporarily treats
   BAIJ matrices as one dof per grid point so the coloring is per-block. */
PetscErrorCode DMCreateColoring_DA(DM da, ISColoringType ctype, ISColoring *coloring)
{
  PetscInt       dim, m, n, p, nc;
  DMBoundaryType bx, by, bz;
  MPI_Comm       comm;
  PetscMPIInt    size;
  PetscBool      isBAIJ;
  DM_DA         *dd = (DM_DA *)da->data;

  PetscFunctionBegin;
  /*
                                                                        m
          ------------------------------------------------------
         |                                                      |
         |                                                      |
         |               ----------------------                 |
         |               |                    |                 |
      n  |           yn  |                    |                 |
         |               |                    |                 |
         |               .---------------------                 |
         |             (xs,ys)     xn                           |
         |            .
                                              |
         |          (gxs,gys)                           |
         |                                              |
          -----------------------------------------------------
  */

  /*
     nc - number of components per grid point
     col - number of colors needed in one direction for single component problem
  */
  PetscCall(DMDAGetInfo(da, &dim, NULL, NULL, NULL, &m, &n, &p, &nc, NULL, &bx, &by, &bz, NULL));

  PetscCall(PetscObjectGetComm((PetscObject)da, &comm));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  if (ctype == IS_COLORING_LOCAL) {
    if (size == 1) {
      ctype = IS_COLORING_GLOBAL;
    } else if (dim > 1) {
      /* a single process owning both ends of a periodic direction makes the ghosted coloring ambiguous */
      if ((m == 1 && bx == DM_BOUNDARY_PERIODIC) || (n == 1 && by == DM_BOUNDARY_PERIODIC) || (p == 1 && bz == DM_BOUNDARY_PERIODIC)) {
        SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "IS_COLORING_LOCAL cannot be used for periodic boundary condition having both ends of the domain on the same process");
      }
    }
  }

  /* Tell the DMDA it has 1 degree of freedom per grid point so that the coloring for BAIJ
     matrices is for the blocks, not the individual matrix elements  */
  PetscCall(PetscStrbeginswith(da->mattype, MATBAIJ, &isBAIJ));
  if (!isBAIJ) PetscCall(PetscStrbeginswith(da->mattype, MATMPIBAIJ, &isBAIJ));
  if (!isBAIJ) PetscCall(PetscStrbeginswith(da->mattype, MATSEQBAIJ, &isBAIJ));
  if (isBAIJ) {
    dd->w  = 1;
    dd->xs = dd->xs / nc;
    dd->xe = dd->xe / nc;
    dd->Xs = dd->Xs / nc;
    dd->Xe = dd->Xe / nc;
  }

  /*
     We do not provide a getcoloring function in the DMDA operations because
     the basic DMDA does not know about matrices. We think of DMDA as being
     more low-level than matrices.
  */
  if (dim == 1) PetscCall(DMCreateColoring_DA_1d_MPIAIJ(da, ctype, coloring));
  else if (dim == 2) PetscCall(DMCreateColoring_DA_2d_MPIAIJ(da, ctype, coloring));
  else if (dim == 3) PetscCall(DMCreateColoring_DA_3d_MPIAIJ(da, ctype, coloring));
  else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "Not done for %" PetscInt_FMT " dimension, send us mail petsc-maint@mcs.anl.gov for code", dim);
  /* restore the real dof scaling undone above for the BAIJ case */
  if (isBAIJ) {
    dd->w  = nc;
    dd->xs = dd->xs * nc;
    dd->xe = dd->xe * nc;
    dd->Xs = dd->Xs * nc;
    dd->Xe = dd->Xe * nc;
  }
  PetscFunctionReturn(0);
}

/* ---------------------------------------------------------------------------------*/

/* Coloring for a 2d DMDA assuming an AIJ matrix: colors repeat with period col = 2s+1
   in each direction so no two points within a stencil share a color. */
PetscErrorCode DMCreateColoring_DA_2d_MPIAIJ(DM da, ISColoringType ctype, ISColoring *coloring)
{
  PetscInt         xs, ys, nx, ny, i, j, ii, gxs, gys, gnx, gny, m, n, M, N, dim, s, k, nc, col;
  PetscInt         ncolors;
  MPI_Comm         comm;
  DMBoundaryType   bx, by;
  DMDAStencilType  st;
  ISColoringValue *colors;
  DM_DA           *dd = (DM_DA *)da->data;

  PetscFunctionBegin;
  /*
     nc - number of components per grid point
     col - number of colors needed in one direction for single component problem
  */
  PetscCall(DMDAGetInfo(da, &dim, &m, &n, NULL, &M, &N, NULL, &nc, &s, &bx, &by, NULL, &st));
  col = 2 * s + 1;
  PetscCall(DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL));
  PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL));
  PetscCall(PetscObjectGetComm((PetscObject)da, &comm));

  /* special case as taught to us by Paul Hovland */
  if (st == DMDA_STENCIL_STAR && s == 1) {
    PetscCall(DMCreateColoring_DA_2d_5pt_MPIAIJ(da, ctype, coloring));
  } else {
    if (ctype == IS_COLORING_GLOBAL) {
      /* colorings are cached on the DM and reused on subsequent calls */
      if (!dd->localcoloring) {
        PetscCall(PetscMalloc1(nc * nx * ny, &colors));
        ii = 0;
        for (j = ys; j < ys + ny; j++) {
          for (i = xs; i < xs + nx; i++) {
            for (k = 0; k < nc; k++) colors[ii++] = k + nc * ((i % col) + col * (j % col));
          }
        }
        ncolors = nc + nc * (col - 1 + col * (col - 1));
        PetscCall(ISColoringCreate(comm, ncolors, nc * nx * ny, colors, PETSC_OWN_POINTER, &dd->localcoloring));
      }
      *coloring = dd->localcoloring;
    } else if (ctype == IS_COLORING_LOCAL) {
      if (!dd->ghostedcoloring) {
        PetscCall(PetscMalloc1(nc * gnx * gny, &colors));
        ii = 0;
        for (j = gys; j < gys + gny; j++) {
          for (i = gxs; i < gxs + gnx; i++) {
            for (k = 0; k < nc; k++) {
              /* the complicated stuff is to handle periodic boundaries */
              colors[ii++] = k + nc * ((SetInRange(i, m) % col) + col * (SetInRange(j, n) % col));
            }
          }
        }
        ncolors = nc + nc * (col - 1 + col * (col - 1));
        PetscCall(ISColoringCreate(comm, ncolors, nc * gnx * gny, colors, PETSC_OWN_POINTER, &dd->ghostedcoloring));
        /* PetscIntView(ncolors,(PetscInt*)colors,0); */

        PetscCall(ISColoringSetType(dd->ghostedcoloring, IS_COLORING_LOCAL));
      }
      *coloring = dd->ghostedcoloring;
    } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_ARG_WRONG, "Unknown ISColoringType %d", (int)ctype);
  }
  PetscCall(ISColoringReference(*coloring));
  PetscFunctionReturn(0);
}

/* ---------------------------------------------------------------------------------*/

/* Coloring for a 3d DMDA assuming an AIJ matrix; same periodic color tiling as 2d
   extended with a col*col stride in the third direction. */
PetscErrorCode DMCreateColoring_DA_3d_MPIAIJ(DM da, ISColoringType ctype, ISColoring *coloring)
{
  PetscInt         xs, ys, nx, ny, i, j, gxs, gys, gnx, gny, m, n, p, dim, s, k, nc, col, zs, gzs, ii, l, nz, gnz, M, N, P;
  PetscInt         ncolors;
  MPI_Comm         comm;
  DMBoundaryType   bx, by, bz;
  DMDAStencilType  st;
  ISColoringValue *colors;
  DM_DA           *dd = (DM_DA *)da->data;

  PetscFunctionBegin;
  /*
     nc - number of components per grid point
     col - number of colors needed in one direction for single component problem
  */
  PetscCall(DMDAGetInfo(da, &dim, &m, &n, &p, &M, &N, &P, &nc, &s, &bx, &by, &bz, &st));
  col = 2 * s + 1;
  PetscCall(DMDAGetCorners(da, &xs, &ys, &zs, &nx, &ny, &nz));
  PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, &gzs, &gnx, &gny, &gnz));
  PetscCall(PetscObjectGetComm((PetscObject)da, &comm));

  /* create the coloring */
  if (ctype == IS_COLORING_GLOBAL) {
    /* colorings are cached on the DM and reused on subsequent calls */
    if (!dd->localcoloring) {
      PetscCall(PetscMalloc1(nc * nx * ny * nz, &colors));
      ii = 0;
      for (k = zs; k < zs + nz; k++) {
        for (j = ys; j < ys + ny; j++) {
          for (i = xs; i < xs + nx; i++) {
            for (l = 0; l < nc; l++) colors[ii++] = l + nc * ((i % col) + col * (j % col) + col * col * (k % col));
          }
        }
      }
      ncolors = nc + nc * (col - 1 + col * (col - 1) + col * col * (col - 1));
      PetscCall(ISColoringCreate(comm, ncolors, nc * nx * ny * nz, colors, PETSC_OWN_POINTER, &dd->localcoloring));
    }
    *coloring = dd->localcoloring;
  } else if (ctype == IS_COLORING_LOCAL) {
    if (!dd->ghostedcoloring) {
      PetscCall(PetscMalloc1(nc * gnx * gny * gnz, &colors));
      ii = 0;
      for (k = gzs; k < gzs + gnz; k++) {
        for (j = gys; j < gys + gny; j++) {
          for (i = gxs; i < gxs + gnx; i++) {
            for (l = 0; l < nc; l++) {
              /* the complicated stuff is to handle periodic boundaries */
              colors[ii++] = l + nc * ((SetInRange(i, m) % col) + col * (SetInRange(j, n) % col) + col * col * (SetInRange(k, p) % col));
            }
          }
        }
      }
      ncolors = nc + nc * (col - 1 + col * (col - 1) + col * col * (col - 1));
      PetscCall(ISColoringCreate(comm, ncolors, nc * gnx * gny * gnz, colors, PETSC_OWN_POINTER, &dd->ghostedcoloring));
      PetscCall(ISColoringSetType(dd->ghostedcoloring, IS_COLORING_LOCAL));
    }
    *coloring = dd->ghostedcoloring;
  } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_ARG_WRONG, "Unknown ISColoringType %d", (int)ctype);
  PetscCall(ISColoringReference(*coloring));
  PetscFunctionReturn(0);
}

/* ---------------------------------------------------------------------------------*/

/* Coloring for a 1d DMDA assuming an AIJ matrix; when a block fill pattern has been
   set (dd->ofillcols) only the components actually coupled off-process get extra colors. */
PetscErrorCode
DMCreateColoring_DA_1d_MPIAIJ(DM da, ISColoringType ctype, ISColoring *coloring)
{
  PetscInt         xs, nx, i, i1, gxs, gnx, l, m, M, dim, s, nc, col;
  PetscInt         ncolors;
  MPI_Comm         comm;
  DMBoundaryType   bx;
  ISColoringValue *colors;
  DM_DA           *dd = (DM_DA *)da->data;

  PetscFunctionBegin;
  /*
     nc - number of components per grid point
     col - number of colors needed in one direction for single component problem
  */
  PetscCall(DMDAGetInfo(da, &dim, &m, NULL, NULL, &M, NULL, NULL, &nc, &s, &bx, NULL, NULL, NULL));
  col = 2 * s + 1;
  PetscCall(DMDAGetCorners(da, &xs, NULL, NULL, &nx, NULL, NULL));
  PetscCall(DMDAGetGhostCorners(da, &gxs, NULL, NULL, &gnx, NULL, NULL));
  PetscCall(PetscObjectGetComm((PetscObject)da, &comm));

  /* create the coloring */
  if (ctype == IS_COLORING_GLOBAL) {
    if (!dd->localcoloring) {
      PetscCall(PetscMalloc1(nc * nx, &colors));
      if (dd->ofillcols) {
        /* tc = number of components with any off-diagonal coupling */
        PetscInt tc = 0;
        for (i = 0; i < nc; i++) tc += (PetscInt)(dd->ofillcols[i] > 0);
        i1 = 0;
        for (i = xs; i < xs + nx; i++) {
          for (l = 0; l < nc; l++) {
            if (dd->ofillcols[l] && (i % col)) {
              colors[i1++] = nc - 1 + tc * ((i % col) - 1) + dd->ofillcols[l];
            } else {
              colors[i1++] = l;
            }
          }
        }
        ncolors = nc + 2 * s * tc;
      } else {
        i1 = 0;
        for (i = xs; i < xs + nx; i++) {
          for (l = 0; l < nc; l++) colors[i1++] = l + nc * (i % col);
        }
        ncolors = nc + nc * (col - 1);
      }
      PetscCall(ISColoringCreate(comm, ncolors, nc * nx, colors, PETSC_OWN_POINTER, &dd->localcoloring));
    }
    *coloring = dd->localcoloring;
  } else if (ctype == IS_COLORING_LOCAL) {
    if (!dd->ghostedcoloring) {
      PetscCall(PetscMalloc1(nc * gnx, &colors));
      i1 = 0;
      for (i = gxs; i < gxs + gnx; i++) {
        for (l = 0; l < nc; l++) {
          /* the complicated stuff is to handle periodic boundaries */
          colors[i1++] = l + nc * (SetInRange(i, m) % col);
        }
      }
      ncolors = nc + nc * (col - 1);
      PetscCall(ISColoringCreate(comm, ncolors, nc * gnx, colors, PETSC_OWN_POINTER, &dd->ghostedcoloring));
      PetscCall(ISColoringSetType(dd->ghostedcoloring, IS_COLORING_LOCAL));
    }
    *coloring = dd->ghostedcoloring;
  } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_ARG_WRONG, "Unknown ISColoringType %d", (int)ctype);
  PetscCall(ISColoringReference(*coloring));
  PetscFunctionReturn(0);
}

/* 5-color scheme for the 2d star stencil with width 1 (5-point stencil):
   color = (3*j + i) mod 5 guarantees no two stencil neighbors share a color. */
PetscErrorCode DMCreateColoring_DA_2d_5pt_MPIAIJ(DM da, ISColoringType ctype, ISColoring *coloring)
{
  PetscInt         xs, ys, nx, ny, i, j, ii, gxs, gys, gnx, gny, m, n, dim, s, k, nc;
  PetscInt         ncolors;
  MPI_Comm         comm;
  DMBoundaryType   bx, by;
  ISColoringValue *colors;
  DM_DA           *dd = (DM_DA *)da->data;

  PetscFunctionBegin;
  /*
     nc - number of components per grid point
     col - number of colors needed in one direction for single component problem
  */
  PetscCall(DMDAGetInfo(da, &dim, &m, &n, NULL, NULL, NULL, NULL, &nc, &s, &bx, &by, NULL, NULL));
  PetscCall(DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL));
  PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL));
  PetscCall(PetscObjectGetComm((PetscObject)da, &comm));
  /* create the coloring */
  if (ctype == IS_COLORING_GLOBAL) {
    if (!dd->localcoloring) {
      PetscCall(PetscMalloc1(nc * nx * ny, &colors));
      ii = 0;
      for (j = ys; j < ys + ny; j++) {
        for (i = xs; i < xs + nx; i++) {
          for (k = 0; k < nc; k++) colors[ii++] = k + nc * ((3 * j + i) % 5);
        }
      }
      ncolors = 5 * nc;
      PetscCall(ISColoringCreate(comm, ncolors, nc * nx * ny, colors, PETSC_OWN_POINTER, &dd->localcoloring));
    }
    *coloring = dd->localcoloring;
  } else if (ctype == IS_COLORING_LOCAL) {
    if (!dd->ghostedcoloring) {
      PetscCall(PetscMalloc1(nc * gnx * gny, &colors));
      ii = 0;
      for (j = gys; j < gys + gny; j++) {
        for (i = gxs; i < gxs + gnx; i++) {
          /* SetInRange wraps ghost indices through periodic boundaries */
          for (k = 0; k < nc; k++) colors[ii++] = k + nc * ((3 * SetInRange(j, n) + SetInRange(i, m)) % 5);
        }
      }
      ncolors = 5 * nc;
      PetscCall(ISColoringCreate(comm, ncolors, nc * gnx * gny, colors, PETSC_OWN_POINTER, &dd->ghostedcoloring));
      PetscCall(ISColoringSetType(dd->ghostedcoloring, IS_COLORING_LOCAL));
    }
    *coloring = dd->ghostedcoloring;
  } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_ARG_WRONG, "Unknown ISColoringType %d", (int)ctype);
  PetscFunctionReturn(0);
}

/* =========================================================================== */
extern PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ(DM, Mat);
extern PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ_Fill(DM, Mat);
extern PetscErrorCode DMCreateMatrix_DA_1d_SeqAIJ_NoPreallocation(DM, Mat);
extern PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ(DM, Mat);
extern PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ_Fill(DM, Mat);
extern PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ(DM, Mat);
extern PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ_Fill(DM, Mat);
extern PetscErrorCode DMCreateMatrix_DA_2d_MPIBAIJ(DM, Mat);
extern PetscErrorCode DMCreateMatrix_DA_3d_MPIBAIJ(DM, Mat);
extern PetscErrorCode DMCreateMatrix_DA_2d_MPISBAIJ(DM, Mat);
extern PetscErrorCode DMCreateMatrix_DA_3d_MPISBAIJ(DM, Mat);
extern PetscErrorCode DMCreateMatrix_DA_2d_MPISELL(DM, Mat);
extern PetscErrorCode DMCreateMatrix_DA_3d_MPISELL(DM, Mat);
extern PetscErrorCode DMCreateMatrix_DA_IS(DM, Mat);

/*@C
  MatSetupDM - Sets the `DMDA` that is to be used by the HYPRE_StructMatrix PETSc matrix

  Logically Collective on mat

  Input Parameters:
+ mat - the matrix
- da - the da

  Level: intermediate

.seealso: `Mat`, `MatSetUp()`
@*/
PetscErrorCode MatSetupDM(Mat mat, DM da)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat, MAT_CLASSID, 1);
  PetscValidHeaderSpecificType(da, DM_CLASSID, 2, DMDA);
  PetscTryMethod(mat, "MatSetupDM_C", (Mat,
DM), (mat, da));
  PetscFunctionReturn(0);
}

/* View a DMDA-generated parallel matrix in the natural (application) ordering:
   permutes via the DMDA's AO into a temporary matrix and views that. */
PetscErrorCode MatView_MPI_DA(Mat A, PetscViewer viewer)
{
  DM                da;
  const char       *prefix;
  Mat               Anatural;
  AO                ao;
  PetscInt          rstart, rend, *petsc, i;
  IS                is;
  MPI_Comm          comm;
  PetscViewerFormat format;

  PetscFunctionBegin;
  /* Check whether we are just printing info, in which case MatView() already viewed everything we wanted to view */
  PetscCall(PetscViewerGetFormat(viewer, &format));
  if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) PetscFunctionReturn(0);

  PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
  PetscCall(MatGetDM(A, &da));
  PetscCheck(da, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Matrix not generated from a DMDA");

  PetscCall(DMDAGetAO(da, &ao));
  PetscCall(MatGetOwnershipRange(A, &rstart, &rend));
  PetscCall(PetscMalloc1(rend - rstart, &petsc));
  for (i = rstart; i < rend; i++) petsc[i - rstart] = i;
  PetscCall(AOApplicationToPetsc(ao, rend - rstart, petsc));
  PetscCall(ISCreateGeneral(comm, rend - rstart, petsc, PETSC_OWN_POINTER, &is));

  /* call viewer on natural ordering */
  PetscCall(MatCreateSubMatrix(A, is, is, MAT_INITIAL_MATRIX, &Anatural));
  PetscCall(ISDestroy(&is));
  PetscCall(PetscObjectGetOptionsPrefix((PetscObject)A, &prefix));
  PetscCall(PetscObjectSetOptionsPrefix((PetscObject)Anatural, prefix));
  PetscCall(PetscObjectSetName((PetscObject)Anatural, ((PetscObject)A)->name));
  /* suppress the header so the temporary matrix prints as if it were A itself */
  ((PetscObject)Anatural)->donotPetscObjectPrintClassNamePrefixType = PETSC_TRUE;
  PetscCall(MatView(Anatural, viewer));
  ((PetscObject)Anatural)->donotPetscObjectPrintClassNamePrefixType = PETSC_FALSE;
  PetscCall(MatDestroy(&Anatural));
  PetscFunctionReturn(0);
}

/* Load a matrix stored in natural ordering and permute it into the DMDA's
   PETSc (application) ordering, replacing A's contents in place. */
PetscErrorCode MatLoad_MPI_DA(Mat A, PetscViewer viewer)
{
  DM       da;
  Mat      Anatural, Aapp;
  AO       ao;
  PetscInt rstart, rend, *app, i, m, n, M, N;
  IS       is;
  MPI_Comm comm;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
  PetscCall(MatGetDM(A, &da));
  PetscCheck(da, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Matrix not generated from a DMDA");

  /* Load the matrix in natural ordering */
  PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &Anatural));
  PetscCall(MatSetType(Anatural, ((PetscObject)A)->type_name));
  PetscCall(MatGetSize(A, &M, &N));
  PetscCall(MatGetLocalSize(A, &m, &n));
  PetscCall(MatSetSizes(Anatural, m, n, M, N));
  PetscCall(MatLoad(Anatural, viewer));

  /* Map natural ordering to application ordering and create IS */
  PetscCall(DMDAGetAO(da, &ao));
  PetscCall(MatGetOwnershipRange(Anatural, &rstart, &rend));
  PetscCall(PetscMalloc1(rend - rstart, &app));
  for (i = rstart; i < rend; i++) app[i - rstart] = i;
  PetscCall(AOPetscToApplication(ao, rend - rstart, app));
  PetscCall(ISCreateGeneral(comm, rend - rstart, app, PETSC_OWN_POINTER, &is));

  /* Do permutation and replace header */
  PetscCall(MatCreateSubMatrix(Anatural, is, is, MAT_INITIAL_MATRIX, &Aapp));
  PetscCall(MatHeaderReplace(A, &Aapp));
  PetscCall(ISDestroy(&is));
  PetscCall(MatDestroy(&Anatural));
  PetscFunctionReturn(0);
}

/* Create and preallocate the matrix for this DMDA, dispatching on which
   preallocation method the chosen matrix type supports (AIJ/BAIJ/SBAIJ/SELL/IS). */
PetscErrorCode DMCreateMatrix_DA(DM da, Mat *J)
{
  PetscInt    dim, dof, nx, ny, nz, dims[3], starts[3], M, N, P;
  Mat         A;
  MPI_Comm    comm;
  MatType     Atype;
  void (*aij)(void) = NULL, (*baij)(void) = NULL, (*sbaij)(void) = NULL, (*sell)(void) = NULL, (*is)(void) = NULL;
  MatType     mtype;
  PetscMPIInt size;
  DM_DA      *dd = (DM_DA *)da->data;

  PetscFunctionBegin;
  PetscCall(MatInitializePackage());
  mtype = da->mattype;

  /*
                                                                        m
          ------------------------------------------------------
         |                                                      |
         |                                                      |
         |               ----------------------                 |
         |               |                    |                 |
      n  |           ny  |                    |                 |
         |               |                    |                 |
         |               .---------------------                 |
         |             (xs,ys)     nx                           |
         |            .
                                              |
         |          (gxs,gys)                           |
         |                                              |
          -----------------------------------------------------
  */

  /*
     nc - number of components per grid point
     col - number of colors needed in one direction for single component problem
  */
  M   = dd->M;
  N   = dd->N;
  P   = dd->P;
  dim = da->dim;
  dof = dd->w;
  /* PetscCall(DMDAGetInfo(da,&dim,&M,&N,&P,NULL,NULL,NULL,&dof,NULL,NULL,NULL,NULL,NULL)); */
  PetscCall(DMDAGetCorners(da, NULL, NULL, NULL, &nx, &ny, &nz));
  PetscCall(PetscObjectGetComm((PetscObject)da, &comm));
  PetscCall(MatCreate(comm, &A));
  PetscCall(MatSetSizes(A, dof * nx * ny * nz, dof * nx * ny * nz, dof * M * N * P, dof * M * N * P));
  PetscCall(MatSetType(A, mtype));
  PetscCall(MatSetFromOptions(A));
  /* small local problems are pinned to the CPU */
  if (dof * nx * ny * nz < da->bind_below) {
    PetscCall(MatSetBindingPropagates(A, PETSC_TRUE));
    PetscCall(MatBindToCPU(A, PETSC_TRUE));
  }
  PetscCall(MatSetDM(A, da));
  if (da->structure_only) PetscCall(MatSetOption(A, MAT_STRUCTURE_ONLY, PETSC_TRUE));
  PetscCall(MatGetType(A, &Atype));
  /*
     We do not provide a getmatrix function in the DMDA operations because
     the basic DMDA does not know about matrices. We think of DMDA as being
     more low-level than matrices. This is kind of cheating because sometimes
     we think of DMDA as higher level than matrices.

     We could switch based on Atype (or mtype), but we do not since the
     specialized setting routines depend only on the particular preallocation
     details of the matrix, not the type itself.
  */
  PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatMPIAIJSetPreallocation_C", &aij));
  if (!aij) PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatSeqAIJSetPreallocation_C", &aij));
  if (!aij) {
    PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatMPIBAIJSetPreallocation_C", &baij));
    if (!baij) PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatSeqBAIJSetPreallocation_C", &baij));
    if (!baij) {
      PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatMPISBAIJSetPreallocation_C", &sbaij));
      if (!sbaij) PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatSeqSBAIJSetPreallocation_C", &sbaij));
      if (!sbaij) {
        PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatMPISELLSetPreallocation_C", &sell));
        if (!sell) PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatSeqSELLSetPreallocation_C", &sell));
      }
      if (!sell) PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatISSetPreallocation_C", &is));
    }
  }
  if (aij) {
    if (dim == 1) {
      if (dd->ofill) {
        PetscCall(DMCreateMatrix_DA_1d_MPIAIJ_Fill(da, A));
      } else {
        DMBoundaryType bx;
        PetscMPIInt    size;
        PetscCall(DMDAGetInfo(da, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, &bx, NULL, NULL, NULL));
        PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)da), &size));
        if (size == 1 && bx == DM_BOUNDARY_NONE) {
          PetscCall(DMCreateMatrix_DA_1d_SeqAIJ_NoPreallocation(da, A));
        } else {
          PetscCall(DMCreateMatrix_DA_1d_MPIAIJ(da, A));
        }
      }
    } else if (dim == 2) {
      if (dd->ofill) {
        PetscCall(DMCreateMatrix_DA_2d_MPIAIJ_Fill(da, A));
      } else {
        PetscCall(DMCreateMatrix_DA_2d_MPIAIJ(da, A));
      }
    } else if (dim == 3) {
      if (dd->ofill) {
        PetscCall(DMCreateMatrix_DA_3d_MPIAIJ_Fill(da, A));
      } else {
        PetscCall(DMCreateMatrix_DA_3d_MPIAIJ(da, A));
      }
    }
  } else if (baij) {
    if (dim == 2) {
      PetscCall(DMCreateMatrix_DA_2d_MPIBAIJ(da, A));
    } else if (dim == 3) {
      PetscCall(DMCreateMatrix_DA_3d_MPIBAIJ(da, A));
    } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "Not implemented for %" PetscInt_FMT " dimension and Matrix Type: %s in %" PetscInt_FMT " dimension! Send mail to petsc-maint@mcs.anl.gov for code", dim, Atype, dim);
  } else if (sbaij) {
    if (dim == 2) {
      PetscCall(DMCreateMatrix_DA_2d_MPISBAIJ(da, A));
    } else if (dim == 3) {
      PetscCall(DMCreateMatrix_DA_3d_MPISBAIJ(da, A));
    } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "Not implemented for %" PetscInt_FMT " dimension and Matrix Type: %s in %" PetscInt_FMT " dimension! Send mail to petsc-maint@mcs.anl.gov for code", dim, Atype, dim);
  } else if (sell) {
    if (dim == 2) {
      PetscCall(DMCreateMatrix_DA_2d_MPISELL(da, A));
    } else if (dim == 3) {
      PetscCall(DMCreateMatrix_DA_3d_MPISELL(da, A));
    } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "Not implemented for %" PetscInt_FMT " dimension and Matrix Type: %s in %" PetscInt_FMT " dimension! Send mail to petsc-maint@mcs.anl.gov for code", dim, Atype, dim);
  } else if (is) {
    PetscCall(DMCreateMatrix_DA_IS(da, A));
  } else {
    /* matrix type supports no recognized preallocation routine: just set it up */
    ISLocalToGlobalMapping ltog;

    PetscCall(MatSetBlockSize(A, dof));
    PetscCall(MatSetUp(A));
    PetscCall(DMGetLocalToGlobalMapping(da, &ltog));
    PetscCall(MatSetLocalToGlobalMapping(A, ltog, ltog));
  }
  PetscCall(DMDAGetGhostCorners(da, &starts[0], &starts[1], &starts[2], &dims[0], &dims[1], &dims[2]));
  PetscCall(MatSetStencil(A, dim, dims, starts, dof));
  PetscCall(MatSetDM(A, da));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  if (size > 1) {
    /* change viewer to display matrix in natural ordering */
    PetscCall(MatSetOperation(A, MATOP_VIEW, (void (*)(void))MatView_MPI_DA));
    PetscCall(MatSetOperation(A, MATOP_LOAD, (void (*)(void))MatLoad_MPI_DA));
  }
  *J = A;
  PetscFunctionReturn(0);
}

/* ---------------------------------------------------------------------------------*/
PETSC_EXTERN PetscErrorCode MatISSetPreallocation_IS(Mat, PetscInt, const PetscInt[], PetscInt, const PetscInt[]);

PetscErrorCode DMCreateMatrix_DA_IS(DM dm, Mat J)
{
  DM_DA                 *da = (DM_DA *)dm->data;
  Mat                    lJ, P;
  ISLocalToGlobalMapping ltog;
  IS                     is;
  PetscBT                bt;
  const PetscInt        *e_loc, *idx;
  PetscInt               i, nel, nen, nv, dof, *gidx, n, N;

  /* The l2g map of DMDA has all ghosted nodes, and e_loc is a subset of all the local nodes (including the ghosted)
     We need to filter out the local indices that are not represented through the DMDAGetElements decomposition */
  PetscFunctionBegin;
  dof = da->w;
  PetscCall(MatSetBlockSize(J, dof));
  PetscCall(DMGetLocalToGlobalMapping(dm, &ltog));

  /* flag local elements indices in local DMDA numbering */
  PetscCall(ISLocalToGlobalMappingGetSize(ltog, &nv));
  PetscCall(PetscBTCreate(nv / dof, &bt));
  PetscCall(DMDAGetElements(dm, &nel, &nen, &e_loc)); /* this will throw an error if the stencil type is not DMDA_STENCIL_BOX */
  for (i = 0; i < nel * nen; i++) PetscCall(PetscBTSet(bt, e_loc[i]));

  /* filter out (set to -1) the global indices not used by the local elements */
  PetscCall(PetscMalloc1(nv / dof, &gidx));
  PetscCall(ISLocalToGlobalMappingGetBlockIndices(ltog, &idx));
  PetscCall(PetscArraycpy(gidx, idx, nv / dof));
  PetscCall(ISLocalToGlobalMappingRestoreBlockIndices(ltog, &idx));
  for (i = 0; i < nv / dof; i++)
    if (!PetscBTLookup(bt, i)) gidx[i] = -1;
  PetscCall(PetscBTDestroy(&bt));
  PetscCall(ISCreateBlock(PetscObjectComm((PetscObject)dm), dof, nv / dof, gidx, PETSC_OWN_POINTER, &is));
  PetscCall(ISLocalToGlobalMappingCreateIS(is, &ltog));
  PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));
  PetscCall(ISLocalToGlobalMappingDestroy(&ltog));
  PetscCall(ISDestroy(&is));

  /* Preallocation */
  if (dm->prealloc_skip) {
    PetscCall(MatSetUp(J));
  } else {
    /* use a MATPREALLOCATOR sized like the local matrix to discover the pattern */
    PetscCall(MatISGetLocalMat(J, &lJ));
    PetscCall(MatGetLocalToGlobalMapping(lJ, &ltog, NULL));
    PetscCall(MatCreate(PetscObjectComm((PetscObject)lJ), &P));
    PetscCall(MatSetType(P, MATPREALLOCATOR));
    PetscCall(MatSetLocalToGlobalMapping(P, ltog, ltog));
    PetscCall(MatGetSize(lJ, &N, NULL));
    PetscCall(MatGetLocalSize(lJ, &n, NULL));
    PetscCall(MatSetSizes(P, n, n, N, N));
    PetscCall(MatSetBlockSize(P, dof));
    PetscCall(MatSetUp(P));
    for (i = 0; i < nel; i++) PetscCall(MatSetValuesBlockedLocal(P, nen, e_loc + i * nen, nen, e_loc + i * nen, NULL, INSERT_VALUES));
    PetscCall(MatPreallocatorPreallocate(P, (PetscBool)!da->prealloc_only, lJ));
    PetscCall(MatISRestoreLocalMat(J, &lJ));
    PetscCall(DMDARestoreElements(dm, &nel, &nen, &e_loc));
    PetscCall(MatDestroy(&P));

    PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
  }
  PetscFunctionReturn(0);
}

PetscErrorCode DMCreateMatrix_DA_2d_MPISELL(DM da, Mat J)
{
PetscInt xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny, m, n, dim, s, *cols = NULL, k, nc, *rows = NULL, col, cnt, l, p; 852 PetscInt lstart, lend, pstart, pend, *dnz, *onz; 853 MPI_Comm comm; 854 PetscScalar *values; 855 DMBoundaryType bx, by; 856 ISLocalToGlobalMapping ltog; 857 DMDAStencilType st; 858 859 PetscFunctionBegin; 860 /* 861 nc - number of components per grid point 862 col - number of colors needed in one direction for single component problem 863 864 */ 865 PetscCall(DMDAGetInfo(da, &dim, &m, &n, NULL, NULL, NULL, NULL, &nc, &s, &bx, &by, NULL, &st)); 866 col = 2 * s + 1; 867 PetscCall(DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL)); 868 PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL)); 869 PetscCall(PetscObjectGetComm((PetscObject)da, &comm)); 870 871 PetscCall(PetscMalloc2(nc, &rows, col * col * nc * nc, &cols)); 872 PetscCall(DMGetLocalToGlobalMapping(da, <og)); 873 874 PetscCall(MatSetBlockSize(J, nc)); 875 /* determine the matrix preallocation information */ 876 MatPreallocateBegin(comm, nc * nx * ny, nc * nx * ny, dnz, onz); 877 for (i = xs; i < xs + nx; i++) { 878 pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i)); 879 pend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1)); 880 881 for (j = ys; j < ys + ny; j++) { 882 slot = i - gxs + gnx * (j - gys); 883 884 lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j)); 885 lend = (by == DM_BOUNDARY_PERIODIC) ? 
s : (PetscMin(s, n - j - 1)); 886 887 cnt = 0; 888 for (k = 0; k < nc; k++) { 889 for (l = lstart; l < lend + 1; l++) { 890 for (p = pstart; p < pend + 1; p++) { 891 if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star have either l = 0 or p = 0 */ 892 cols[cnt++] = k + nc * (slot + gnx * l + p); 893 } 894 } 895 } 896 rows[k] = k + nc * (slot); 897 } 898 PetscCall(MatPreallocateSetLocal(ltog, nc, rows, ltog, cnt, cols, dnz, onz)); 899 } 900 } 901 PetscCall(MatSetBlockSize(J, nc)); 902 PetscCall(MatSeqSELLSetPreallocation(J, 0, dnz)); 903 PetscCall(MatMPISELLSetPreallocation(J, 0, dnz, 0, onz)); 904 MatPreallocateEnd(dnz, onz); 905 906 PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog)); 907 908 /* 909 For each node in the grid: we get the neighbors in the local (on processor ordering 910 that includes the ghost points) then MatSetValuesLocal() maps those indices to the global 911 PETSc ordering. 912 */ 913 if (!da->prealloc_only) { 914 PetscCall(PetscCalloc1(col * col * nc * nc, &values)); 915 for (i = xs; i < xs + nx; i++) { 916 pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i)); 917 pend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1)); 918 919 for (j = ys; j < ys + ny; j++) { 920 slot = i - gxs + gnx * (j - gys); 921 922 lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j)); 923 lend = (by == DM_BOUNDARY_PERIODIC) ? 
s : (PetscMin(s, n - j - 1)); 924 925 cnt = 0; 926 for (k = 0; k < nc; k++) { 927 for (l = lstart; l < lend + 1; l++) { 928 for (p = pstart; p < pend + 1; p++) { 929 if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star have either l = 0 or p = 0 */ 930 cols[cnt++] = k + nc * (slot + gnx * l + p); 931 } 932 } 933 } 934 rows[k] = k + nc * (slot); 935 } 936 PetscCall(MatSetValuesLocal(J, nc, rows, cnt, cols, values, INSERT_VALUES)); 937 } 938 } 939 PetscCall(PetscFree(values)); 940 /* do not copy values to GPU since they are all zero and not yet needed there */ 941 PetscCall(MatBindToCPU(J, PETSC_TRUE)); 942 PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY)); 943 PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY)); 944 PetscCall(MatBindToCPU(J, PETSC_FALSE)); 945 PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE)); 946 } 947 PetscCall(PetscFree2(rows, cols)); 948 PetscFunctionReturn(0); 949 } 950 951 PetscErrorCode DMCreateMatrix_DA_3d_MPISELL(DM da, Mat J) 952 { 953 PetscInt xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny; 954 PetscInt m, n, dim, s, *cols = NULL, k, nc, *rows = NULL, col, cnt, l, p, *dnz = NULL, *onz = NULL; 955 PetscInt istart, iend, jstart, jend, kstart, kend, zs, nz, gzs, gnz, ii, jj, kk, M, N, P; 956 MPI_Comm comm; 957 PetscScalar *values; 958 DMBoundaryType bx, by, bz; 959 ISLocalToGlobalMapping ltog; 960 DMDAStencilType st; 961 962 PetscFunctionBegin; 963 /* 964 nc - number of components per grid point 965 col - number of colors needed in one direction for single component problem 966 967 */ 968 PetscCall(DMDAGetInfo(da, &dim, &m, &n, &p, &M, &N, &P, &nc, &s, &bx, &by, &bz, &st)); 969 col = 2 * s + 1; 970 PetscCall(DMDAGetCorners(da, &xs, &ys, &zs, &nx, &ny, &nz)); 971 PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, &gzs, &gnx, &gny, &gnz)); 972 PetscCall(PetscObjectGetComm((PetscObject)da, &comm)); 973 974 PetscCall(PetscMalloc2(nc, &rows, col * col * col * nc * nc, &cols)); 975 PetscCall(DMGetLocalToGlobalMapping(da, 
<og)); 976 977 PetscCall(MatSetBlockSize(J, nc)); 978 /* determine the matrix preallocation information */ 979 MatPreallocateBegin(comm, nc * nx * ny * nz, nc * nx * ny * nz, dnz, onz); 980 for (i = xs; i < xs + nx; i++) { 981 istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i)); 982 iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1)); 983 for (j = ys; j < ys + ny; j++) { 984 jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j)); 985 jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1)); 986 for (k = zs; k < zs + nz; k++) { 987 kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k)); 988 kend = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1)); 989 990 slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs); 991 992 cnt = 0; 993 for (l = 0; l < nc; l++) { 994 for (ii = istart; ii < iend + 1; ii++) { 995 for (jj = jstart; jj < jend + 1; jj++) { 996 for (kk = kstart; kk < kend + 1; kk++) { 997 if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/ 998 cols[cnt++] = l + nc * (slot + ii + gnx * jj + gnx * gny * kk); 999 } 1000 } 1001 } 1002 } 1003 rows[l] = l + nc * (slot); 1004 } 1005 PetscCall(MatPreallocateSetLocal(ltog, nc, rows, ltog, cnt, cols, dnz, onz)); 1006 } 1007 } 1008 } 1009 PetscCall(MatSetBlockSize(J, nc)); 1010 PetscCall(MatSeqSELLSetPreallocation(J, 0, dnz)); 1011 PetscCall(MatMPISELLSetPreallocation(J, 0, dnz, 0, onz)); 1012 MatPreallocateEnd(dnz, onz); 1013 PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog)); 1014 1015 /* 1016 For each node in the grid: we get the neighbors in the local (on processor ordering 1017 that includes the ghost points) then MatSetValuesLocal() maps those indices to the global 1018 PETSc ordering. 1019 */ 1020 if (!da->prealloc_only) { 1021 PetscCall(PetscCalloc1(col * col * col * nc * nc * nc, &values)); 1022 for (i = xs; i < xs + nx; i++) { 1023 istart = (bx == DM_BOUNDARY_PERIODIC) ? 
-s : (PetscMax(-s, -i)); 1024 iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1)); 1025 for (j = ys; j < ys + ny; j++) { 1026 jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j)); 1027 jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1)); 1028 for (k = zs; k < zs + nz; k++) { 1029 kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k)); 1030 kend = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1)); 1031 1032 slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs); 1033 1034 cnt = 0; 1035 for (l = 0; l < nc; l++) { 1036 for (ii = istart; ii < iend + 1; ii++) { 1037 for (jj = jstart; jj < jend + 1; jj++) { 1038 for (kk = kstart; kk < kend + 1; kk++) { 1039 if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/ 1040 cols[cnt++] = l + nc * (slot + ii + gnx * jj + gnx * gny * kk); 1041 } 1042 } 1043 } 1044 } 1045 rows[l] = l + nc * (slot); 1046 } 1047 PetscCall(MatSetValuesLocal(J, nc, rows, cnt, cols, values, INSERT_VALUES)); 1048 } 1049 } 1050 } 1051 PetscCall(PetscFree(values)); 1052 /* do not copy values to GPU since they are all zero and not yet needed there */ 1053 PetscCall(MatBindToCPU(J, PETSC_TRUE)); 1054 PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY)); 1055 PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY)); 1056 PetscCall(MatBindToCPU(J, PETSC_FALSE)); 1057 PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE)); 1058 } 1059 PetscCall(PetscFree2(rows, cols)); 1060 PetscFunctionReturn(0); 1061 } 1062 1063 PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ(DM da, Mat J) 1064 { 1065 PetscInt xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny, m, n, dim, s, *cols = NULL, k, nc, *rows = NULL, col, cnt, l, p, M, N; 1066 PetscInt lstart, lend, pstart, pend, *dnz, *onz; 1067 MPI_Comm comm; 1068 DMBoundaryType bx, by; 1069 ISLocalToGlobalMapping ltog, mltog; 1070 DMDAStencilType st; 1071 PetscBool removedups = PETSC_FALSE, alreadyboundtocpu = PETSC_TRUE; 1072 
1073 PetscFunctionBegin; 1074 /* 1075 nc - number of components per grid point 1076 col - number of colors needed in one direction for single component problem 1077 1078 */ 1079 PetscCall(DMDAGetInfo(da, &dim, &m, &n, NULL, &M, &N, NULL, &nc, &s, &bx, &by, NULL, &st)); 1080 if (bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE) PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_TRUE)); 1081 col = 2 * s + 1; 1082 /* 1083 With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times 1084 because of "wrapping" around the end of the domain hitting an entry already counted in the other direction. 1085 */ 1086 if (M == 1 && 2 * s >= m) removedups = PETSC_TRUE; 1087 if (N == 1 && 2 * s >= n) removedups = PETSC_TRUE; 1088 PetscCall(DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL)); 1089 PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL)); 1090 PetscCall(PetscObjectGetComm((PetscObject)da, &comm)); 1091 1092 PetscCall(PetscMalloc2(nc, &rows, col * col * nc * nc, &cols)); 1093 PetscCall(DMGetLocalToGlobalMapping(da, <og)); 1094 1095 PetscCall(MatSetBlockSize(J, nc)); 1096 /* determine the matrix preallocation information */ 1097 MatPreallocateBegin(comm, nc * nx * ny, nc * nx * ny, dnz, onz); 1098 for (i = xs; i < xs + nx; i++) { 1099 pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i)); 1100 pend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1)); 1101 1102 for (j = ys; j < ys + ny; j++) { 1103 slot = i - gxs + gnx * (j - gys); 1104 1105 lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j)); 1106 lend = (by == DM_BOUNDARY_PERIODIC) ? 
s : (PetscMin(s, n - j - 1)); 1107 1108 cnt = 0; 1109 for (k = 0; k < nc; k++) { 1110 for (l = lstart; l < lend + 1; l++) { 1111 for (p = pstart; p < pend + 1; p++) { 1112 if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star have either l = 0 or p = 0 */ 1113 cols[cnt++] = k + nc * (slot + gnx * l + p); 1114 } 1115 } 1116 } 1117 rows[k] = k + nc * (slot); 1118 } 1119 if (removedups) PetscCall(MatPreallocateSetLocalRemoveDups(ltog, nc, rows, ltog, cnt, cols, dnz, onz)); 1120 else PetscCall(MatPreallocateSetLocal(ltog, nc, rows, ltog, cnt, cols, dnz, onz)); 1121 } 1122 } 1123 PetscCall(MatSetBlockSize(J, nc)); 1124 PetscCall(MatSeqAIJSetPreallocation(J, 0, dnz)); 1125 PetscCall(MatMPIAIJSetPreallocation(J, 0, dnz, 0, onz)); 1126 MatPreallocateEnd(dnz, onz); 1127 PetscCall(MatGetLocalToGlobalMapping(J, &mltog, NULL)); 1128 if (!mltog) PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog)); 1129 1130 /* 1131 For each node in the grid: we get the neighbors in the local (on processor ordering 1132 that includes the ghost points) then MatSetValuesLocal() maps those indices to the global 1133 PETSc ordering. 1134 */ 1135 if (!da->prealloc_only) { 1136 for (i = xs; i < xs + nx; i++) { 1137 pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i)); 1138 pend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1)); 1139 1140 for (j = ys; j < ys + ny; j++) { 1141 slot = i - gxs + gnx * (j - gys); 1142 1143 lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j)); 1144 lend = (by == DM_BOUNDARY_PERIODIC) ? 
s : (PetscMin(s, n - j - 1)); 1145 1146 cnt = 0; 1147 for (l = lstart; l < lend + 1; l++) { 1148 for (p = pstart; p < pend + 1; p++) { 1149 if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star have either l = 0 or p = 0 */ 1150 cols[cnt++] = nc * (slot + gnx * l + p); 1151 for (k = 1; k < nc; k++) { 1152 cols[cnt] = 1 + cols[cnt - 1]; 1153 cnt++; 1154 } 1155 } 1156 } 1157 } 1158 for (k = 0; k < nc; k++) rows[k] = k + nc * (slot); 1159 PetscCall(MatSetValuesLocal(J, nc, rows, cnt, cols, NULL, INSERT_VALUES)); 1160 } 1161 } 1162 /* do not copy values to GPU since they are all zero and not yet needed there */ 1163 PetscCall(MatBoundToCPU(J, &alreadyboundtocpu)); 1164 PetscCall(MatBindToCPU(J, PETSC_TRUE)); 1165 PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY)); 1166 PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY)); 1167 if (!alreadyboundtocpu) PetscCall(MatBindToCPU(J, PETSC_FALSE)); 1168 PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE)); 1169 if (bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE) PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_FALSE)); 1170 } 1171 PetscCall(PetscFree2(rows, cols)); 1172 PetscFunctionReturn(0); 1173 } 1174 1175 PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ_Fill(DM da, Mat J) 1176 { 1177 PetscInt xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny; 1178 PetscInt m, n, dim, s, *cols, k, nc, row, col, cnt, maxcnt = 0, l, p, M, N; 1179 PetscInt lstart, lend, pstart, pend, *dnz, *onz; 1180 DM_DA *dd = (DM_DA *)da->data; 1181 PetscInt ifill_col, *ofill = dd->ofill, *dfill = dd->dfill; 1182 MPI_Comm comm; 1183 DMBoundaryType bx, by; 1184 ISLocalToGlobalMapping ltog; 1185 DMDAStencilType st; 1186 PetscBool removedups = PETSC_FALSE; 1187 1188 PetscFunctionBegin; 1189 /* 1190 nc - number of components per grid point 1191 col - number of colors needed in one direction for single component problem 1192 1193 */ 1194 PetscCall(DMDAGetInfo(da, &dim, &m, &n, NULL, &M, &N, NULL, &nc, &s, &bx, &by, NULL, &st)); 1195 col = 2 * s 
+ 1; 1196 /* 1197 With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times 1198 because of "wrapping" around the end of the domain hitting an entry already counted in the other direction. 1199 */ 1200 if (M == 1 && 2 * s >= m) removedups = PETSC_TRUE; 1201 if (N == 1 && 2 * s >= n) removedups = PETSC_TRUE; 1202 PetscCall(DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL)); 1203 PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL)); 1204 PetscCall(PetscObjectGetComm((PetscObject)da, &comm)); 1205 1206 PetscCall(PetscMalloc1(col * col * nc, &cols)); 1207 PetscCall(DMGetLocalToGlobalMapping(da, <og)); 1208 1209 PetscCall(MatSetBlockSize(J, nc)); 1210 /* determine the matrix preallocation information */ 1211 MatPreallocateBegin(comm, nc * nx * ny, nc * nx * ny, dnz, onz); 1212 for (i = xs; i < xs + nx; i++) { 1213 pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i)); 1214 pend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1)); 1215 1216 for (j = ys; j < ys + ny; j++) { 1217 slot = i - gxs + gnx * (j - gys); 1218 1219 lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j)); 1220 lend = (by == DM_BOUNDARY_PERIODIC) ? 
s : (PetscMin(s, n - j - 1)); 1221 1222 for (k = 0; k < nc; k++) { 1223 cnt = 0; 1224 for (l = lstart; l < lend + 1; l++) { 1225 for (p = pstart; p < pend + 1; p++) { 1226 if (l || p) { 1227 if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star */ 1228 for (ifill_col = ofill[k]; ifill_col < ofill[k + 1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc * (slot + gnx * l + p); 1229 } 1230 } else { 1231 if (dfill) { 1232 for (ifill_col = dfill[k]; ifill_col < dfill[k + 1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc * (slot + gnx * l + p); 1233 } else { 1234 for (ifill_col = 0; ifill_col < nc; ifill_col++) cols[cnt++] = ifill_col + nc * (slot + gnx * l + p); 1235 } 1236 } 1237 } 1238 } 1239 row = k + nc * (slot); 1240 maxcnt = PetscMax(maxcnt, cnt); 1241 if (removedups) PetscCall(MatPreallocateSetLocalRemoveDups(ltog, 1, &row, ltog, cnt, cols, dnz, onz)); 1242 else PetscCall(MatPreallocateSetLocal(ltog, 1, &row, ltog, cnt, cols, dnz, onz)); 1243 } 1244 } 1245 } 1246 PetscCall(MatSeqAIJSetPreallocation(J, 0, dnz)); 1247 PetscCall(MatMPIAIJSetPreallocation(J, 0, dnz, 0, onz)); 1248 MatPreallocateEnd(dnz, onz); 1249 PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog)); 1250 1251 /* 1252 For each node in the grid: we get the neighbors in the local (on processor ordering 1253 that includes the ghost points) then MatSetValuesLocal() maps those indices to the global 1254 PETSc ordering. 1255 */ 1256 if (!da->prealloc_only) { 1257 for (i = xs; i < xs + nx; i++) { 1258 pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i)); 1259 pend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1)); 1260 1261 for (j = ys; j < ys + ny; j++) { 1262 slot = i - gxs + gnx * (j - gys); 1263 1264 lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j)); 1265 lend = (by == DM_BOUNDARY_PERIODIC) ? 
s : (PetscMin(s, n - j - 1)); 1266 1267 for (k = 0; k < nc; k++) { 1268 cnt = 0; 1269 for (l = lstart; l < lend + 1; l++) { 1270 for (p = pstart; p < pend + 1; p++) { 1271 if (l || p) { 1272 if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star */ 1273 for (ifill_col = ofill[k]; ifill_col < ofill[k + 1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc * (slot + gnx * l + p); 1274 } 1275 } else { 1276 if (dfill) { 1277 for (ifill_col = dfill[k]; ifill_col < dfill[k + 1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc * (slot + gnx * l + p); 1278 } else { 1279 for (ifill_col = 0; ifill_col < nc; ifill_col++) cols[cnt++] = ifill_col + nc * (slot + gnx * l + p); 1280 } 1281 } 1282 } 1283 } 1284 row = k + nc * (slot); 1285 PetscCall(MatSetValuesLocal(J, 1, &row, cnt, cols, NULL, INSERT_VALUES)); 1286 } 1287 } 1288 } 1289 /* do not copy values to GPU since they are all zero and not yet needed there */ 1290 PetscCall(MatBindToCPU(J, PETSC_TRUE)); 1291 PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY)); 1292 PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY)); 1293 PetscCall(MatBindToCPU(J, PETSC_FALSE)); 1294 PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE)); 1295 } 1296 PetscCall(PetscFree(cols)); 1297 PetscFunctionReturn(0); 1298 } 1299 1300 /* ---------------------------------------------------------------------------------*/ 1301 1302 PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ(DM da, Mat J) 1303 { 1304 PetscInt xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny; 1305 PetscInt m, n, dim, s, *cols = NULL, k, nc, *rows = NULL, col, cnt, l, p, *dnz = NULL, *onz = NULL; 1306 PetscInt istart, iend, jstart, jend, kstart, kend, zs, nz, gzs, gnz, ii, jj, kk, M, N, P; 1307 MPI_Comm comm; 1308 DMBoundaryType bx, by, bz; 1309 ISLocalToGlobalMapping ltog, mltog; 1310 DMDAStencilType st; 1311 PetscBool removedups = PETSC_FALSE; 1312 1313 PetscFunctionBegin; 1314 /* 1315 nc - number of components per grid point 1316 col - number of colors needed in one 
direction for single component problem 1317 1318 */ 1319 PetscCall(DMDAGetInfo(da, &dim, &m, &n, &p, &M, &N, &P, &nc, &s, &bx, &by, &bz, &st)); 1320 if (bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE && bz == DM_BOUNDARY_NONE) PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_TRUE)); 1321 col = 2 * s + 1; 1322 1323 /* 1324 With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times 1325 because of "wrapping" around the end of the domain hitting an entry already counted in the other direction. 1326 */ 1327 if (M == 1 && 2 * s >= m) removedups = PETSC_TRUE; 1328 if (N == 1 && 2 * s >= n) removedups = PETSC_TRUE; 1329 if (P == 1 && 2 * s >= p) removedups = PETSC_TRUE; 1330 1331 PetscCall(DMDAGetCorners(da, &xs, &ys, &zs, &nx, &ny, &nz)); 1332 PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, &gzs, &gnx, &gny, &gnz)); 1333 PetscCall(PetscObjectGetComm((PetscObject)da, &comm)); 1334 1335 PetscCall(PetscMalloc2(nc, &rows, col * col * col * nc * nc, &cols)); 1336 PetscCall(DMGetLocalToGlobalMapping(da, <og)); 1337 1338 PetscCall(MatSetBlockSize(J, nc)); 1339 /* determine the matrix preallocation information */ 1340 MatPreallocateBegin(comm, nc * nx * ny * nz, nc * nx * ny * nz, dnz, onz); 1341 for (i = xs; i < xs + nx; i++) { 1342 istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i)); 1343 iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1)); 1344 for (j = ys; j < ys + ny; j++) { 1345 jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j)); 1346 jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1)); 1347 for (k = zs; k < zs + nz; k++) { 1348 kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k)); 1349 kend = (bz == DM_BOUNDARY_PERIODIC) ? 
s : (PetscMin(s, p - k - 1)); 1350 1351 slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs); 1352 1353 cnt = 0; 1354 for (l = 0; l < nc; l++) { 1355 for (ii = istart; ii < iend + 1; ii++) { 1356 for (jj = jstart; jj < jend + 1; jj++) { 1357 for (kk = kstart; kk < kend + 1; kk++) { 1358 if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/ 1359 cols[cnt++] = l + nc * (slot + ii + gnx * jj + gnx * gny * kk); 1360 } 1361 } 1362 } 1363 } 1364 rows[l] = l + nc * (slot); 1365 } 1366 if (removedups) PetscCall(MatPreallocateSetLocalRemoveDups(ltog, nc, rows, ltog, cnt, cols, dnz, onz)); 1367 else PetscCall(MatPreallocateSetLocal(ltog, nc, rows, ltog, cnt, cols, dnz, onz)); 1368 } 1369 } 1370 } 1371 PetscCall(MatSetBlockSize(J, nc)); 1372 PetscCall(MatSeqAIJSetPreallocation(J, 0, dnz)); 1373 PetscCall(MatMPIAIJSetPreallocation(J, 0, dnz, 0, onz)); 1374 MatPreallocateEnd(dnz, onz); 1375 PetscCall(MatGetLocalToGlobalMapping(J, &mltog, NULL)); 1376 if (!mltog) PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog)); 1377 1378 /* 1379 For each node in the grid: we get the neighbors in the local (on processor ordering 1380 that includes the ghost points) then MatSetValuesLocal() maps those indices to the global 1381 PETSc ordering. 1382 */ 1383 if (!da->prealloc_only) { 1384 for (i = xs; i < xs + nx; i++) { 1385 istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i)); 1386 iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1)); 1387 for (j = ys; j < ys + ny; j++) { 1388 jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j)); 1389 jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1)); 1390 for (k = zs; k < zs + nz; k++) { 1391 kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k)); 1392 kend = (bz == DM_BOUNDARY_PERIODIC) ? 
s : (PetscMin(s, p - k - 1)); 1393 1394 slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs); 1395 1396 cnt = 0; 1397 for (kk = kstart; kk < kend + 1; kk++) { 1398 for (jj = jstart; jj < jend + 1; jj++) { 1399 for (ii = istart; ii < iend + 1; ii++) { 1400 if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/ 1401 cols[cnt++] = nc * (slot + ii + gnx * jj + gnx * gny * kk); 1402 for (l = 1; l < nc; l++) { 1403 cols[cnt] = 1 + cols[cnt - 1]; 1404 cnt++; 1405 } 1406 } 1407 } 1408 } 1409 } 1410 rows[0] = nc * (slot); 1411 for (l = 1; l < nc; l++) rows[l] = 1 + rows[l - 1]; 1412 PetscCall(MatSetValuesLocal(J, nc, rows, cnt, cols, NULL, INSERT_VALUES)); 1413 } 1414 } 1415 } 1416 /* do not copy values to GPU since they are all zero and not yet needed there */ 1417 PetscCall(MatBindToCPU(J, PETSC_TRUE)); 1418 PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY)); 1419 PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY)); 1420 if (bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE && bz == DM_BOUNDARY_NONE) PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_FALSE)); 1421 PetscCall(MatBindToCPU(J, PETSC_FALSE)); 1422 PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE)); 1423 } 1424 PetscCall(PetscFree2(rows, cols)); 1425 PetscFunctionReturn(0); 1426 } 1427 1428 /* ---------------------------------------------------------------------------------*/ 1429 1430 PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ_Fill(DM da, Mat J) 1431 { 1432 DM_DA *dd = (DM_DA *)da->data; 1433 PetscInt xs, nx, i, j, gxs, gnx, row, k, l; 1434 PetscInt m, dim, s, *cols = NULL, nc, cnt, maxcnt = 0, *ocols; 1435 PetscInt *ofill = dd->ofill, *dfill = dd->dfill; 1436 DMBoundaryType bx; 1437 ISLocalToGlobalMapping ltog; 1438 PetscMPIInt rank, size; 1439 1440 PetscFunctionBegin; 1441 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)da), &rank)); 1442 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)da), &size)); 1443 1444 /* 1445 nc - number 
of components per grid point 1446 1447 */ 1448 PetscCall(DMDAGetInfo(da, &dim, &m, NULL, NULL, NULL, NULL, NULL, &nc, &s, &bx, NULL, NULL, NULL)); 1449 PetscCheck(s <= 1, PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "Matrix creation for 1d not implemented correctly for stencil width larger than 1"); 1450 PetscCall(DMDAGetCorners(da, &xs, NULL, NULL, &nx, NULL, NULL)); 1451 PetscCall(DMDAGetGhostCorners(da, &gxs, NULL, NULL, &gnx, NULL, NULL)); 1452 1453 PetscCall(MatSetBlockSize(J, nc)); 1454 PetscCall(PetscCalloc2(nx * nc, &cols, nx * nc, &ocols)); 1455 1456 /* 1457 note should be smaller for first and last process with no periodic 1458 does not handle dfill 1459 */ 1460 cnt = 0; 1461 /* coupling with process to the left */ 1462 for (i = 0; i < s; i++) { 1463 for (j = 0; j < nc; j++) { 1464 ocols[cnt] = ((rank == 0) ? 0 : (s - i) * (ofill[j + 1] - ofill[j])); 1465 cols[cnt] = dfill[j + 1] - dfill[j] + (s + i) * (ofill[j + 1] - ofill[j]); 1466 if (rank == 0 && (dd->bx == DM_BOUNDARY_PERIODIC)) { 1467 if (size > 1) ocols[cnt] += (s - i) * (ofill[j + 1] - ofill[j]); 1468 else cols[cnt] += (s - i) * (ofill[j + 1] - ofill[j]); 1469 } 1470 maxcnt = PetscMax(maxcnt, ocols[cnt] + cols[cnt]); 1471 cnt++; 1472 } 1473 } 1474 for (i = s; i < nx - s; i++) { 1475 for (j = 0; j < nc; j++) { 1476 cols[cnt] = dfill[j + 1] - dfill[j] + 2 * s * (ofill[j + 1] - ofill[j]); 1477 maxcnt = PetscMax(maxcnt, ocols[cnt] + cols[cnt]); 1478 cnt++; 1479 } 1480 } 1481 /* coupling with process to the right */ 1482 for (i = nx - s; i < nx; i++) { 1483 for (j = 0; j < nc; j++) { 1484 ocols[cnt] = ((rank == (size - 1)) ? 
0 : (i - nx + s + 1) * (ofill[j + 1] - ofill[j])); 1485 cols[cnt] = dfill[j + 1] - dfill[j] + (s + nx - i - 1) * (ofill[j + 1] - ofill[j]); 1486 if ((rank == size - 1) && (dd->bx == DM_BOUNDARY_PERIODIC)) { 1487 if (size > 1) ocols[cnt] += (i - nx + s + 1) * (ofill[j + 1] - ofill[j]); 1488 else cols[cnt] += (i - nx + s + 1) * (ofill[j + 1] - ofill[j]); 1489 } 1490 maxcnt = PetscMax(maxcnt, ocols[cnt] + cols[cnt]); 1491 cnt++; 1492 } 1493 } 1494 1495 PetscCall(MatSeqAIJSetPreallocation(J, 0, cols)); 1496 PetscCall(MatMPIAIJSetPreallocation(J, 0, cols, 0, ocols)); 1497 PetscCall(PetscFree2(cols, ocols)); 1498 1499 PetscCall(DMGetLocalToGlobalMapping(da, <og)); 1500 PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog)); 1501 1502 /* 1503 For each node in the grid: we get the neighbors in the local (on processor ordering 1504 that includes the ghost points) then MatSetValuesLocal() maps those indices to the global 1505 PETSc ordering. 1506 */ 1507 if (!da->prealloc_only) { 1508 PetscCall(PetscMalloc1(maxcnt, &cols)); 1509 row = xs * nc; 1510 /* coupling with process to the left */ 1511 for (i = xs; i < xs + s; i++) { 1512 for (j = 0; j < nc; j++) { 1513 cnt = 0; 1514 if (rank) { 1515 for (l = 0; l < s; l++) { 1516 for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i - s + l) * nc + ofill[k]; 1517 } 1518 } 1519 if (rank == 0 && (dd->bx == DM_BOUNDARY_PERIODIC)) { 1520 for (l = 0; l < s; l++) { 1521 for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (m + i - s - l) * nc + ofill[k]; 1522 } 1523 } 1524 if (dfill) { 1525 for (k = dfill[j]; k < dfill[j + 1]; k++) cols[cnt++] = i * nc + dfill[k]; 1526 } else { 1527 for (k = 0; k < nc; k++) cols[cnt++] = i * nc + k; 1528 } 1529 for (l = 0; l < s; l++) { 1530 for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i + s - l) * nc + ofill[k]; 1531 } 1532 PetscCall(MatSetValues(J, 1, &row, cnt, cols, NULL, INSERT_VALUES)); 1533 row++; 1534 } 1535 } 1536 for (i = xs + s; i < xs + nx - s; i++) { 1537 for (j = 0; j < nc; 
j++) { 1538 cnt = 0; 1539 for (l = 0; l < s; l++) { 1540 for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i - s + l) * nc + ofill[k]; 1541 } 1542 if (dfill) { 1543 for (k = dfill[j]; k < dfill[j + 1]; k++) cols[cnt++] = i * nc + dfill[k]; 1544 } else { 1545 for (k = 0; k < nc; k++) cols[cnt++] = i * nc + k; 1546 } 1547 for (l = 0; l < s; l++) { 1548 for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i + s - l) * nc + ofill[k]; 1549 } 1550 PetscCall(MatSetValues(J, 1, &row, cnt, cols, NULL, INSERT_VALUES)); 1551 row++; 1552 } 1553 } 1554 /* coupling with process to the right */ 1555 for (i = xs + nx - s; i < xs + nx; i++) { 1556 for (j = 0; j < nc; j++) { 1557 cnt = 0; 1558 for (l = 0; l < s; l++) { 1559 for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i - s + l) * nc + ofill[k]; 1560 } 1561 if (dfill) { 1562 for (k = dfill[j]; k < dfill[j + 1]; k++) cols[cnt++] = i * nc + dfill[k]; 1563 } else { 1564 for (k = 0; k < nc; k++) cols[cnt++] = i * nc + k; 1565 } 1566 if (rank < size - 1) { 1567 for (l = 0; l < s; l++) { 1568 for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i + s - l) * nc + ofill[k]; 1569 } 1570 } 1571 if ((rank == size - 1) && (dd->bx == DM_BOUNDARY_PERIODIC)) { 1572 for (l = 0; l < s; l++) { 1573 for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i - s - l - m + 2) * nc + ofill[k]; 1574 } 1575 } 1576 PetscCall(MatSetValues(J, 1, &row, cnt, cols, NULL, INSERT_VALUES)); 1577 row++; 1578 } 1579 } 1580 PetscCall(PetscFree(cols)); 1581 /* do not copy values to GPU since they are all zero and not yet needed there */ 1582 PetscCall(MatBindToCPU(J, PETSC_TRUE)); 1583 PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY)); 1584 PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY)); 1585 PetscCall(MatBindToCPU(J, PETSC_FALSE)); 1586 PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE)); 1587 } 1588 PetscFunctionReturn(0); 1589 } 1590 1591 /* 
---------------------------------------------------------------------------------*/ 1592 1593 PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ(DM da, Mat J) 1594 { 1595 PetscInt xs, nx, i, i1, slot, gxs, gnx; 1596 PetscInt m, dim, s, *cols = NULL, nc, *rows = NULL, col, cnt, l; 1597 PetscInt istart, iend; 1598 DMBoundaryType bx; 1599 ISLocalToGlobalMapping ltog, mltog; 1600 1601 PetscFunctionBegin; 1602 /* 1603 nc - number of components per grid point 1604 col - number of colors needed in one direction for single component problem 1605 1606 */ 1607 PetscCall(DMDAGetInfo(da, &dim, &m, NULL, NULL, NULL, NULL, NULL, &nc, &s, &bx, NULL, NULL, NULL)); 1608 if (bx == DM_BOUNDARY_NONE) PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_TRUE)); 1609 col = 2 * s + 1; 1610 1611 PetscCall(DMDAGetCorners(da, &xs, NULL, NULL, &nx, NULL, NULL)); 1612 PetscCall(DMDAGetGhostCorners(da, &gxs, NULL, NULL, &gnx, NULL, NULL)); 1613 1614 PetscCall(MatSetBlockSize(J, nc)); 1615 PetscCall(MatSeqAIJSetPreallocation(J, col * nc, NULL)); 1616 PetscCall(MatMPIAIJSetPreallocation(J, col * nc, NULL, col * nc, NULL)); 1617 1618 PetscCall(DMGetLocalToGlobalMapping(da, <og)); 1619 PetscCall(MatGetLocalToGlobalMapping(J, &mltog, NULL)); 1620 if (!mltog) PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog)); 1621 1622 /* 1623 For each node in the grid: we get the neighbors in the local (on processor ordering 1624 that includes the ghost points) then MatSetValuesLocal() maps those indices to the global 1625 PETSc ordering. 
*/
  /* Insert explicit zeros over the full stencil coupling pattern; the values are all zero,
     so this pass only establishes the nonzero structure before final assembly */
  if (!da->prealloc_only) {
    PetscCall(PetscMalloc2(nc, &rows, col * nc * nc, &cols));
    for (i = xs; i < xs + nx; i++) {
      istart = PetscMax(-s, gxs - i);
      iend   = PetscMin(s, gxs + gnx - i - 1);
      slot   = i - gxs;

      cnt = 0;
      /* every dof of every neighbor grid point within the stencil couples to this point */
      for (i1 = istart; i1 < iend + 1; i1++) {
        cols[cnt++] = nc * (slot + i1);
        for (l = 1; l < nc; l++) {
          cols[cnt] = 1 + cols[cnt - 1];
          cnt++;
        }
      }
      rows[0] = nc * (slot);
      for (l = 1; l < nc; l++) rows[l] = 1 + rows[l - 1];
      PetscCall(MatSetValuesLocal(J, nc, rows, cnt, cols, NULL, INSERT_VALUES));
    }
    /* do not copy values to GPU since they are all zero and not yet needed there */
    PetscCall(MatBindToCPU(J, PETSC_TRUE));
    PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
    if (bx == DM_BOUNDARY_NONE) PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_FALSE));
    PetscCall(MatBindToCPU(J, PETSC_FALSE));
    PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
    PetscCall(PetscFree2(rows, cols));
  }
  PetscFunctionReturn(0);
}

/* ---------------------------------------------------------------------------------*/

/*
  DMCreateMatrix_DA_1d_SeqAIJ_NoPreallocation - builds the SeqAIJ matrix for a 1d DMDA
  using a single total-size preallocation (MatSeqAIJSetTotalPreallocation()) rather than
  per-row counts; unless da->prealloc_only is set it also inserts explicit zeros over the
  full stencil pattern and assembles the matrix.
*/
PetscErrorCode DMCreateMatrix_DA_1d_SeqAIJ_NoPreallocation(DM da, Mat J)
{
  PetscInt               xs, nx, i, i1, slot, gxs, gnx;
  PetscInt               m, dim, s, *cols = NULL, nc, *rows = NULL, col, cnt, l;
  PetscInt               istart, iend;
  DMBoundaryType         bx;
  ISLocalToGlobalMapping ltog, mltog;

  PetscFunctionBegin;
  /*
    nc - number of components per grid point
    col - number of colors needed in one direction for single component problem
  */
  PetscCall(DMDAGetInfo(da, &dim, &m, NULL, NULL, NULL, NULL, NULL, &nc, &s, &bx, NULL, NULL, NULL));
  col = 2 * s + 1;

  PetscCall(DMDAGetCorners(da, &xs, NULL, NULL, &nx, NULL, NULL));
  PetscCall(DMDAGetGhostCorners(da, &gxs, NULL, NULL, &gnx, NULL, NULL));

  /* nx local points, nc dofs per point, at most col*nc nonzeros per dof row */
  PetscCall(MatSetBlockSize(J, nc));
  PetscCall(MatSeqAIJSetTotalPreallocation(J, nx * nc * col * nc));

  PetscCall(DMGetLocalToGlobalMapping(da, &ltog));
  PetscCall(MatGetLocalToGlobalMapping(J, &mltog, NULL));
  if (!mltog) PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    PetscCall(PetscMalloc2(nc, &rows, col * nc * nc, &cols));
    for (i = xs; i < xs + nx; i++) {
      istart = PetscMax(-s, gxs - i);
      iend   = PetscMin(s, gxs + gnx - i - 1);
      slot   = i - gxs;

      cnt = 0;
      for (i1 = istart; i1 < iend + 1; i1++) {
        cols[cnt++] = nc * (slot + i1);
        for (l = 1; l < nc; l++) {
          cols[cnt] = 1 + cols[cnt - 1];
          cnt++;
        }
      }
      rows[0] = nc * (slot);
      for (l = 1; l < nc; l++) rows[l] = 1 + rows[l - 1];
      PetscCall(MatSetValuesLocal(J, nc, rows, cnt, cols, NULL, INSERT_VALUES));
    }
    /* do not copy values to GPU since they are all zero and not yet needed there */
    PetscCall(MatBindToCPU(J, PETSC_TRUE));
    PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
    if (bx == DM_BOUNDARY_NONE) PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_FALSE));
    PetscCall(MatBindToCPU(J, PETSC_FALSE));
    PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
    PetscCall(PetscFree2(rows, cols));
  }
  /* NOTE(review): this unconditional call looks redundant with the DM_BOUNDARY_NONE-guarded
     call inside the block above, and runs even when da->prealloc_only is set -- confirm intent */
  PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_FALSE));
  PetscFunctionReturn(0);
}

/*
  DMCreateMatrix_DA_2d_MPIBAIJ - preallocates the (Seq/MPI)BAIJ matrix for a 2d DMDA,
  one block row per grid point with block size nc, and unless da->prealloc_only is set
  inserts explicit zero blocks over the stencil pattern and assembles.
*/
PetscErrorCode DMCreateMatrix_DA_2d_MPIBAIJ(DM da, Mat J)
{
  PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
  PetscInt               m, n, dim, s, *cols, nc, col, cnt, *dnz, *onz;
  PetscInt               istart, iend, jstart, jend, ii, jj;
  MPI_Comm               comm;
  PetscScalar           *values;
  DMBoundaryType         bx, by;
  DMDAStencilType        st;
  ISLocalToGlobalMapping ltog;

  PetscFunctionBegin;
  /*
    nc - number of components per grid point
    col - number of colors needed in one direction for single component problem
  */
  PetscCall(DMDAGetInfo(da, &dim, &m, &n, NULL, NULL, NULL, NULL, &nc, &s, &bx, &by, NULL, &st));
  col = 2 * s + 1;

  PetscCall(DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL));
  PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL));
  PetscCall(PetscObjectGetComm((PetscObject)da, &comm));

  PetscCall(PetscMalloc1(col * col * nc * nc, &cols));

  PetscCall(DMGetLocalToGlobalMapping(da, &ltog));

  /* determine the matrix preallocation information */
  MatPreallocateBegin(comm, nx * ny, nx * ny, dnz, onz);
  for (i = xs; i < xs + nx; i++) {
    /* at a periodic boundary the full stencil wraps; otherwise clip it at the domain edge */
    istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
    iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
    for (j = ys; j < ys + ny; j++) {
      jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
      jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
      slot   = i - gxs + gnx * (j - gys);

      /* Find block columns in block row */
      cnt = 0;
      for (ii = istart; ii < iend + 1; ii++) {
        for (jj = jstart; jj < jend + 1; jj++) {
          if (st == DMDA_STENCIL_BOX || !ii || !jj) { /* BOX or on the STAR */
            cols[cnt++] = slot + ii + gnx * jj;
          }
        }
      }
      PetscCall(MatPreallocateSetLocalBlock(ltog, 1, &slot, ltog, cnt, cols, dnz, onz));
    }
  }
  PetscCall(MatSeqBAIJSetPreallocation(J, nc, 0, dnz));
  PetscCall(MatMPIBAIJSetPreallocation(J, nc, 0, dnz, 0, onz));
  MatPreallocateEnd(dnz, onz);

  PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    PetscCall(PetscCalloc1(col * col * nc * nc, &values));
    for (i = xs; i < xs + nx; i++) {
      istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
      iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
      for (j = ys; j < ys + ny; j++) {
        jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
        jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
        slot   = i - gxs + gnx * (j - gys);
        cnt    = 0;
        for (ii = istart; ii < iend + 1; ii++) {
          for (jj = jstart; jj < jend + 1; jj++) {
            if (st == DMDA_STENCIL_BOX || !ii || !jj) { /* BOX or on the STAR */
              cols[cnt++] = slot + ii + gnx * jj;
            }
          }
        }
        PetscCall(MatSetValuesBlockedLocal(J, 1, &slot, cnt, cols, values, INSERT_VALUES));
      }
    }
    PetscCall(PetscFree(values));
    /* do not copy values to GPU since they are all zero and not yet needed there */
    PetscCall(MatBindToCPU(J, PETSC_TRUE));
    PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatBindToCPU(J, PETSC_FALSE));
    PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  }
  PetscCall(PetscFree(cols));
  PetscFunctionReturn(0);
}

/*
  DMCreateMatrix_DA_3d_MPIBAIJ - 3d analogue of DMCreateMatrix_DA_2d_MPIBAIJ: preallocates
  the (Seq/MPI)BAIJ matrix for a 3d DMDA and, unless da->prealloc_only is set, inserts
  explicit zero blocks and assembles.
*/
PetscErrorCode DMCreateMatrix_DA_3d_MPIBAIJ(DM da, Mat J)
{
  PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
  PetscInt               m, n, dim, s, *cols, k, nc, col, cnt, p, *dnz, *onz;
  PetscInt               istart, iend, jstart, jend, kstart, kend, zs, nz, gzs, gnz, ii, jj, kk;
  MPI_Comm               comm;
  PetscScalar           *values;
  DMBoundaryType         bx, by, bz;
  DMDAStencilType        st;
  ISLocalToGlobalMapping ltog;

  PetscFunctionBegin;
  /*
    nc - number of components per grid point
    col - number of colors needed in one direction for single component problem

  */
  PetscCall(DMDAGetInfo(da, &dim, &m, &n, &p, NULL, NULL, NULL, &nc, &s, &bx, &by, &bz, &st));
  col = 2 * s + 1;

  PetscCall(DMDAGetCorners(da, &xs, &ys, &zs, &nx, &ny, &nz));
  PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, &gzs, &gnx, &gny, &gnz));
  PetscCall(PetscObjectGetComm((PetscObject)da, &comm));

  PetscCall(PetscMalloc1(col * col * col, &cols));

  PetscCall(DMGetLocalToGlobalMapping(da, &ltog));

  /* determine the matrix preallocation information */
  MatPreallocateBegin(comm, nx * ny * nz, nx * ny * nz, dnz, onz);
  for (i = xs; i < xs + nx; i++) {
    istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
    iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
    for (j = ys; j < ys + ny; j++) {
      jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
      jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
      for (k = zs; k < zs + nz; k++) {
        kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
        kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));

        slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);

        /* Find block columns in block row */
        cnt = 0;
        for (ii = istart; ii < iend + 1; ii++) {
          for (jj = jstart; jj < jend + 1; jj++) {
            for (kk = kstart; kk < kend + 1; kk++) {
              if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/
                cols[cnt++] = slot + ii + gnx * jj + gnx * gny * kk;
              }
            }
          }
        }
        PetscCall(MatPreallocateSetLocalBlock(ltog, 1, &slot, ltog, cnt, cols, dnz, onz));
      }
    }
  }
  PetscCall(MatSeqBAIJSetPreallocation(J, nc, 0, dnz));
  PetscCall(MatMPIBAIJSetPreallocation(J, nc, 0, dnz, 0, onz));
  MatPreallocateEnd(dnz, onz);

  PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    PetscCall(PetscCalloc1(col * col * col * nc * nc, &values));
    for (i = xs; i < xs + nx; i++) {
      istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
      iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
      for (j = ys; j < ys + ny; j++) {
        jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
        jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
        for (k = zs; k < zs + nz; k++) {
          kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
          kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));

          slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);

          cnt = 0;
          for (ii = istart; ii < iend + 1; ii++) {
            for (jj = jstart; jj < jend + 1; jj++) {
              for (kk = kstart; kk < kend + 1; kk++) {
                if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/
                  cols[cnt++] = slot + ii + gnx * jj + gnx * gny * kk;
                }
              }
            }
          }
          PetscCall(MatSetValuesBlockedLocal(J, 1, &slot, cnt, cols, values, INSERT_VALUES));
        }
      }
    }
    PetscCall(PetscFree(values));
    /* do not copy values to GPU since they are all zero and not yet needed there */
    PetscCall(MatBindToCPU(J, PETSC_TRUE));
    PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatBindToCPU(J, PETSC_FALSE));
    PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  }
  PetscCall(PetscFree(cols));
  PetscFunctionReturn(0);
}

/*
  This helper is for SBAIJ preallocation, to discard the lower-triangular values which are difficult to
  identify in the local ordering with periodic domain.
*/
static PetscErrorCode L2GFilterUpperTriangular(ISLocalToGlobalMapping ltog, PetscInt *row, PetscInt *cnt, PetscInt col[])
{
  PetscInt i, n;

  PetscFunctionBegin;
  /* map the block row and its block columns from local to global block indices (in place) */
  PetscCall(ISLocalToGlobalMappingApplyBlock(ltog, 1, row, row));
  PetscCall(ISLocalToGlobalMappingApplyBlock(ltog, *cnt, col, col));
  /* compact col[], keeping only entries on or above the block diagonal (col >= row) */
  for (i = 0, n = 0; i < *cnt; i++) {
    if (col[i] >= *row) col[n++] = col[i];
  }
  *cnt = n;
  PetscFunctionReturn(0);
}

/*
  DMCreateMatrix_DA_2d_MPISBAIJ - preallocates the symmetric (Seq/MPI)SBAIJ matrix for a
  2d DMDA, keeping only the upper-triangular block columns of each block row, and unless
  da->prealloc_only is set inserts explicit zero blocks and assembles.
*/
PetscErrorCode DMCreateMatrix_DA_2d_MPISBAIJ(DM da, Mat J)
{
  PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
  PetscInt               m, n, dim, s, *cols, nc, col, cnt, *dnz, *onz;
  PetscInt               istart, iend, jstart, jend, ii, jj;
  MPI_Comm               comm;
  PetscScalar           *values;
  DMBoundaryType         bx, by;
  DMDAStencilType        st;
  ISLocalToGlobalMapping ltog;

  PetscFunctionBegin;
  /*
    nc - number of components per grid point
    col - number of colors needed in one direction for single component problem
  */
  PetscCall(DMDAGetInfo(da, &dim, &m, &n, NULL, NULL, NULL, NULL, &nc, &s, &bx, &by, NULL, &st));
  col = 2 * s + 1;

  PetscCall(DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL));
  PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL));
  PetscCall(PetscObjectGetComm((PetscObject)da, &comm));

  PetscCall(PetscMalloc1(col * col * nc * nc, &cols));

  PetscCall(DMGetLocalToGlobalMapping(da, &ltog));

  /* determine the matrix preallocation information */
  MatPreallocateBegin(comm, nx * ny, nx * ny, dnz, onz);
  for (i = xs; i < xs + nx; i++) {
    istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
    iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
    for (j = ys; j < ys + ny; j++) {
      jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
      jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
      slot   = i - gxs + gnx * (j - gys);

      /* Find block columns in block row */
      cnt = 0;
      for (ii = istart; ii < iend + 1; ii++) {
        for (jj = jstart; jj < jend + 1; jj++) {
          if (st == DMDA_STENCIL_BOX || !ii || !jj) cols[cnt++] = slot + ii + gnx * jj;
        }
      }
      /* converts slot and cols to GLOBAL block indices and drops lower-triangular entries */
      PetscCall(L2GFilterUpperTriangular(ltog, &slot, &cnt, cols));
      PetscCall(MatPreallocateSymmetricSetBlock(slot, cnt, cols, dnz, onz));
    }
  }
  PetscCall(MatSeqSBAIJSetPreallocation(J, nc, 0, dnz));
  PetscCall(MatMPISBAIJSetPreallocation(J, nc, 0, dnz, 0, onz));
  MatPreallocateEnd(dnz, onz);

  PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    PetscCall(PetscCalloc1(col * col * nc * nc, &values));
    for (i = xs; i < xs + nx; i++) {
      istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
      iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
      for (j = ys; j < ys + ny; j++) {
        jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
        jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
        slot   = i - gxs + gnx * (j - gys);

        /* Find block columns in block row */
        cnt = 0;
        for (ii = istart; ii < iend + 1; ii++) {
          for (jj = jstart; jj < jend + 1; jj++) {
            if (st == DMDA_STENCIL_BOX || !ii || !jj) cols[cnt++] = slot + ii + gnx * jj;
          }
        }
        PetscCall(L2GFilterUpperTriangular(ltog, &slot, &cnt, cols));
        /* global (not Local) variant because L2GFilterUpperTriangular already mapped the indices */
        PetscCall(MatSetValuesBlocked(J, 1, &slot, cnt, cols, values, INSERT_VALUES));
      }
    }
    PetscCall(PetscFree(values));
    /* do not copy values to GPU since they are all zero and not yet needed there */
    PetscCall(MatBindToCPU(J, PETSC_TRUE));
    PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatBindToCPU(J, PETSC_FALSE));
    PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  }
  PetscCall(PetscFree(cols));
  PetscFunctionReturn(0);
}

/*
  DMCreateMatrix_DA_3d_MPISBAIJ - 3d analogue of DMCreateMatrix_DA_2d_MPISBAIJ: symmetric
  SBAIJ preallocation keeping only upper-triangular block columns, with optional zero fill
  and assembly.
*/
PetscErrorCode DMCreateMatrix_DA_3d_MPISBAIJ(DM da, Mat J)
{
  PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
  PetscInt               m, n, dim, s, *cols, k, nc, col, cnt, p, *dnz, *onz;
  PetscInt               istart, iend, jstart, jend, kstart, kend, zs, nz, gzs, gnz, ii, jj, kk;
  MPI_Comm               comm;
  PetscScalar           *values;
  DMBoundaryType         bx, by, bz;
  DMDAStencilType        st;
  ISLocalToGlobalMapping ltog;

  PetscFunctionBegin;
  /*
    nc - number of components per grid point
    col - number of colors needed in one direction for single component problem
  */
  PetscCall(DMDAGetInfo(da, &dim, &m, &n, &p, NULL, NULL, NULL, &nc, &s, &bx, &by, &bz, &st));
  col = 2 * s + 1;

  PetscCall(DMDAGetCorners(da, &xs, &ys, &zs, &nx, &ny, &nz));
  PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, &gzs, &gnx, &gny, &gnz));
  PetscCall(PetscObjectGetComm((PetscObject)da, &comm));

  /* create the matrix */
  PetscCall(PetscMalloc1(col * col * col, &cols));

  PetscCall(DMGetLocalToGlobalMapping(da, &ltog));

  /* determine the matrix preallocation information */
  MatPreallocateBegin(comm, nx * ny * nz, nx * ny * nz, dnz, onz);
  for (i = xs; i < xs + nx; i++) {
    istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
    iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
    for (j = ys; j < ys + ny; j++) {
      jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
      jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
      for (k = zs; k < zs + nz; k++) {
        kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
        kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));

        slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);

        /* Find block columns in block row */
        cnt = 0;
        for (ii = istart; ii < iend + 1; ii++) {
          for (jj = jstart; jj < jend + 1; jj++) {
            for (kk = kstart; kk < kend + 1; kk++) {
              if ((st == DMDA_STENCIL_BOX) || (!ii && !jj) || (!jj && !kk) || (!ii && !kk)) cols[cnt++] = slot + ii + gnx * jj + gnx * gny * kk;
            }
          }
        }
        PetscCall(L2GFilterUpperTriangular(ltog, &slot, &cnt, cols));
        PetscCall(MatPreallocateSymmetricSetBlock(slot, cnt, cols, dnz, onz));
      }
    }
  }
  PetscCall(MatSeqSBAIJSetPreallocation(J, nc, 0, dnz));
  PetscCall(MatMPISBAIJSetPreallocation(J, nc, 0, dnz, 0, onz));
  MatPreallocateEnd(dnz, onz);

  PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    PetscCall(PetscCalloc1(col * col * col * nc * nc, &values));
    for (i = xs; i < xs + nx; i++) {
      istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
      iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
      for (j = ys; j < ys + ny; j++) {
        jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
        jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
        for (k = zs; k < zs + nz; k++) {
          kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
          kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));

          slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);

          cnt = 0;
          for (ii = istart; ii < iend + 1; ii++) {
            for (jj = jstart; jj < jend + 1; jj++) {
              for (kk = kstart; kk < kend + 1; kk++) {
                if ((st == DMDA_STENCIL_BOX) || (!ii && !jj) || (!jj && !kk) || (!ii && !kk)) cols[cnt++] = slot + ii + gnx * jj + gnx * gny * kk;
              }
            }
          }
          PetscCall(L2GFilterUpperTriangular(ltog, &slot, &cnt, cols));
          /* global (not Local) variant because L2GFilterUpperTriangular already mapped the indices */
          PetscCall(MatSetValuesBlocked(J, 1, &slot, cnt, cols, values, INSERT_VALUES));
        }
      }
    }
    PetscCall(PetscFree(values));
    /* do not copy values to GPU since they are all zero and not yet needed there */
    PetscCall(MatBindToCPU(J, PETSC_TRUE));
    PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatBindToCPU(J, PETSC_FALSE));
    PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  }
  PetscCall(PetscFree(cols));
  PetscFunctionReturn(0);
}

/* ---------------------------------------------------------------------------------*/

/*
  DMCreateMatrix_DA_3d_MPIAIJ_Fill - preallocates and zero-fills the AIJ matrix for a 3d
  DMDA honoring the user-supplied block fill patterns dd->dfill (diagonal block) and
  dd->ofill (off-diagonal blocks). Both are in the modified CSR form built by
  DMDASetBlockFills_Private(): entries 0..nc are per-row starts (entry nc is the end of
  the last row), the remaining entries are column indices within the nc x nc block.
  A NULL dfill means the diagonal block is dense.
*/
PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ_Fill(DM da, Mat J)
{
  PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
  PetscInt               m, n, dim, s, *cols, k, nc, row, col, cnt, maxcnt = 0, l, p, *dnz, *onz;
  PetscInt               istart, iend, jstart, jend, kstart, kend, zs, nz, gzs, gnz, ii, jj, kk, M, N, P;
  DM_DA                 *dd = (DM_DA *)da->data;
  PetscInt               ifill_col, *dfill = dd->dfill, *ofill = dd->ofill;
  MPI_Comm               comm;
  PetscScalar           *values;
  DMBoundaryType         bx, by, bz;
  ISLocalToGlobalMapping ltog;
  DMDAStencilType        st;
  PetscBool              removedups = PETSC_FALSE;

  PetscFunctionBegin;
  /*
    nc - number of components per grid point
    col - number of colors needed in one direction for single component problem

  */
  PetscCall(DMDAGetInfo(da, &dim, &m, &n, &p, &M, &N, &P, &nc, &s, &bx, &by, &bz, &st));
  col = 2 * s + 1;
  PetscCheck(bx != DM_BOUNDARY_PERIODIC || (m % col) == 0, PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "For coloring efficiency ensure number of grid points in X is divisible\n\
by 2*stencil_width + 1\n");
  PetscCheck(by != DM_BOUNDARY_PERIODIC || (n % col) == 0, PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "For coloring efficiency ensure number of grid points in Y is divisible\n\
by 2*stencil_width + 1\n");
  PetscCheck(bz != DM_BOUNDARY_PERIODIC || (p % col) == 0, PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "For coloring efficiency ensure number of grid points in Z is divisible\n\
by 2*stencil_width + 1\n");

  /*
    With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times
    because of "wrapping" around the end of the domain hitting an entry already counted in the other direction.
  */
  if (M == 1 && 2 * s >= m) removedups = PETSC_TRUE;
  if (N == 1 && 2 * s >= n) removedups = PETSC_TRUE;
  if (P == 1 && 2 * s >= p) removedups = PETSC_TRUE;

  PetscCall(DMDAGetCorners(da, &xs, &ys, &zs, &nx, &ny, &nz));
  PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, &gzs, &gnx, &gny, &gnz));
  PetscCall(PetscObjectGetComm((PetscObject)da, &comm));

  PetscCall(PetscMalloc1(col * col * col * nc, &cols));
  PetscCall(DMGetLocalToGlobalMapping(da, &ltog));

  /* determine the matrix preallocation information */
  MatPreallocateBegin(comm, nc * nx * ny * nz, nc * nx * ny * nz, dnz, onz);

  PetscCall(MatSetBlockSize(J, nc));
  for (i = xs; i < xs + nx; i++) {
    istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
    iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
    for (j = ys; j < ys + ny; j++) {
      jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
      jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
      for (k = zs; k < zs + nz; k++) {
        kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
        kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));

        slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);

        /* one scalar row per component l of this grid point */
        for (l = 0; l < nc; l++) {
          cnt = 0;
          for (ii = istart; ii < iend + 1; ii++) {
            for (jj = jstart; jj < jend + 1; jj++) {
              for (kk = kstart; kk < kend + 1; kk++) {
                if (ii || jj || kk) {
                  if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/
                    for (ifill_col = ofill[l]; ifill_col < ofill[l + 1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc * (slot + ii + gnx * jj + gnx * gny * kk);
                  }
                } else {
                  if (dfill) {
                    for (ifill_col = dfill[l]; ifill_col < dfill[l + 1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc * (slot + ii + gnx * jj + gnx * gny * kk);
                  } else {
                    for (ifill_col = 0; ifill_col < nc; ifill_col++) cols[cnt++] = ifill_col + nc * (slot + ii + gnx * jj + gnx * gny * kk);
                  }
                }
              }
            }
          }
          row    = l + nc * (slot);
          maxcnt = PetscMax(maxcnt, cnt); /* sizes the zero-values buffer used in the fill pass below */
          if (removedups) PetscCall(MatPreallocateSetLocalRemoveDups(ltog, 1, &row, ltog, cnt, cols, dnz, onz));
          else PetscCall(MatPreallocateSetLocal(ltog, 1, &row, ltog, cnt, cols, dnz, onz));
        }
      }
    }
  }
  PetscCall(MatSeqAIJSetPreallocation(J, 0, dnz));
  PetscCall(MatMPIAIJSetPreallocation(J, 0, dnz, 0, onz));
  MatPreallocateEnd(dnz, onz);
  PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    PetscCall(PetscCalloc1(maxcnt, &values));
    for (i = xs; i < xs + nx; i++) {
      istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
      iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
      for (j = ys; j < ys + ny; j++) {
        jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
        jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
        for (k = zs; k < zs + nz; k++) {
          kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
          kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));

          slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);

          for (l = 0; l < nc; l++) {
            cnt = 0;
            for (ii = istart; ii < iend + 1; ii++) {
              for (jj = jstart; jj < jend + 1; jj++) {
                for (kk = kstart; kk < kend + 1; kk++) {
                  if (ii || jj || kk) {
                    if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/
                      for (ifill_col = ofill[l]; ifill_col < ofill[l + 1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc * (slot + ii + gnx * jj + gnx * gny * kk);
                    }
                  } else {
                    if (dfill) {
                      for (ifill_col = dfill[l]; ifill_col < dfill[l + 1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc * (slot + ii + gnx * jj + gnx * gny * kk);
                    } else {
                      for (ifill_col = 0; ifill_col < nc; ifill_col++) cols[cnt++] = ifill_col + nc * (slot + ii + gnx * jj + gnx * gny * kk);
                    }
                  }
                }
              }
            }
            row = l + nc * (slot);
            PetscCall(MatSetValuesLocal(J, 1, &row, cnt, cols, values, INSERT_VALUES));
          }
        }
      }
    }
    PetscCall(PetscFree(values));
    /* do not copy values to GPU since they are all zero and not yet needed there */
    PetscCall(MatBindToCPU(J, PETSC_TRUE));
    PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatBindToCPU(J, PETSC_FALSE));
    PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  }
  PetscCall(PetscFree(cols));
  PetscFunctionReturn(0);
}