#include <petsc/private/dmdaimpl.h> /*I "petscdmda.h" I*/
#include <petscmat.h>
#include <petscbt.h>

/* Forward declarations of the dimension-specific coloring builders defined later in this file */
extern PetscErrorCode DMCreateColoring_DA_1d_MPIAIJ(DM, ISColoringType, ISColoring *);
extern PetscErrorCode DMCreateColoring_DA_2d_MPIAIJ(DM, ISColoringType, ISColoring *);
extern PetscErrorCode DMCreateColoring_DA_2d_5pt_MPIAIJ(DM, ISColoringType, ISColoring *);
extern PetscErrorCode DMCreateColoring_DA_3d_MPIAIJ(DM, ISColoringType, ISColoring *);

/*
   For a ghost index i that may be negative or greater than the upper bound this
   maps it into the 0:m-1 range using periodicity
*/
#define SetInRange(i, m) ((i < 0) ? m + i : ((i >= m) ? i - m : i))

/*
   DMDASetBlockFills_Private - Converts a dense w-by-w 0/1 fill pattern into the
   modified CSR representation stored in the DM_DA: entries 0..w of the result are
   the starts of each row's column indices within the array (entry w is one past the
   last used index), and the remaining entries are the column indices of the nonzeros.

   A NULL dfill is a no-op (the caller interprets it as a dense block); *rfill is then
   left unset. Otherwise the array is allocated here and ownership passes to the caller.
*/
static PetscErrorCode DMDASetBlockFills_Private(const PetscInt *dfill, PetscInt w, PetscInt **rfill)
{
  PetscInt i, j, nz, *fill;

  PetscFunctionBegin;
  if (!dfill) PetscFunctionReturn(PETSC_SUCCESS);

  /* count number of nonzeros */
  nz = 0;
  for (i = 0; i < w; i++) {
    for (j = 0; j < w; j++) {
      if (dfill[w * i + j]) nz++;
    }
  }
  PetscCall(PetscMalloc1(nz + w + 1, &fill));
  /* construct modified CSR storage of nonzero structure */
  /* fill[0 -- w] marks starts of each row of column indices (and end of last row)
     so fill[1] - fill[0] gives number of nonzeros in first row etc */
  nz = w + 1;
  for (i = 0; i < w; i++) {
    fill[i] = nz;
    for (j = 0; j < w; j++) {
      if (dfill[w * i + j]) {
        fill[nz] = j;
        nz++;
      }
    }
  }
  fill[w] = nz;

  *rfill = fill;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
   DMDASetBlockFillsSparse_Private - Copies a fill pattern that is already in the
   modified CSR format described above. The total length nz + w + 1 is recovered
   from dfillsparse[w], which by convention holds one past the last used index.

   A NULL dfillsparse is a no-op; *rfill is then left unset.
*/
static PetscErrorCode DMDASetBlockFillsSparse_Private(const PetscInt *dfillsparse, PetscInt w, PetscInt **rfill)
{
  PetscInt nz;

  PetscFunctionBegin;
  if (!dfillsparse) PetscFunctionReturn(PETSC_SUCCESS);

  /* Determine number of non-zeros from the row-pointer convention */
  nz = (dfillsparse[w] - w - 1);

  /* Allocate space for our copy of the given sparse matrix representation. */
  PetscCall(PetscMalloc1(nz + w + 1, rfill));
  PetscCall(PetscArraycpy(*rfill, dfillsparse, nz + w + 1));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
   DMDASetBlockFills_Private2 - Builds dd->ofillcols from dd->ofill (which must
   already be set in the modified CSR format).
*/
static PetscErrorCode DMDASetBlockFills_Private2(DM_DA *dd)
{
  PetscInt i, k, cnt = 1;

  PetscFunctionBegin;
  /* ofillcols tracks the columns of ofill that have any nonzero in them; the value in each location is the number of
     columns to the left with any nonzeros in them plus 1 */
  PetscCall(PetscCalloc1(dd->w, &dd->ofillcols));
  for (i = 0; i < dd->w; i++) {
    for (k = dd->ofill[i]; k < dd->ofill[i + 1]; k++) dd->ofillcols[dd->ofill[k]] = 1;
  }
  for (i = 0; i < dd->w; i++) {
    if (dd->ofillcols[i]) dd->ofillcols[i] = cnt++;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  DMDASetBlockFills - Sets the fill pattern in each block for a multi-component problem
  of the matrix returned by `DMCreateMatrix()`.

  Logically Collective on da

  Input Parameters:
+ da    - the distributed array
. dfill - the fill pattern in the diagonal block (may be NULL, means use dense block)
- ofill - the fill pattern in the off-diagonal blocks

  Level: developer

  Notes:
  This only makes sense when you are doing multicomponent problems but using the
  `MATMPIAIJ` matrix format

  The format for dfill and ofill is a 2 dimensional dof by dof matrix with 1 entries
  representing coupling and 0 entries for missing coupling. For example
.vb
            dfill[9] = {1, 0, 0,
                        1, 1, 0,
                        0, 1, 1}
.ve
  means that row 0 is coupled with only itself in the diagonal block, row 1 is coupled with
  itself and row 0 (in the diagonal block) and row 2 is coupled with itself and row 1 (in the
  diagonal block).

  `DMDASetGetMatrix()` allows you to provide general code for those more complicated nonzero patterns than
  can be represented in the dfill, ofill format

  Contributed by Glenn Hammond

.seealso: `DM`, `DMDA`, `DMCreateMatrix()`, `DMDASetGetMatrix()`, `DMSetMatrixPreallocateOnly()`
@*/
PetscErrorCode DMDASetBlockFills(DM da, const PetscInt *dfill, const PetscInt *ofill)
{
  DM_DA *dd = (DM_DA *)da->data;

  PetscFunctionBegin;
  /* save the given dfill and ofill information (converted to modified CSR form) */
  PetscCall(DMDASetBlockFills_Private(dfill, dd->w, &dd->dfill));
  PetscCall(DMDASetBlockFills_Private(ofill, dd->w, &dd->ofill));

  /* count nonzeros in ofill columns */
  PetscCall(DMDASetBlockFills_Private2(dd));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  DMDASetBlockFillsSparse - Sets the fill pattern in each block for a multi-component problem
  of the matrix returned by `DMCreateMatrix()`, using sparse representations
  of fill patterns.

  Logically Collective on da

  Input Parameters:
+ da    - the distributed array
. dfill - the sparse fill pattern in the diagonal block (may be NULL, means use dense block)
- ofill - the sparse fill pattern in the off-diagonal blocks

  Level: developer

  Notes:
  This only makes sense when you are doing multicomponent problems but using the
  `MATMPIAIJ` matrix format

  The format for dfill and ofill is a sparse representation of a
  dof-by-dof matrix with 1 entries representing coupling and 0 entries
  for missing coupling. The sparse representation is a 1 dimensional
  array of length nz + dof + 1, where nz is the number of non-zeros in
  the matrix. The first dof entries in the array give the
  starting array indices of each row's items in the rest of the array,
  the dof+1st item contains the value nz + dof + 1 (i.e. the entire length of the array)
  and the remaining nz items give the column indices of each of
  the 1s within the logical 2D matrix. Each row's items within
  the array are the column indices of the 1s within that row
  of the 2D matrix. PETSc developers may recognize that this is the
  same format as that computed by the `DMDASetBlockFills_Private()`
  function from a dense 2D matrix representation.

  `DMDASetGetMatrix()` allows you to provide general code for those more complicated nonzero patterns than
  can be represented in the dfill, ofill format

  Contributed by Philip C. Roth

.seealso: `DM`, `DMDA`, `DMDASetBlockFills()`, `DMCreateMatrix()`, `DMDASetGetMatrix()`, `DMSetMatrixPreallocateOnly()`
@*/
PetscErrorCode DMDASetBlockFillsSparse(DM da, const PetscInt *dfillsparse, const PetscInt *ofillsparse)
{
  DM_DA *dd = (DM_DA *)da->data;

  PetscFunctionBegin;
  /* save the given dfill and ofill information (already in modified CSR form, just copied) */
  PetscCall(DMDASetBlockFillsSparse_Private(dfillsparse, dd->w, &dd->dfill));
  PetscCall(DMDASetBlockFillsSparse_Private(ofillsparse, dd->w, &dd->ofill));

  /* count nonzeros in ofill columns */
  PetscCall(DMDASetBlockFills_Private2(dd));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* DMCreateColoring_DA - dispatches to the dimension-specific coloring builder below */
PetscErrorCode DMCreateColoring_DA(DM da, ISColoringType ctype, ISColoring *coloring)
{
  PetscInt       dim, m, n, p, nc;
  DMBoundaryType bx, by, bz;
  MPI_Comm       comm;
  PetscMPIInt    size;
  PetscBool      isBAIJ;
  DM_DA         *dd = (DM_DA *)da->data;

  PetscFunctionBegin;
  /*
                                    m
            ------------------------------------------------------
           |                                                     |
           |                                                     |
           |               ----------------------               |
           |               |                    |                |
        n  |           yn  |                    |                |
           |               |                    |                |
           |               .---------------------                |
           |             (xs,ys)     xn                          |
           |            .
                                            |
           |            (gxs,gys)                               |
           |                                                     |
            -----------------------------------------------------
  */

  /*
     nc - number of components per grid point
     col - number of colors needed in one direction for single component problem
  */
  PetscCall(DMDAGetInfo(da, &dim, NULL, NULL, NULL, &m, &n, &p, &nc, NULL, &bx, &by, &bz, NULL));

  PetscCall(PetscObjectGetComm((PetscObject)da, &comm));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  if (ctype == IS_COLORING_LOCAL) {
    if (size == 1) {
      ctype = IS_COLORING_GLOBAL;
    } else if (dim > 1) {
      if ((m == 1 && bx == DM_BOUNDARY_PERIODIC) || (n == 1 && by == DM_BOUNDARY_PERIODIC) || (p == 1 && bz == DM_BOUNDARY_PERIODIC)) {
        SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "IS_COLORING_LOCAL cannot be used for periodic boundary condition having both ends of the domain on the same process");
      }
    }
  }

  /* Tell the DMDA it has 1 degree of freedom per grid point so that the coloring for BAIJ
     matrices is for the blocks, not the individual matrix elements */
  PetscCall(PetscStrbeginswith(da->mattype, MATBAIJ, &isBAIJ));
  if (!isBAIJ) PetscCall(PetscStrbeginswith(da->mattype, MATMPIBAIJ, &isBAIJ));
  if (!isBAIJ) PetscCall(PetscStrbeginswith(da->mattype, MATSEQBAIJ, &isBAIJ));
  if (isBAIJ) {
    dd->w  = 1;
    dd->xs = dd->xs / nc;
    dd->xe = dd->xe / nc;
    dd->Xs = dd->Xs / nc;
    dd->Xe = dd->Xe / nc;
  }

  /*
     We do not provide a getcoloring function in the DMDA operations because
     the basic DMDA does not know about matrices. We think of DMDA as being
     more low-level than matrices.
  */
  if (dim == 1) PetscCall(DMCreateColoring_DA_1d_MPIAIJ(da, ctype, coloring));
  else if (dim == 2) PetscCall(DMCreateColoring_DA_2d_MPIAIJ(da, ctype, coloring));
  else if (dim == 3) PetscCall(DMCreateColoring_DA_3d_MPIAIJ(da, ctype, coloring));
  else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "Not done for %" PetscInt_FMT " dimension, send us mail petsc-maint@mcs.anl.gov for code", dim);
  if (isBAIJ) {
    /* restore the dof-based sizes that were temporarily scaled down above */
    dd->w  = nc;
    dd->xs = dd->xs * nc;
    dd->xe = dd->xe * nc;
    dd->Xs = dd->Xs * nc;
    dd->Xe = dd->Xe * nc;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* ---------------------------------------------------------------------------------*/

PetscErrorCode DMCreateColoring_DA_2d_MPIAIJ(DM da, ISColoringType ctype, ISColoring *coloring)
{
  PetscInt         xs, ys, nx, ny, i, j, ii, gxs, gys, gnx, gny, m, n, M, N, dim, s, k, nc, col;
  PetscInt         ncolors = 0;
  MPI_Comm         comm;
  DMBoundaryType   bx, by;
  DMDAStencilType  st;
  ISColoringValue *colors;
  DM_DA           *dd = (DM_DA *)da->data;

  PetscFunctionBegin;
  /*
     nc - number of components per grid point
     col - number of colors needed in one direction for single component problem
  */
  PetscCall(DMDAGetInfo(da, &dim, &m, &n, NULL, &M, &N, NULL, &nc, &s, &bx, &by, NULL, &st));
  col = 2 * s + 1;
  PetscCall(DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL));
  PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL));
  PetscCall(PetscObjectGetComm((PetscObject)da, &comm));

  /* special case as taught to us by Paul Hovland */
  if (st == DMDA_STENCIL_STAR && s == 1) {
    PetscCall(DMCreateColoring_DA_2d_5pt_MPIAIJ(da, ctype, coloring));
  } else {
    if (ctype == IS_COLORING_GLOBAL) {
      if (!dd->localcoloring) {
        PetscCall(PetscMalloc1(nc * nx * ny, &colors));
        ii = 0;
        /* color = component + nc*(tile position), tiling the grid with col-by-col tiles */
        for (j = ys; j < ys + ny; j++) {
          for (i = xs; i < xs + nx; i++) {
            for (k = 0; k < nc; k++) colors[ii++] = k + nc * ((i % col) + col * (j % col));
          }
        }
        ncolors = nc + nc * (col - 1 + col * (col - 1));
        PetscCall(ISColoringCreate(comm, ncolors, nc * nx * ny, colors, PETSC_OWN_POINTER, &dd->localcoloring));
      }
      *coloring = dd->localcoloring;
    } else if (ctype == IS_COLORING_LOCAL) {
      if (!dd->ghostedcoloring) {
        PetscCall(PetscMalloc1(nc * gnx * gny, &colors));
        ii = 0;
        for (j = gys; j < gys + gny; j++) {
          for (i = gxs; i < gxs + gnx; i++) {
            for (k = 0; k < nc; k++) {
              /* the complicated stuff is to handle periodic boundaries */
              colors[ii++] = k + nc * ((SetInRange(i, m) % col) + col * (SetInRange(j, n) % col));
            }
          }
        }
        ncolors = nc + nc * (col - 1 + col * (col - 1));
        PetscCall(ISColoringCreate(comm, ncolors, nc * gnx * gny, colors, PETSC_OWN_POINTER, &dd->ghostedcoloring));
        /* PetscIntView(ncolors,(PetscInt*)colors,0); */

        PetscCall(ISColoringSetType(dd->ghostedcoloring, IS_COLORING_LOCAL));
      }
      *coloring = dd->ghostedcoloring;
    } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_ARG_WRONG, "Unknown ISColoringType %d", (int)ctype);
  }
  PetscCall(ISColoringReference(*coloring));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* ---------------------------------------------------------------------------------*/

PetscErrorCode DMCreateColoring_DA_3d_MPIAIJ(DM da, ISColoringType ctype, ISColoring *coloring)
{
  PetscInt         xs, ys, nx, ny, i, j, gxs, gys, gnx, gny, m, n, p, dim, s, k, nc, col, zs, gzs, ii, l, nz, gnz, M, N, P;
  PetscInt         ncolors;
  MPI_Comm         comm;
  DMBoundaryType   bx, by, bz;
  DMDAStencilType  st;
  ISColoringValue *colors;
  DM_DA           *dd = (DM_DA *)da->data;

  PetscFunctionBegin;
  /*
     nc - number of components per grid point
     col - number of colors needed in one direction for single component problem
  */
  PetscCall(DMDAGetInfo(da, &dim, &m, &n, &p, &M, &N, &P, &nc, &s, &bx, &by, &bz, &st));
  col = 2 * s + 1;
  PetscCall(DMDAGetCorners(da, &xs, &ys, &zs, &nx, &ny, &nz));
  PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, &gzs, &gnx, &gny, &gnz));
  PetscCall(PetscObjectGetComm((PetscObject)da, &comm));

  /* create the coloring */
  if (ctype == IS_COLORING_GLOBAL) {
    if (!dd->localcoloring) {
      PetscCall(PetscMalloc1(nc * nx * ny * nz, &colors));
      ii = 0;
      /* color = component + nc*(tile position), tiling the grid with col^3 tiles */
      for (k = zs; k < zs + nz; k++) {
        for (j = ys; j < ys + ny; j++) {
          for (i = xs; i < xs + nx; i++) {
            for (l = 0; l < nc; l++) colors[ii++] = l + nc * ((i % col) + col * (j % col) + col * col * (k % col));
          }
        }
      }
      ncolors = nc + nc * (col - 1 + col * (col - 1) + col * col * (col - 1));
      PetscCall(ISColoringCreate(comm, ncolors, nc * nx * ny * nz, colors, PETSC_OWN_POINTER, &dd->localcoloring));
    }
    *coloring = dd->localcoloring;
  } else if (ctype == IS_COLORING_LOCAL) {
    if (!dd->ghostedcoloring) {
      PetscCall(PetscMalloc1(nc * gnx * gny * gnz, &colors));
      ii = 0;
      for (k = gzs; k < gzs + gnz; k++) {
        for (j = gys; j < gys + gny; j++) {
          for (i = gxs; i < gxs + gnx; i++) {
            for (l = 0; l < nc; l++) {
              /* the complicated stuff is to handle periodic boundaries */
              colors[ii++] = l + nc * ((SetInRange(i, m) % col) + col * (SetInRange(j, n) % col) + col * col * (SetInRange(k, p) % col));
            }
          }
        }
      }
      ncolors = nc + nc * (col - 1 + col * (col - 1) + col * col * (col - 1));
      PetscCall(ISColoringCreate(comm, ncolors, nc * gnx * gny * gnz, colors, PETSC_OWN_POINTER, &dd->ghostedcoloring));
      PetscCall(ISColoringSetType(dd->ghostedcoloring, IS_COLORING_LOCAL));
    }
    *coloring = dd->ghostedcoloring;
  } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_ARG_WRONG, "Unknown ISColoringType %d", (int)ctype);
  PetscCall(ISColoringReference(*coloring));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* ---------------------------------------------------------------------------------*/

PetscErrorCode DMCreateColoring_DA_1d_MPIAIJ(DM da, ISColoringType ctype, ISColoring *coloring)
{
  PetscInt         xs, nx, i, i1, gxs, gnx, l, m, M, dim, s, nc, col;
  PetscInt         ncolors;
  MPI_Comm         comm;
  DMBoundaryType   bx;
  ISColoringValue *colors;
  DM_DA           *dd = (DM_DA *)da->data;

  PetscFunctionBegin;
  /*
     nc - number of components per grid point
     col - number of colors needed in one direction for single component problem
  */
  PetscCall(DMDAGetInfo(da, &dim, &m, NULL, NULL, &M, NULL, NULL, &nc, &s, &bx, NULL, NULL, NULL));
  col = 2 * s + 1;
  PetscCall(DMDAGetCorners(da, &xs, NULL, NULL, &nx, NULL, NULL));
  PetscCall(DMDAGetGhostCorners(da, &gxs, NULL, NULL, &gnx, NULL, NULL));
  PetscCall(PetscObjectGetComm((PetscObject)da, &comm));

  /* create the coloring */
  if (ctype == IS_COLORING_GLOBAL) {
    if (!dd->localcoloring) {
      PetscCall(PetscMalloc1(nc * nx, &colors));
      if (dd->ofillcols) {
        /* a block fill pattern was provided via DMDASetBlockFills(); use fewer colors,
           distinguishing only the components that couple into off-diagonal blocks */
        PetscInt tc = 0;
        /* tc = number of components with any off-diagonal coupling */
        for (i = 0; i < nc; i++) tc += (PetscInt)(dd->ofillcols[i] > 0);
        i1 = 0;
        for (i = xs; i < xs + nx; i++) {
          for (l = 0; l < nc; l++) {
            if (dd->ofillcols[l] && (i % col)) {
              colors[i1++] = nc - 1 + tc * ((i % col) - 1) + dd->ofillcols[l];
            } else {
              colors[i1++] = l;
            }
          }
        }
        ncolors = nc + 2 * s * tc;
      } else {
        i1 = 0;
        for (i = xs; i < xs + nx; i++) {
          for (l = 0; l < nc; l++) colors[i1++] = l + nc * (i % col);
        }
        ncolors = nc + nc * (col - 1);
      }
      PetscCall(ISColoringCreate(comm, ncolors, nc * nx, colors, PETSC_OWN_POINTER, &dd->localcoloring));
    }
    *coloring = dd->localcoloring;
  } else if (ctype == IS_COLORING_LOCAL) {
    if (!dd->ghostedcoloring) {
      PetscCall(PetscMalloc1(nc * gnx, &colors));
      i1 = 0;
      for (i = gxs; i < gxs + gnx; i++) {
        for (l = 0; l < nc; l++) {
          /* the complicated stuff is to handle periodic boundaries */
          colors[i1++] = l + nc * (SetInRange(i, m) % col);
        }
      }
      ncolors = nc + nc * (col - 1);
      PetscCall(ISColoringCreate(comm, ncolors, nc * gnx, colors, PETSC_OWN_POINTER, &dd->ghostedcoloring));
      PetscCall(ISColoringSetType(dd->ghostedcoloring, IS_COLORING_LOCAL));
    }
    *coloring = dd->ghostedcoloring;
  } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_ARG_WRONG, "Unknown ISColoringType %d", (int)ctype);
  PetscCall(ISColoringReference(*coloring));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Special 5-color coloring for the 2d star stencil with width 1; the caller
   (DMCreateColoring_DA_2d_MPIAIJ) takes the extra ISColoringReference() */
PetscErrorCode DMCreateColoring_DA_2d_5pt_MPIAIJ(DM da, ISColoringType ctype, ISColoring *coloring)
{
  PetscInt         xs, ys, nx, ny, i, j, ii, gxs, gys, gnx, gny, m, n, dim, s, k, nc;
  PetscInt         ncolors;
  MPI_Comm         comm;
  DMBoundaryType   bx, by;
  ISColoringValue *colors;
  DM_DA           *dd = (DM_DA *)da->data;

  PetscFunctionBegin;
  /*
     nc - number of components per grid point
     col - number of colors needed in one direction for single component problem
  */
  PetscCall(DMDAGetInfo(da, &dim, &m, &n, NULL, NULL, NULL, NULL, &nc, &s, &bx, &by, NULL, NULL));
  PetscCall(DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL));
  PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL));
  PetscCall(PetscObjectGetComm((PetscObject)da, &comm));
  /* create the coloring; the (3*j + i) mod 5 pattern 5-colors the 5-point stencil */
  if (ctype == IS_COLORING_GLOBAL) {
    if (!dd->localcoloring) {
      PetscCall(PetscMalloc1(nc * nx * ny, &colors));
      ii = 0;
      for (j = ys; j < ys + ny; j++) {
        for (i = xs; i < xs + nx; i++) {
          for (k = 0; k < nc; k++) colors[ii++] = k + nc * ((3 * j + i) % 5);
        }
      }
      ncolors = 5 * nc;
      PetscCall(ISColoringCreate(comm, ncolors, nc * nx * ny, colors, PETSC_OWN_POINTER, &dd->localcoloring));
    }
    *coloring = dd->localcoloring;
  } else if (ctype == IS_COLORING_LOCAL) {
    if (!dd->ghostedcoloring) {
      PetscCall(PetscMalloc1(nc * gnx * gny, &colors));
      ii = 0;
      for (j = gys; j < gys + gny; j++) {
        for (i = gxs; i < gxs + gnx; i++) {
          /* SetInRange handles periodic ghost indices outside 0:m-1 / 0:n-1 */
          for (k = 0; k < nc; k++) colors[ii++] = k + nc * ((3 * SetInRange(j, n) + SetInRange(i, m)) % 5);
        }
      }
      ncolors = 5 * nc;
      PetscCall(ISColoringCreate(comm, ncolors, nc * gnx * gny, colors, PETSC_OWN_POINTER, &dd->ghostedcoloring));
      PetscCall(ISColoringSetType(dd->ghostedcoloring, IS_COLORING_LOCAL));
    }
    *coloring = dd->ghostedcoloring;
  } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_ARG_WRONG, "Unknown ISColoringType %d", (int)ctype);
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* =========================================================================== */
/* Forward declarations of dimension/type-specific matrix creation helpers */
extern PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ(DM, Mat);
extern PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ_Fill(DM, Mat);
extern PetscErrorCode DMCreateMatrix_DA_1d_SeqAIJ_NoPreallocation(DM, Mat);
extern PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ(DM, Mat);
extern PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ_Fill(DM, Mat);
extern PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ(DM, Mat);
extern PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ_Fill(DM, Mat);
extern PetscErrorCode DMCreateMatrix_DA_2d_MPIBAIJ(DM, Mat);
extern PetscErrorCode DMCreateMatrix_DA_3d_MPIBAIJ(DM, Mat);
extern PetscErrorCode DMCreateMatrix_DA_2d_MPISBAIJ(DM, Mat);
extern PetscErrorCode DMCreateMatrix_DA_3d_MPISBAIJ(DM, Mat);
extern PetscErrorCode DMCreateMatrix_DA_2d_MPISELL(DM, Mat);
extern PetscErrorCode DMCreateMatrix_DA_3d_MPISELL(DM, Mat);
extern PetscErrorCode DMCreateMatrix_DA_IS(DM, Mat);

/*@C
  MatSetupDM - Sets the `DMDA` that is to be used by the HYPRE_StructMatrix PETSc matrix

  Logically Collective on mat

  Input Parameters:
+ mat - the matrix
- da  - the da

  Level: intermediate

.seealso: `Mat`, `MatSetUp()`
@*/
PetscErrorCode MatSetupDM(Mat mat, DM da)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat, MAT_CLASSID, 1);
  PetscValidHeaderSpecificType(da, DM_CLASSID, 2, DMDA);
  PetscTryMethod(mat,
"MatSetupDM_C", (Mat, DM), (mat, da));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* MatView_MPI_DA - views a DMDA-generated parallel matrix in the DMDA's natural ordering */
PetscErrorCode MatView_MPI_DA(Mat A, PetscViewer viewer)
{
  DM                da;
  const char       *prefix;
  Mat               Anatural;
  AO                ao;
  PetscInt          rstart, rend, *petsc, i;
  IS                is;
  MPI_Comm          comm;
  PetscViewerFormat format;

  PetscFunctionBegin;
  /* Check whether we are just printing info, in which case MatView() already viewed everything we wanted to view */
  PetscCall(PetscViewerGetFormat(viewer, &format));
  if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) PetscFunctionReturn(PETSC_SUCCESS);

  PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
  PetscCall(MatGetDM(A, &da));
  PetscCheck(da, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Matrix not generated from a DMDA");

  /* map this process's row range through the AO to obtain the natural-ordering indices */
  PetscCall(DMDAGetAO(da, &ao));
  PetscCall(MatGetOwnershipRange(A, &rstart, &rend));
  PetscCall(PetscMalloc1(rend - rstart, &petsc));
  for (i = rstart; i < rend; i++) petsc[i - rstart] = i;
  PetscCall(AOApplicationToPetsc(ao, rend - rstart, petsc));
  PetscCall(ISCreateGeneral(comm, rend - rstart, petsc, PETSC_OWN_POINTER, &is));

  /* call viewer on natural ordering */
  PetscCall(MatCreateSubMatrix(A, is, is, MAT_INITIAL_MATRIX, &Anatural));
  PetscCall(ISDestroy(&is));
  PetscCall(PetscObjectGetOptionsPrefix((PetscObject)A, &prefix));
  PetscCall(PetscObjectSetOptionsPrefix((PetscObject)Anatural, prefix));
  PetscCall(PetscObjectSetName((PetscObject)Anatural, ((PetscObject)A)->name));
  /* temporarily suppress the class/name/prefix header so Anatural views as if it were A */
  ((PetscObject)Anatural)->donotPetscObjectPrintClassNamePrefixType = PETSC_TRUE;
  PetscCall(MatView(Anatural, viewer));
  ((PetscObject)Anatural)->donotPetscObjectPrintClassNamePrefixType = PETSC_FALSE;
  PetscCall(MatDestroy(&Anatural));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* MatLoad_MPI_DA - loads a matrix stored in natural ordering and permutes it
   into the DMDA's application (PETSc) ordering, replacing A's data in place */
PetscErrorCode MatLoad_MPI_DA(Mat A, PetscViewer viewer)
{
  DM       da;
  Mat      Anatural, Aapp;
  AO       ao;
  PetscInt rstart, rend, *app, i, m, n, M, N;
  IS       is;
  MPI_Comm comm;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
  PetscCall(MatGetDM(A, &da));
  PetscCheck(da, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Matrix not generated from a DMDA");

  /* Load the matrix in natural ordering */
  PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &Anatural));
  PetscCall(MatSetType(Anatural, ((PetscObject)A)->type_name));
  PetscCall(MatGetSize(A, &M, &N));
  PetscCall(MatGetLocalSize(A, &m, &n));
  PetscCall(MatSetSizes(Anatural, m, n, M, N));
  PetscCall(MatLoad(Anatural, viewer));

  /* Map natural ordering to application ordering and create IS */
  PetscCall(DMDAGetAO(da, &ao));
  PetscCall(MatGetOwnershipRange(Anatural, &rstart, &rend));
  PetscCall(PetscMalloc1(rend - rstart, &app));
  for (i = rstart; i < rend; i++) app[i - rstart] = i;
  PetscCall(AOPetscToApplication(ao, rend - rstart, app));
  PetscCall(ISCreateGeneral(comm, rend - rstart, app, PETSC_OWN_POINTER, &is));

  /* Do permutation and replace header */
  PetscCall(MatCreateSubMatrix(Anatural, is, is, MAT_INITIAL_MATRIX, &Aapp));
  PetscCall(MatHeaderReplace(A, &Aapp));
  PetscCall(ISDestroy(&is));
  PetscCall(MatDestroy(&Anatural));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* DMCreateMatrix_DA - creates, preallocates, and sets up a matrix matching the
   DMDA's grid, dof, and stencil; dispatches on the matrix type's preallocation hooks */
PetscErrorCode DMCreateMatrix_DA(DM da, Mat *J)
{
  PetscInt    dim, dof, nx, ny, nz, dims[3], starts[3], M, N, P;
  Mat         A;
  MPI_Comm    comm;
  MatType     Atype;
  MatType     mtype;
  PetscMPIInt size;
  DM_DA      *dd = (DM_DA *)da->data;
  void (*aij)(void) = NULL, (*baij)(void) = NULL, (*sbaij)(void) = NULL, (*sell)(void) = NULL, (*is)(void) = NULL;

  PetscFunctionBegin;
  PetscCall(MatInitializePackage());
  mtype = da->mattype;

  /*
                                    m
            ------------------------------------------------------
           |                                                     |
           |                                                     |
           |               ----------------------               |
           |               |                    |                |
        n  |           ny  |                    |                |
           |               |                    |                |
           |               .---------------------                |
           |             (xs,ys)     nx                          |
           |            .                                        |
           |         (gxs,gys)                                   |
           |                                                     |
            -----------------------------------------------------
  */

  /*
     nc - number of components per grid point
     col - number of colors needed in one direction for single component problem
  */
  M   = dd->M;
  N   = dd->N;
  P   = dd->P;
  dim = da->dim;
  dof = dd->w;
  /* PetscCall(DMDAGetInfo(da,&dim,&M,&N,&P,NULL,NULL,NULL,&dof,NULL,NULL,NULL,NULL,NULL)); */
  PetscCall(DMDAGetCorners(da, NULL, NULL, NULL, &nx, &ny, &nz));
  PetscCall(PetscObjectGetComm((PetscObject)da, &comm));
  PetscCall(MatCreate(comm, &A));
  PetscCall(MatSetSizes(A, dof * nx * ny * nz, dof * nx * ny * nz, dof * M * N * P, dof * M * N * P));
  PetscCall(MatSetType(A, mtype));
  PetscCall(MatSetFromOptions(A));
  if (dof * nx * ny * nz < da->bind_below) {
    PetscCall(MatSetBindingPropagates(A, PETSC_TRUE));
    PetscCall(MatBindToCPU(A, PETSC_TRUE));
  }
  PetscCall(MatSetDM(A, da));
  if (da->structure_only) PetscCall(MatSetOption(A, MAT_STRUCTURE_ONLY, PETSC_TRUE));
  PetscCall(MatGetType(A, &Atype));
  /*
     We do not provide a getmatrix function in the DMDA operations because
     the basic DMDA does not know about matrices. We think of DMDA as being more
     low-level than matrices.

     We could switch based on Atype (or mtype), but we do not since the
     specialized setting routines depend only on the particular preallocation
     details of the matrix, not the type itself.
  */
  PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatMPIAIJSetPreallocation_C", &aij));
  if (!aij) PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatSeqAIJSetPreallocation_C", &aij));
  if (!aij) {
    PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatMPIBAIJSetPreallocation_C", &baij));
    if (!baij) PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatSeqBAIJSetPreallocation_C", &baij));
    if (!baij) {
      PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatMPISBAIJSetPreallocation_C", &sbaij));
      if (!sbaij) PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatSeqSBAIJSetPreallocation_C", &sbaij));
      if (!sbaij) {
        PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatMPISELLSetPreallocation_C", &sell));
        if (!sell) PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatSeqSELLSetPreallocation_C", &sell));
      }
      if (!sell) PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatISSetPreallocation_C", &is));
    }
  }
  if (aij) {
    if (dim == 1) {
      if (dd->ofill) {
        PetscCall(DMCreateMatrix_DA_1d_MPIAIJ_Fill(da, A));
      } else {
        DMBoundaryType bx;
        PetscMPIInt    size;
        PetscCall(DMDAGetInfo(da, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, &bx, NULL, NULL, NULL));
        PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)da), &size));
        if (size == 1 && bx == DM_BOUNDARY_NONE) {
          PetscCall(DMCreateMatrix_DA_1d_SeqAIJ_NoPreallocation(da, A));
        } else {
          PetscCall(DMCreateMatrix_DA_1d_MPIAIJ(da, A));
        }
      }
    } else if (dim == 2) {
      if (dd->ofill) {
        PetscCall(DMCreateMatrix_DA_2d_MPIAIJ_Fill(da, A));
      } else {
        PetscCall(DMCreateMatrix_DA_2d_MPIAIJ(da, A));
      }
    } else if (dim == 3) {
      if (dd->ofill) {
        PetscCall(DMCreateMatrix_DA_3d_MPIAIJ_Fill(da, A));
      } else {
        PetscCall(DMCreateMatrix_DA_3d_MPIAIJ(da, A));
      }
    }
  } else if (baij) {
    if (dim == 2) {
      PetscCall(DMCreateMatrix_DA_2d_MPIBAIJ(da, A));
    } else if (dim == 3) {
      PetscCall(DMCreateMatrix_DA_3d_MPIBAIJ(da, A));
    } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "Not implemented for %" PetscInt_FMT " dimension and Matrix Type: %s in %" PetscInt_FMT " dimension! Send mail to petsc-maint@mcs.anl.gov for code", dim, Atype, dim);
  } else if (sbaij) {
    if (dim == 2) {
      PetscCall(DMCreateMatrix_DA_2d_MPISBAIJ(da, A));
    } else if (dim == 3) {
      PetscCall(DMCreateMatrix_DA_3d_MPISBAIJ(da, A));
    } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "Not implemented for %" PetscInt_FMT " dimension and Matrix Type: %s in %" PetscInt_FMT " dimension! Send mail to petsc-maint@mcs.anl.gov for code", dim, Atype, dim);
  } else if (sell) {
    if (dim == 2) {
      PetscCall(DMCreateMatrix_DA_2d_MPISELL(da, A));
    } else if (dim == 3) {
      PetscCall(DMCreateMatrix_DA_3d_MPISELL(da, A));
    } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "Not implemented for %" PetscInt_FMT " dimension and Matrix Type: %s in %" PetscInt_FMT " dimension! Send mail to petsc-maint@mcs.anl.gov for code", dim, Atype, dim);
  } else if (is) {
    PetscCall(DMCreateMatrix_DA_IS(da, A));
  } else {
    /* unknown type: no specialized preallocation; just set up with the DMDA's l2g map */
    ISLocalToGlobalMapping ltog;

    PetscCall(MatSetBlockSize(A, dof));
    PetscCall(MatSetUp(A));
    PetscCall(DMGetLocalToGlobalMapping(da, &ltog));
    PetscCall(MatSetLocalToGlobalMapping(A, ltog, ltog));
  }
  PetscCall(DMDAGetGhostCorners(da, &starts[0], &starts[1], &starts[2], &dims[0], &dims[1], &dims[2]));
  PetscCall(MatSetStencil(A, dim, dims, starts, dof));
  PetscCall(MatSetDM(A, da));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  if (size > 1) {
    /* change viewer to display matrix in natural ordering */
    PetscCall(MatSetOperation(A, MATOP_VIEW, (void (*)(void))MatView_MPI_DA));
    PetscCall(MatSetOperation(A, MATOP_LOAD, (void (*)(void))MatLoad_MPI_DA));
  }
  *J = A;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* ---------------------------------------------------------------------------------*/
PETSC_EXTERN PetscErrorCode MatISSetPreallocation_IS(Mat, PetscInt, const PetscInt[], PetscInt, const PetscInt[]);

/* DMCreateMatrix_DA_IS - sets up the local-to-global mapping and preallocation
   for a MATIS matrix built from the DMDA element decomposition */
PetscErrorCode DMCreateMatrix_DA_IS(DM dm, Mat J)
{
  DM_DA                 *da = (DM_DA *)dm->data;
  Mat                    lJ, P;
  ISLocalToGlobalMapping ltog;
  IS                     is;
  PetscBT                bt;
  const PetscInt        *e_loc, *idx;
  PetscInt               i, nel, nen, nv, dof, *gidx, n, N;

  /* The l2g map of DMDA has all ghosted nodes, and e_loc is a subset of all the local nodes (including the ghosted)
     We need to filter out the local indices that are not represented through the DMDAGetElements decomposition */
  PetscFunctionBegin;
  dof = da->w;
  PetscCall(MatSetBlockSize(J, dof));
  PetscCall(DMGetLocalToGlobalMapping(dm, &ltog));

  /* flag local elements indices in local DMDA numbering */
  PetscCall(ISLocalToGlobalMappingGetSize(ltog, &nv));
  PetscCall(PetscBTCreate(nv / dof, &bt));
  PetscCall(DMDAGetElements(dm, &nel, &nen, &e_loc)); /* this will throw an error
                                                         if the stencil type is not DMDA_STENCIL_BOX */
  for (i = 0; i < nel * nen; i++) PetscCall(PetscBTSet(bt, e_loc[i]));

  /* filter out (set to -1) the global indices not used by the local elements */
  PetscCall(PetscMalloc1(nv / dof, &gidx));
  PetscCall(ISLocalToGlobalMappingGetBlockIndices(ltog, &idx));
  PetscCall(PetscArraycpy(gidx, idx, nv / dof));
  PetscCall(ISLocalToGlobalMappingRestoreBlockIndices(ltog, &idx));
  for (i = 0; i < nv / dof; i++)
    if (!PetscBTLookup(bt, i)) gidx[i] = -1;
  PetscCall(PetscBTDestroy(&bt));
  PetscCall(ISCreateBlock(PetscObjectComm((PetscObject)dm), dof, nv / dof, gidx, PETSC_OWN_POINTER, &is));
  PetscCall(ISLocalToGlobalMappingCreateIS(is, &ltog));
  PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));
  PetscCall(ISLocalToGlobalMappingDestroy(&ltog));
  PetscCall(ISDestroy(&is));

  /* Preallocation */
  if (dm->prealloc_skip) {
    PetscCall(MatSetUp(J));
  } else {
    /* use a MATPREALLOCATOR stand-in to compute the local nonzero pattern from the element lists */
    PetscCall(MatISGetLocalMat(J, &lJ));
    PetscCall(MatGetLocalToGlobalMapping(lJ, &ltog, NULL));
    PetscCall(MatCreate(PetscObjectComm((PetscObject)lJ), &P));
    PetscCall(MatSetType(P, MATPREALLOCATOR));
    PetscCall(MatSetLocalToGlobalMapping(P, ltog, ltog));
    PetscCall(MatGetSize(lJ, &N, NULL));
    PetscCall(MatGetLocalSize(lJ, &n, NULL));
    PetscCall(MatSetSizes(P, n, n, N, N));
    PetscCall(MatSetBlockSize(P, dof));
    PetscCall(MatSetUp(P));
    for (i = 0; i < nel; i++) PetscCall(MatSetValuesBlockedLocal(P, nen, e_loc + i * nen, nen, e_loc + i * nen, NULL, INSERT_VALUES));
    PetscCall(MatPreallocatorPreallocate(P, (PetscBool)!da->prealloc_only, lJ));
    PetscCall(MatISRestoreLocalMat(J, &lJ));
    PetscCall(DMDARestoreElements(dm, &nel, &nen, &e_loc));
    PetscCall(MatDestroy(&P));

    PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode DMCreateMatrix_DA_2d_MPISELL(DM
da, Mat J)
{
  PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny, m, n, dim, s, *cols = NULL, k, nc, *rows = NULL, col, cnt, l, p;
  PetscInt               lstart, lend, pstart, pend, *dnz, *onz;
  MPI_Comm               comm;
  PetscScalar           *values;
  DMBoundaryType         bx, by;
  ISLocalToGlobalMapping ltog;
  DMDAStencilType        st;

  PetscFunctionBegin;
  /*
         nc - number of components per grid point
         col - number of colors needed in one direction for single component problem
  */
  PetscCall(DMDAGetInfo(da, &dim, &m, &n, NULL, NULL, NULL, NULL, &nc, &s, &bx, &by, NULL, &st));
  col = 2 * s + 1;
  PetscCall(DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL));
  PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL));
  PetscCall(PetscObjectGetComm((PetscObject)da, &comm));

  /* rows holds the nc row indices of one grid node; cols holds at most a full box stencil of columns */
  PetscCall(PetscMalloc2(nc, &rows, col * col * nc * nc, &cols));
  PetscCall(DMGetLocalToGlobalMapping(da, &ltog));

  PetscCall(MatSetBlockSize(J, nc));
  /* determine the matrix preallocation information */
  MatPreallocateBegin(comm, nc * nx * ny, nc * nx * ny, dnz, onz);
  for (i = xs; i < xs + nx; i++) {
    /* clip the stencil at physical boundaries unless periodic in x */
    pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
    pend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));

    for (j = ys; j < ys + ny; j++) {
      slot = i - gxs + gnx * (j - gys); /* node index in the ghosted local ordering */

      lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
      lend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));

      cnt = 0;
      for (k = 0; k < nc; k++) {
        for (l = lstart; l < lend + 1; l++) {
          for (p = pstart; p < pend + 1; p++) {
            if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star have either l = 0 or p = 0 */
              cols[cnt++] = k + nc * (slot + gnx * l + p);
            }
          }
        }
        rows[k] = k + nc * (slot);
      }
      PetscCall(MatPreallocateSetLocal(ltog, nc, rows, ltog, cnt, cols, dnz, onz));
    }
  }
  PetscCall(MatSetBlockSize(J, nc));
  PetscCall(MatSeqSELLSetPreallocation(J, 0, dnz));
  PetscCall(MatMPISELLSetPreallocation(J, 0, dnz, 0, onz));
  MatPreallocateEnd(dnz, onz);

  PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    /* insert explicit zeros so the nonzero pattern is fixed before the user assembles */
    PetscCall(PetscCalloc1(col * col * nc * nc, &values));
    for (i = xs; i < xs + nx; i++) {
      pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
      pend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));

      for (j = ys; j < ys + ny; j++) {
        slot = i - gxs + gnx * (j - gys);

        lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
        lend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));

        cnt = 0;
        for (k = 0; k < nc; k++) {
          for (l = lstart; l < lend + 1; l++) {
            for (p = pstart; p < pend + 1; p++) {
              if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star have either l = 0 or p = 0 */
                cols[cnt++] = k + nc * (slot + gnx * l + p);
              }
            }
          }
          rows[k] = k + nc * (slot);
        }
        PetscCall(MatSetValuesLocal(J, nc, rows, cnt, cols, values, INSERT_VALUES));
      }
    }
    PetscCall(PetscFree(values));
    /* do not copy values to GPU since they are all zero and not yet needed there */
    PetscCall(MatBindToCPU(J, PETSC_TRUE));
    PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatBindToCPU(J, PETSC_FALSE));
    PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  }
  PetscCall(PetscFree2(rows, cols));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  DMCreateMatrix_DA_3d_MPISELL - 3d analogue of DMCreateMatrix_DA_2d_MPISELL: preallocates
  (and optionally zero-fills) a MATMPISELL matrix from the DMDA stencil.
*/
PetscErrorCode DMCreateMatrix_DA_3d_MPISELL(DM da, Mat J)
{
  PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
  PetscInt               m, n, dim, s, *cols = NULL, k, nc, *rows = NULL, col, cnt, l, p, *dnz = NULL, *onz = NULL;
  PetscInt               istart, iend, jstart, jend, kstart, kend, zs, nz, gzs, gnz, ii, jj, kk, M, N, P;
  MPI_Comm               comm;
  PetscScalar           *values;
  DMBoundaryType         bx, by, bz;
  ISLocalToGlobalMapping ltog;
  DMDAStencilType        st;

  PetscFunctionBegin;
  /*
         nc - number of components per grid point
         col - number of colors needed in one direction for single component problem
  */
  PetscCall(DMDAGetInfo(da, &dim, &m, &n, &p, &M, &N, &P, &nc, &s, &bx, &by, &bz, &st));
  col = 2 * s + 1;
  PetscCall(DMDAGetCorners(da, &xs, &ys, &zs, &nx, &ny, &nz));
  PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, &gzs, &gnx, &gny, &gnz));
  PetscCall(PetscObjectGetComm((PetscObject)da, &comm));

  PetscCall(PetscMalloc2(nc, &rows, col * col * col * nc * nc, &cols));
  PetscCall(DMGetLocalToGlobalMapping(da, &ltog));

  PetscCall(MatSetBlockSize(J, nc));
  /* determine the matrix preallocation information */
  MatPreallocateBegin(comm, nc * nx * ny * nz, nc * nx * ny * nz, dnz, onz);
  for (i = xs; i < xs + nx; i++) {
    /* stencil clipped at non-periodic boundaries in each direction */
    istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
    iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
    for (j = ys; j < ys + ny; j++) {
      jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
      jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
      for (k = zs; k < zs + nz; k++) {
        kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
        kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));

        slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs); /* ghosted local node index */

        cnt = 0;
        for (l = 0; l < nc; l++) {
          for (ii = istart; ii < iend + 1; ii++) {
            for (jj = jstart; jj < jend + 1; jj++) {
              for (kk = kstart; kk < kend + 1; kk++) {
                if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/
                  cols[cnt++] = l + nc * (slot + ii + gnx * jj + gnx * gny * kk);
                }
              }
            }
          }
          rows[l] = l + nc * (slot);
        }
        PetscCall(MatPreallocateSetLocal(ltog, nc, rows, ltog, cnt, cols, dnz, onz));
      }
    }
  }
  PetscCall(MatSetBlockSize(J, nc));
  PetscCall(MatSeqSELLSetPreallocation(J, 0, dnz));
  PetscCall(MatMPISELLSetPreallocation(J, 0, dnz, 0, onz));
  MatPreallocateEnd(dnz, onz);
  PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    PetscCall(PetscCalloc1(col * col * col * nc * nc * nc, &values));
    for (i = xs; i < xs + nx; i++) {
      istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
      iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
      for (j = ys; j < ys + ny; j++) {
        jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
        jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
        for (k = zs; k < zs + nz; k++) {
          kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
          kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));

          slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);

          cnt = 0;
          for (l = 0; l < nc; l++) {
            for (ii = istart; ii < iend + 1; ii++) {
              for (jj = jstart; jj < jend + 1; jj++) {
                for (kk = kstart; kk < kend + 1; kk++) {
                  if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/
                    cols[cnt++] = l + nc * (slot + ii + gnx * jj + gnx * gny * kk);
                  }
                }
              }
            }
            rows[l] = l + nc * (slot);
          }
          PetscCall(MatSetValuesLocal(J, nc, rows, cnt, cols, values, INSERT_VALUES));
        }
      }
    }
    PetscCall(PetscFree(values));
    /* do not copy values to GPU since they are all zero and not yet needed there */
    PetscCall(MatBindToCPU(J, PETSC_TRUE));
    PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatBindToCPU(J, PETSC_FALSE));
    PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  }
  PetscCall(PetscFree2(rows, cols));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  DMCreateMatrix_DA_2d_MPIAIJ - Preallocates (and, unless da->prealloc_only, pattern-fills)
  a MATMPIAIJ matrix for a 2d DMDA from the stencil width/type.
*/
PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ(DM da, Mat J)
{
  PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny, m, n, dim, s, *cols = NULL, k, nc, *rows = NULL, col, cnt, l, p, M, N;
  PetscInt               lstart, lend, pstart, pend,
 *dnz, *onz;
  MPI_Comm               comm;
  DMBoundaryType         bx, by;
  ISLocalToGlobalMapping ltog, mltog;
  DMDAStencilType        st;
  PetscBool              removedups = PETSC_FALSE, alreadyboundtocpu = PETSC_TRUE;

  PetscFunctionBegin;
  /*
         nc - number of components per grid point
         col - number of colors needed in one direction for single component problem
  */
  PetscCall(DMDAGetInfo(da, &dim, &m, &n, NULL, &M, &N, NULL, &nc, &s, &bx, &by, NULL, &st));
  if (bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE) PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_TRUE));
  col = 2 * s + 1;
  /*
       With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times
       because of "wrapping" around the end of the domain hitting an entry already counted in the other direction.
  */
  if (M == 1 && 2 * s >= m) removedups = PETSC_TRUE;
  if (N == 1 && 2 * s >= n) removedups = PETSC_TRUE;
  PetscCall(DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL));
  PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL));
  PetscCall(PetscObjectGetComm((PetscObject)da, &comm));

  PetscCall(PetscMalloc2(nc, &rows, col * col * nc * nc, &cols));
  PetscCall(DMGetLocalToGlobalMapping(da, &ltog));

  PetscCall(MatSetBlockSize(J, nc));
  /* determine the matrix preallocation information */
  MatPreallocateBegin(comm, nc * nx * ny, nc * nx * ny, dnz, onz);
  for (i = xs; i < xs + nx; i++) {
    /* stencil extent clipped at non-periodic boundaries */
    pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
    pend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));

    for (j = ys; j < ys + ny; j++) {
      slot = i - gxs + gnx * (j - gys); /* ghosted local node index */

      lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
      lend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));

      cnt = 0;
      for (k = 0; k < nc; k++) {
        for (l = lstart; l < lend + 1; l++) {
          for (p = pstart; p < pend + 1; p++) {
            if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star have either l = 0 or p = 0 */
              cols[cnt++] = k + nc * (slot + gnx * l + p);
            }
          }
        }
        rows[k] = k + nc * (slot);
      }
      if (removedups) PetscCall(MatPreallocateSetLocalRemoveDups(ltog, nc, rows, ltog, cnt, cols, dnz, onz));
      else PetscCall(MatPreallocateSetLocal(ltog, nc, rows, ltog, cnt, cols, dnz, onz));
    }
  }
  PetscCall(MatSetBlockSize(J, nc));
  PetscCall(MatSeqAIJSetPreallocation(J, 0, dnz));
  PetscCall(MatMPIAIJSetPreallocation(J, 0, dnz, 0, onz));
  MatPreallocateEnd(dnz, onz);
  /* keep a user-provided mapping if one is already attached to J */
  PetscCall(MatGetLocalToGlobalMapping(J, &mltog, NULL));
  if (!mltog) PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    for (i = xs; i < xs + nx; i++) {
      pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
      pend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));

      for (j = ys; j < ys + ny; j++) {
        slot = i - gxs + gnx * (j - gys);

        lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
        lend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));

        cnt = 0;
        /* columns generated node-by-node: first component of the node, then the remaining nc-1 consecutively */
        for (l = lstart; l < lend + 1; l++) {
          for (p = pstart; p < pend + 1; p++) {
            if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star have either l = 0 or p = 0 */
              cols[cnt++] = nc * (slot + gnx * l + p);
              for (k = 1; k < nc; k++) {
                cols[cnt] = 1 + cols[cnt - 1];
                cnt++;
              }
            }
          }
        }
        for (k = 0; k < nc; k++) rows[k] = k + nc * (slot);
        PetscCall(MatSetValuesLocal(J, nc, rows, cnt, cols, NULL, INSERT_VALUES));
      }
    }
    /* do not copy values to GPU since they are all zero and not yet needed there */
    PetscCall(MatBoundToCPU(J, &alreadyboundtocpu));
    PetscCall(MatBindToCPU(J, PETSC_TRUE));
    PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
    if (!alreadyboundtocpu) PetscCall(MatBindToCPU(J, PETSC_FALSE));
    PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
    if (bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE) PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_FALSE));
  }
  PetscCall(PetscFree2(rows, cols));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  DMCreateMatrix_DA_2d_MPIAIJ_Fill - Like DMCreateMatrix_DA_2d_MPIAIJ, but uses the per-block
  fill patterns dd->dfill/dd->ofill (set via DMDASetBlockFills(); sparse CSR-like layout) to
  preallocate only the coupled components within each stencil entry.
*/
PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ_Fill(DM da, Mat J)
{
  PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
  PetscInt               m, n, dim, s, *cols, k, nc, row, col, cnt, maxcnt = 0, l, p, M, N;
  PetscInt               lstart, lend, pstart, pend, *dnz, *onz;
  DM_DA                 *dd = (DM_DA *)da->data;
  PetscInt               ifill_col, *ofill = dd->ofill, *dfill = dd->dfill;
  MPI_Comm               comm;
  DMBoundaryType         bx, by;
  ISLocalToGlobalMapping ltog;
  DMDAStencilType        st;
  PetscBool              removedups = PETSC_FALSE;

  PetscFunctionBegin;
  /*
         nc - number of components per grid point
         col - number of colors needed in one direction for single component problem
  */
  PetscCall(DMDAGetInfo(da, &dim, &m, &n, NULL, &M, &N, NULL, &nc, &s, &bx, &by, NULL, &st));
  col =
2 * s + 1;
  /*
       With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times
       because of "wrapping" around the end of the domain hitting an entry already counted in the other direction.
  */
  if (M == 1 && 2 * s >= m) removedups = PETSC_TRUE;
  if (N == 1 && 2 * s >= n) removedups = PETSC_TRUE;
  PetscCall(DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL));
  PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL));
  PetscCall(PetscObjectGetComm((PetscObject)da, &comm));

  PetscCall(PetscMalloc1(col * col * nc, &cols));
  PetscCall(DMGetLocalToGlobalMapping(da, &ltog));

  PetscCall(MatSetBlockSize(J, nc));
  /* determine the matrix preallocation information; rows are processed one component at a time */
  MatPreallocateBegin(comm, nc * nx * ny, nc * nx * ny, dnz, onz);
  for (i = xs; i < xs + nx; i++) {
    pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
    pend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));

    for (j = ys; j < ys + ny; j++) {
      slot = i - gxs + gnx * (j - gys); /* ghosted local node index */

      lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
      lend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));

      for (k = 0; k < nc; k++) {
        cnt = 0;
        for (l = lstart; l < lend + 1; l++) {
          for (p = pstart; p < pend + 1; p++) {
            if (l || p) {
              /* off-diagonal stencil entry: couple only the components listed in ofill for row-component k */
              if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star */
                for (ifill_col = ofill[k]; ifill_col < ofill[k + 1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc * (slot + gnx * l + p);
              }
            } else {
              /* diagonal (same node) entry: couple per dfill, or densely if no dfill was given */
              if (dfill) {
                for (ifill_col = dfill[k]; ifill_col < dfill[k + 1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc * (slot + gnx * l + p);
              } else {
                for (ifill_col = 0; ifill_col < nc; ifill_col++) cols[cnt++] = ifill_col + nc * (slot + gnx * l + p);
              }
            }
          }
        }
        row    = k + nc * (slot);
        maxcnt = PetscMax(maxcnt, cnt);
        if (removedups) PetscCall(MatPreallocateSetLocalRemoveDups(ltog, 1, &row, ltog, cnt, cols, dnz, onz));
        else PetscCall(MatPreallocateSetLocal(ltog, 1, &row, ltog, cnt, cols, dnz, onz));
      }
    }
  }
  PetscCall(MatSeqAIJSetPreallocation(J, 0, dnz));
  PetscCall(MatMPIAIJSetPreallocation(J, 0, dnz, 0, onz));
  MatPreallocateEnd(dnz, onz);
  PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    for (i = xs; i < xs + nx; i++) {
      pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
      pend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));

      for (j = ys; j < ys + ny; j++) {
        slot = i - gxs + gnx * (j - gys);

        lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
        lend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));

        for (k = 0; k < nc; k++) {
          cnt = 0;
          for (l = lstart; l < lend + 1; l++) {
            for (p = pstart; p < pend + 1; p++) {
              if (l || p) {
                if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star */
                  for (ifill_col = ofill[k]; ifill_col < ofill[k + 1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc * (slot + gnx * l + p);
                }
              } else {
                if (dfill) {
                  for (ifill_col = dfill[k]; ifill_col < dfill[k + 1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc * (slot + gnx * l + p);
                } else {
                  for (ifill_col = 0; ifill_col < nc; ifill_col++) cols[cnt++] = ifill_col + nc * (slot + gnx * l + p);
                }
              }
            }
          }
          row = k + nc * (slot);
          PetscCall(MatSetValuesLocal(J, 1, &row, cnt, cols, NULL, INSERT_VALUES));
        }
      }
    }
    /* do not copy values to GPU since they are all zero and not yet needed there */
    PetscCall(MatBindToCPU(J, PETSC_TRUE));
    PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatBindToCPU(J, PETSC_FALSE));
    PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  }
  PetscCall(PetscFree(cols));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* ---------------------------------------------------------------------------------*/

/*
  DMCreateMatrix_DA_3d_MPIAIJ - Preallocates (and, unless da->prealloc_only, pattern-fills)
  a MATMPIAIJ matrix for a 3d DMDA from the stencil width/type.
*/
PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ(DM da, Mat J)
{
  PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
  PetscInt               m, n, dim, s, *cols = NULL, k, nc, *rows = NULL, col, cnt, l, p, *dnz = NULL, *onz = NULL;
  PetscInt               istart, iend, jstart, jend, kstart, kend, zs, nz, gzs, gnz, ii, jj, kk, M, N, P;
  MPI_Comm               comm;
  DMBoundaryType         bx, by, bz;
  ISLocalToGlobalMapping ltog, mltog;
  DMDAStencilType        st;
  PetscBool              removedups = PETSC_FALSE;

  PetscFunctionBegin;
  /*
         nc - number of components per grid point
         col - number of colors needed
in one direction for single component problem
  */
  PetscCall(DMDAGetInfo(da, &dim, &m, &n, &p, &M, &N, &P, &nc, &s, &bx, &by, &bz, &st));
  if (bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE && bz == DM_BOUNDARY_NONE) PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_TRUE));
  col = 2 * s + 1;

  /*
       With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times
       because of "wrapping" around the end of the domain hitting an entry already counted in the other direction.
  */
  if (M == 1 && 2 * s >= m) removedups = PETSC_TRUE;
  if (N == 1 && 2 * s >= n) removedups = PETSC_TRUE;
  if (P == 1 && 2 * s >= p) removedups = PETSC_TRUE;

  PetscCall(DMDAGetCorners(da, &xs, &ys, &zs, &nx, &ny, &nz));
  PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, &gzs, &gnx, &gny, &gnz));
  PetscCall(PetscObjectGetComm((PetscObject)da, &comm));

  PetscCall(PetscMalloc2(nc, &rows, col * col * col * nc * nc, &cols));
  PetscCall(DMGetLocalToGlobalMapping(da, &ltog));

  PetscCall(MatSetBlockSize(J, nc));
  /* determine the matrix preallocation information */
  MatPreallocateBegin(comm, nc * nx * ny * nz, nc * nx * ny * nz, dnz, onz);
  for (i = xs; i < xs + nx; i++) {
    istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
    iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
    for (j = ys; j < ys + ny; j++) {
      jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
      jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
      for (k = zs; k < zs + nz; k++) {
        kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
        kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));

        slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs); /* ghosted local node index */

        cnt = 0;
        for (l = 0; l < nc; l++) {
          for (ii = istart; ii < iend + 1; ii++) {
            for (jj = jstart; jj < jend + 1; jj++) {
              for (kk = kstart; kk < kend + 1; kk++) {
                if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/
                  cols[cnt++] = l + nc * (slot + ii + gnx * jj + gnx * gny * kk);
                }
              }
            }
          }
          rows[l] = l + nc * (slot);
        }
        if (removedups) PetscCall(MatPreallocateSetLocalRemoveDups(ltog, nc, rows, ltog, cnt, cols, dnz, onz));
        else PetscCall(MatPreallocateSetLocal(ltog, nc, rows, ltog, cnt, cols, dnz, onz));
      }
    }
  }
  PetscCall(MatSetBlockSize(J, nc));
  PetscCall(MatSeqAIJSetPreallocation(J, 0, dnz));
  PetscCall(MatMPIAIJSetPreallocation(J, 0, dnz, 0, onz));
  MatPreallocateEnd(dnz, onz);
  /* keep a user-provided mapping if one is already attached to J */
  PetscCall(MatGetLocalToGlobalMapping(J, &mltog, NULL));
  if (!mltog) PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    for (i = xs; i < xs + nx; i++) {
      istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
      iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
      for (j = ys; j < ys + ny; j++) {
        jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
        jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
        for (k = zs; k < zs + nz; k++) {
          kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
          kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));

          slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);

          cnt = 0;
          /* note loop order (kk outermost) differs from the counting pass; columns are emitted
             node-by-node with the nc components of each node consecutive */
          for (kk = kstart; kk < kend + 1; kk++) {
            for (jj = jstart; jj < jend + 1; jj++) {
              for (ii = istart; ii < iend + 1; ii++) {
                if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/
                  cols[cnt++] = nc * (slot + ii + gnx * jj + gnx * gny * kk);
                  for (l = 1; l < nc; l++) {
                    cols[cnt] = 1 + cols[cnt - 1];
                    cnt++;
                  }
                }
              }
            }
          }
          rows[0] = nc * (slot);
          for (l = 1; l < nc; l++) rows[l] = 1 + rows[l - 1];
          PetscCall(MatSetValuesLocal(J, nc, rows, cnt, cols, NULL, INSERT_VALUES));
        }
      }
    }
    /* do not copy values to GPU since they are all zero and not yet needed there */
    PetscCall(MatBindToCPU(J, PETSC_TRUE));
    PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
    if (bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE && bz == DM_BOUNDARY_NONE) PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_FALSE));
    PetscCall(MatBindToCPU(J, PETSC_FALSE));
    PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  }
  PetscCall(PetscFree2(rows, cols));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* ---------------------------------------------------------------------------------*/

/*
  DMCreateMatrix_DA_1d_MPIAIJ_Fill - Preallocates (and, unless da->prealloc_only, pattern-fills)
  a MATMPIAIJ matrix for a 1d DMDA honoring the per-block fill patterns dd->dfill/dd->ofill.
  Only stencil width s <= 1 is supported (checked below).
*/
PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ_Fill(DM da, Mat J)
{
  DM_DA                 *dd = (DM_DA *)da->data;
  PetscInt               xs, nx, i, j, gxs, gnx, row, k, l;
  PetscInt               m, dim, s, *cols = NULL, nc, cnt, maxcnt = 0, *ocols;
  PetscInt              *ofill = dd->ofill, *dfill = dd->dfill;
  DMBoundaryType         bx;
  ISLocalToGlobalMapping ltog;
  PetscMPIInt            rank, size;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)da), &rank));
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)da), &size));

  /*
     nc - number of components per grid point
  */
  PetscCall(DMDAGetInfo(da, &dim, &m, NULL, NULL, NULL, NULL, NULL, &nc, &s, &bx, NULL, NULL, NULL));
  PetscCheck(s <= 1, PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "Matrix creation for 1d not implemented correctly for stencil width larger than 1");
  PetscCall(DMDAGetCorners(da, &xs, NULL, NULL, &nx, NULL, NULL));
  PetscCall(DMDAGetGhostCorners(da, &gxs, NULL, NULL, &gnx, NULL, NULL));

  PetscCall(MatSetBlockSize(J, nc));
  /* cols[]/ocols[] here are per-row counts of diagonal-block / off-diagonal-block nonzeros */
  PetscCall(PetscCalloc2(nx * nc, &cols, nx * nc, &ocols));

  /*
        note should be smaller for first and last process with no periodic
        does not handle dfill
  */
  cnt = 0;
  /* coupling with process to the left */
  for (i = 0; i < s; i++) {
    for (j = 0; j < nc; j++) {
      ocols[cnt] = ((rank == 0) ? 0 : (s - i) * (ofill[j + 1] - ofill[j]));
      /* NOTE(review): dfill is dereferenced unconditionally in this counting pass (unlike the
         insertion pass below, which checks "if (dfill)") — presumably callers guarantee
         dfill != NULL here; confirm against the call sites of this routine */
      cols[cnt] = dfill[j + 1] - dfill[j] + (s + i) * (ofill[j + 1] - ofill[j]);
      if (rank == 0 && (dd->bx == DM_BOUNDARY_PERIODIC)) {
        /* periodic wrap: the wrapped neighbor is off-process unless there is a single rank */
        if (size > 1) ocols[cnt] += (s - i) * (ofill[j + 1] - ofill[j]);
        else cols[cnt] += (s - i) * (ofill[j + 1] - ofill[j]);
      }
      maxcnt = PetscMax(maxcnt, ocols[cnt] + cols[cnt]);
      cnt++;
    }
  }
  /* interior rows: full 2s off-node couplings plus the diagonal-block couplings */
  for (i = s; i < nx - s; i++) {
    for (j = 0; j < nc; j++) {
      cols[cnt] = dfill[j + 1] - dfill[j] + 2 * s * (ofill[j + 1] - ofill[j]);
      maxcnt    = PetscMax(maxcnt, ocols[cnt] + cols[cnt]);
      cnt++;
    }
  }
  /* coupling with process to the right */
  for (i = nx - s; i < nx; i++) {
    for (j = 0; j < nc; j++) {
      ocols[cnt] = ((rank == (size - 1)) ? 0 : (i - nx + s + 1) * (ofill[j + 1] - ofill[j]));
      cols[cnt]  = dfill[j + 1] - dfill[j] + (s + nx - i - 1) * (ofill[j + 1] - ofill[j]);
      if ((rank == size - 1) && (dd->bx == DM_BOUNDARY_PERIODIC)) {
        if (size > 1) ocols[cnt] += (i - nx + s + 1) * (ofill[j + 1] - ofill[j]);
        else cols[cnt] += (i - nx + s + 1) * (ofill[j + 1] - ofill[j]);
      }
      maxcnt = PetscMax(maxcnt, ocols[cnt] + cols[cnt]);
      cnt++;
    }
  }

  PetscCall(MatSeqAIJSetPreallocation(J, 0, cols));
  PetscCall(MatMPIAIJSetPreallocation(J, 0, cols, 0, ocols));
  PetscCall(PetscFree2(cols, ocols));

  PetscCall(DMGetLocalToGlobalMapping(da, &ltog));
  PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    PetscCall(PetscMalloc1(maxcnt, &cols));
    row = xs * nc; /* note: this pass uses global indices via MatSetValues(), not MatSetValuesLocal() */
    /* coupling with process to the left */
    for (i = xs; i < xs + s; i++) {
      for (j = 0; j < nc; j++) {
        cnt = 0;
        if (rank) {
          for (l = 0; l < s; l++) {
            for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i - s + l) * nc + ofill[k];
          }
        }
        if (rank == 0 && (dd->bx == DM_BOUNDARY_PERIODIC)) {
          /* wrapped neighbor at the far end of the periodic domain */
          for (l = 0; l < s; l++) {
            for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (m + i - s - l) * nc + ofill[k];
          }
        }
        if (dfill) {
          for (k = dfill[j]; k < dfill[j + 1]; k++) cols[cnt++] = i * nc + dfill[k];
        } else {
          for (k = 0; k < nc; k++) cols[cnt++] = i * nc + k;
        }
        for (l = 0; l < s; l++) {
          for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i + s - l) * nc + ofill[k];
        }
        PetscCall(MatSetValues(J, 1, &row, cnt, cols, NULL, INSERT_VALUES));
        row++;
      }
    }
    for (i = xs + s; i < xs + nx - s; i++) {
      for (j = 0; j < nc; j++) {
        cnt = 0;
        for (l = 0; l < s; l++) {
          for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i - s + l) * nc + ofill[k];
        }
        if (dfill) {
          for (k = dfill[j]; k < dfill[j + 1]; k++) cols[cnt++] = i * nc + dfill[k];
        } else {
          for (k = 0; k < nc; k++) cols[cnt++] = i * nc + k;
        }
        for (l = 0; l < s; l++) {
          for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i + s - l) * nc + ofill[k];
        }
        PetscCall(MatSetValues(J, 1, &row, cnt, cols, NULL, INSERT_VALUES));
        row++;
      }
    }
    /* coupling with process to the right */
    for (i = xs + nx - s; i < xs + nx; i++) {
      for (j = 0; j < nc; j++) {
        cnt = 0;
        for (l = 0; l < s; l++) {
          for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i - s + l) * nc + ofill[k];
        }
        if (dfill) {
          for (k = dfill[j]; k < dfill[j + 1]; k++) cols[cnt++] = i * nc + dfill[k];
        } else {
          for (k = 0; k < nc; k++) cols[cnt++] = i * nc + k;
        }
        if (rank < size - 1) {
          for (l = 0; l < s; l++) {
            for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i + s - l) * nc + ofill[k];
          }
        }
        if ((rank == size - 1) && (dd->bx == DM_BOUNDARY_PERIODIC)) {
          for (l = 0; l < s; l++) {
            for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i - s - l - m + 2) * nc + ofill[k];
          }
        }
        PetscCall(MatSetValues(J, 1, &row, cnt, cols, NULL, INSERT_VALUES));
        row++;
      }
    }
    PetscCall(PetscFree(cols));
    /* do not copy values to GPU since they are all zero and not yet needed there */
    PetscCall(MatBindToCPU(J, PETSC_TRUE));
    PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatBindToCPU(J, PETSC_FALSE));
    PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
 ---------------------------------------------------------------------------------*/

/*
  DMCreateMatrix_DA_1d_MPIAIJ - Preallocates (and, unless da->prealloc_only, pattern-fills)
  a MATMPIAIJ matrix for a 1d DMDA using a uniform per-row estimate of col*nc nonzeros.
*/
PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ(DM da, Mat J)
{
  PetscInt               xs, nx, i, i1, slot, gxs, gnx;
  PetscInt               m, dim, s, *cols = NULL, nc, *rows = NULL, col, cnt, l;
  PetscInt               istart, iend;
  DMBoundaryType         bx;
  ISLocalToGlobalMapping ltog, mltog;

  PetscFunctionBegin;
  /*
         nc - number of components per grid point
         col - number of colors needed in one direction for single component problem
  */
  PetscCall(DMDAGetInfo(da, &dim, &m, NULL, NULL, NULL, NULL, NULL, &nc, &s, &bx, NULL, NULL, NULL));
  if (bx == DM_BOUNDARY_NONE) PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_TRUE));
  col = 2 * s + 1;

  PetscCall(DMDAGetCorners(da, &xs, NULL, NULL, &nx, NULL, NULL));
  PetscCall(DMDAGetGhostCorners(da, &gxs, NULL, NULL, &gnx, NULL, NULL));

  PetscCall(MatSetBlockSize(J, nc));
  /* uniform preallocation: col*nc entries per row in both diagonal and off-diagonal blocks */
  PetscCall(MatSeqAIJSetPreallocation(J, col * nc, NULL));
  PetscCall(MatMPIAIJSetPreallocation(J, col * nc, NULL, col * nc, NULL));

  PetscCall(DMGetLocalToGlobalMapping(da, &ltog));
  /* keep a user-provided mapping if one is already attached to J */
  PetscCall(MatGetLocalToGlobalMapping(J, &mltog, NULL));
  if (!mltog) PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    PetscCall(PetscMalloc2(nc, &rows, col * nc * nc, &cols));
    for (i = xs; i < xs + nx; i++) {
      /* clip the stencil to the ghosted local region */
      istart = PetscMax(-s, gxs - i);
      iend   = PetscMin(s, gxs + gnx - i - 1);
      slot   = i - gxs;

      cnt = 0;
      for (i1 = istart; i1 < iend + 1; i1++) {
        cols[cnt++] = nc * (slot + i1);
        for (l = 1; l < nc; l++) {
          cols[cnt] = 1 + cols[cnt - 1];
          cnt++;
        }
      }
      rows[0] = nc * (slot);
      for (l = 1; l < nc; l++) rows[l] = 1 + rows[l - 1];
      PetscCall(MatSetValuesLocal(J, nc, rows, cnt, cols, NULL, INSERT_VALUES));
    }
    /* do not copy values to GPU since they are all zero and not yet needed there */
    PetscCall(MatBindToCPU(J, PETSC_TRUE));
    PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
    if (bx == DM_BOUNDARY_NONE) PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_FALSE));
    PetscCall(MatBindToCPU(J, PETSC_FALSE));
    PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
    PetscCall(PetscFree2(rows, cols));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* ---------------------------------------------------------------------------------*/

/*
  DMCreateMatrix_DA_1d_SeqAIJ_NoPreallocation - Sequential 1d variant that uses
  MatSeqAIJSetTotalPreallocation() (one aggregate count) instead of per-row preallocation.
*/
PetscErrorCode DMCreateMatrix_DA_1d_SeqAIJ_NoPreallocation(DM da, Mat J)
{
  PetscInt               xs, nx, i, i1, slot, gxs, gnx;
  PetscInt               m, dim, s, *cols = NULL, nc, *rows = NULL, col, cnt, l;
  PetscInt               istart, iend;
  DMBoundaryType         bx;
  ISLocalToGlobalMapping ltog, mltog;

  PetscFunctionBegin;
  /*
         nc - number of components per grid point
         col - number of colors needed in one direction for single component problem
  */
  PetscCall(DMDAGetInfo(da, &dim, &m, NULL, NULL, NULL, NULL, NULL, &nc, &s, &bx, NULL, NULL, NULL));
  col = 2 * s + 1;

  PetscCall(DMDAGetCorners(da, &xs, NULL, NULL, &nx, NULL, NULL));
  PetscCall(DMDAGetGhostCorners(da, &gxs, NULL, NULL, &gnx, NULL,
NULL)); 1667 1668 PetscCall(MatSetBlockSize(J, nc)); 1669 PetscCall(MatSeqAIJSetTotalPreallocation(J, nx * nc * col * nc)); 1670 1671 PetscCall(DMGetLocalToGlobalMapping(da, <og)); 1672 PetscCall(MatGetLocalToGlobalMapping(J, &mltog, NULL)); 1673 if (!mltog) PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog)); 1674 1675 /* 1676 For each node in the grid: we get the neighbors in the local (on processor ordering 1677 that includes the ghost points) then MatSetValuesLocal() maps those indices to the global 1678 PETSc ordering. 1679 */ 1680 if (!da->prealloc_only) { 1681 PetscCall(PetscMalloc2(nc, &rows, col * nc * nc, &cols)); 1682 for (i = xs; i < xs + nx; i++) { 1683 istart = PetscMax(-s, gxs - i); 1684 iend = PetscMin(s, gxs + gnx - i - 1); 1685 slot = i - gxs; 1686 1687 cnt = 0; 1688 for (i1 = istart; i1 < iend + 1; i1++) { 1689 cols[cnt++] = nc * (slot + i1); 1690 for (l = 1; l < nc; l++) { 1691 cols[cnt] = 1 + cols[cnt - 1]; 1692 cnt++; 1693 } 1694 } 1695 rows[0] = nc * (slot); 1696 for (l = 1; l < nc; l++) rows[l] = 1 + rows[l - 1]; 1697 PetscCall(MatSetValuesLocal(J, nc, rows, cnt, cols, NULL, INSERT_VALUES)); 1698 } 1699 /* do not copy values to GPU since they are all zero and not yet needed there */ 1700 PetscCall(MatBindToCPU(J, PETSC_TRUE)); 1701 PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY)); 1702 PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY)); 1703 if (bx == DM_BOUNDARY_NONE) PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_FALSE)); 1704 PetscCall(MatBindToCPU(J, PETSC_FALSE)); 1705 PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE)); 1706 PetscCall(PetscFree2(rows, cols)); 1707 } 1708 PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_FALSE)); 1709 PetscFunctionReturn(PETSC_SUCCESS); 1710 } 1711 1712 PetscErrorCode DMCreateMatrix_DA_2d_MPIBAIJ(DM da, Mat J) 1713 { 1714 PetscInt xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny; 1715 PetscInt m, n, dim, s, *cols, nc, col, cnt, *dnz, *onz; 1716 PetscInt istart, iend, jstart, jend, ii, 
jj; 1717 MPI_Comm comm; 1718 PetscScalar *values; 1719 DMBoundaryType bx, by; 1720 DMDAStencilType st; 1721 ISLocalToGlobalMapping ltog; 1722 1723 PetscFunctionBegin; 1724 /* 1725 nc - number of components per grid point 1726 col - number of colors needed in one direction for single component problem 1727 */ 1728 PetscCall(DMDAGetInfo(da, &dim, &m, &n, NULL, NULL, NULL, NULL, &nc, &s, &bx, &by, NULL, &st)); 1729 col = 2 * s + 1; 1730 1731 PetscCall(DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL)); 1732 PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL)); 1733 PetscCall(PetscObjectGetComm((PetscObject)da, &comm)); 1734 1735 PetscCall(PetscMalloc1(col * col * nc * nc, &cols)); 1736 1737 PetscCall(DMGetLocalToGlobalMapping(da, <og)); 1738 1739 /* determine the matrix preallocation information */ 1740 MatPreallocateBegin(comm, nx * ny, nx * ny, dnz, onz); 1741 for (i = xs; i < xs + nx; i++) { 1742 istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i)); 1743 iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1)); 1744 for (j = ys; j < ys + ny; j++) { 1745 jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j)); 1746 jend = (by == DM_BOUNDARY_PERIODIC) ? 
s : (PetscMin(s, n - j - 1)); 1747 slot = i - gxs + gnx * (j - gys); 1748 1749 /* Find block columns in block row */ 1750 cnt = 0; 1751 for (ii = istart; ii < iend + 1; ii++) { 1752 for (jj = jstart; jj < jend + 1; jj++) { 1753 if (st == DMDA_STENCIL_BOX || !ii || !jj) { /* BOX or on the STAR */ 1754 cols[cnt++] = slot + ii + gnx * jj; 1755 } 1756 } 1757 } 1758 PetscCall(MatPreallocateSetLocalBlock(ltog, 1, &slot, ltog, cnt, cols, dnz, onz)); 1759 } 1760 } 1761 PetscCall(MatSeqBAIJSetPreallocation(J, nc, 0, dnz)); 1762 PetscCall(MatMPIBAIJSetPreallocation(J, nc, 0, dnz, 0, onz)); 1763 MatPreallocateEnd(dnz, onz); 1764 1765 PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog)); 1766 1767 /* 1768 For each node in the grid: we get the neighbors in the local (on processor ordering 1769 that includes the ghost points) then MatSetValuesLocal() maps those indices to the global 1770 PETSc ordering. 1771 */ 1772 if (!da->prealloc_only) { 1773 PetscCall(PetscCalloc1(col * col * nc * nc, &values)); 1774 for (i = xs; i < xs + nx; i++) { 1775 istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i)); 1776 iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1)); 1777 for (j = ys; j < ys + ny; j++) { 1778 jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j)); 1779 jend = (by == DM_BOUNDARY_PERIODIC) ? 
s : (PetscMin(s, n - j - 1)); 1780 slot = i - gxs + gnx * (j - gys); 1781 cnt = 0; 1782 for (ii = istart; ii < iend + 1; ii++) { 1783 for (jj = jstart; jj < jend + 1; jj++) { 1784 if (st == DMDA_STENCIL_BOX || !ii || !jj) { /* BOX or on the STAR */ 1785 cols[cnt++] = slot + ii + gnx * jj; 1786 } 1787 } 1788 } 1789 PetscCall(MatSetValuesBlockedLocal(J, 1, &slot, cnt, cols, values, INSERT_VALUES)); 1790 } 1791 } 1792 PetscCall(PetscFree(values)); 1793 /* do not copy values to GPU since they are all zero and not yet needed there */ 1794 PetscCall(MatBindToCPU(J, PETSC_TRUE)); 1795 PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY)); 1796 PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY)); 1797 PetscCall(MatBindToCPU(J, PETSC_FALSE)); 1798 PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE)); 1799 } 1800 PetscCall(PetscFree(cols)); 1801 PetscFunctionReturn(PETSC_SUCCESS); 1802 } 1803 1804 PetscErrorCode DMCreateMatrix_DA_3d_MPIBAIJ(DM da, Mat J) 1805 { 1806 PetscInt xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny; 1807 PetscInt m, n, dim, s, *cols, k, nc, col, cnt, p, *dnz, *onz; 1808 PetscInt istart, iend, jstart, jend, kstart, kend, zs, nz, gzs, gnz, ii, jj, kk; 1809 MPI_Comm comm; 1810 PetscScalar *values; 1811 DMBoundaryType bx, by, bz; 1812 DMDAStencilType st; 1813 ISLocalToGlobalMapping ltog; 1814 1815 PetscFunctionBegin; 1816 /* 1817 nc - number of components per grid point 1818 col - number of colors needed in one direction for single component problem 1819 */ 1820 PetscCall(DMDAGetInfo(da, &dim, &m, &n, &p, NULL, NULL, NULL, &nc, &s, &bx, &by, &bz, &st)); 1821 col = 2 * s + 1; 1822 1823 PetscCall(DMDAGetCorners(da, &xs, &ys, &zs, &nx, &ny, &nz)); 1824 PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, &gzs, &gnx, &gny, &gnz)); 1825 PetscCall(PetscObjectGetComm((PetscObject)da, &comm)); 1826 1827 PetscCall(PetscMalloc1(col * col * col, &cols)); 1828 1829 PetscCall(DMGetLocalToGlobalMapping(da, <og)); 1830 1831 /* determine the matrix preallocation 
information */ 1832 MatPreallocateBegin(comm, nx * ny * nz, nx * ny * nz, dnz, onz); 1833 for (i = xs; i < xs + nx; i++) { 1834 istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i)); 1835 iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1)); 1836 for (j = ys; j < ys + ny; j++) { 1837 jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j)); 1838 jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1)); 1839 for (k = zs; k < zs + nz; k++) { 1840 kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k)); 1841 kend = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1)); 1842 1843 slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs); 1844 1845 /* Find block columns in block row */ 1846 cnt = 0; 1847 for (ii = istart; ii < iend + 1; ii++) { 1848 for (jj = jstart; jj < jend + 1; jj++) { 1849 for (kk = kstart; kk < kend + 1; kk++) { 1850 if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/ 1851 cols[cnt++] = slot + ii + gnx * jj + gnx * gny * kk; 1852 } 1853 } 1854 } 1855 } 1856 PetscCall(MatPreallocateSetLocalBlock(ltog, 1, &slot, ltog, cnt, cols, dnz, onz)); 1857 } 1858 } 1859 } 1860 PetscCall(MatSeqBAIJSetPreallocation(J, nc, 0, dnz)); 1861 PetscCall(MatMPIBAIJSetPreallocation(J, nc, 0, dnz, 0, onz)); 1862 MatPreallocateEnd(dnz, onz); 1863 1864 PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog)); 1865 1866 /* 1867 For each node in the grid: we get the neighbors in the local (on processor ordering 1868 that includes the ghost points) then MatSetValuesLocal() maps those indices to the global 1869 PETSc ordering. 1870 */ 1871 if (!da->prealloc_only) { 1872 PetscCall(PetscCalloc1(col * col * col * nc * nc, &values)); 1873 for (i = xs; i < xs + nx; i++) { 1874 istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i)); 1875 iend = (bx == DM_BOUNDARY_PERIODIC) ? 
s : (PetscMin(s, m - i - 1)); 1876 for (j = ys; j < ys + ny; j++) { 1877 jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j)); 1878 jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1)); 1879 for (k = zs; k < zs + nz; k++) { 1880 kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k)); 1881 kend = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1)); 1882 1883 slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs); 1884 1885 cnt = 0; 1886 for (ii = istart; ii < iend + 1; ii++) { 1887 for (jj = jstart; jj < jend + 1; jj++) { 1888 for (kk = kstart; kk < kend + 1; kk++) { 1889 if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/ 1890 cols[cnt++] = slot + ii + gnx * jj + gnx * gny * kk; 1891 } 1892 } 1893 } 1894 } 1895 PetscCall(MatSetValuesBlockedLocal(J, 1, &slot, cnt, cols, values, INSERT_VALUES)); 1896 } 1897 } 1898 } 1899 PetscCall(PetscFree(values)); 1900 /* do not copy values to GPU since they are all zero and not yet needed there */ 1901 PetscCall(MatBindToCPU(J, PETSC_TRUE)); 1902 PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY)); 1903 PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY)); 1904 PetscCall(MatBindToCPU(J, PETSC_FALSE)); 1905 PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE)); 1906 } 1907 PetscCall(PetscFree(cols)); 1908 PetscFunctionReturn(PETSC_SUCCESS); 1909 } 1910 1911 /* 1912 This helper is for of SBAIJ preallocation, to discard the lower-triangular values which are difficult to 1913 identify in the local ordering with periodic domain. 
 */
static PetscErrorCode L2GFilterUpperTriangular(ISLocalToGlobalMapping ltog, PetscInt *row, PetscInt *cnt, PetscInt col[])
{
  PetscInt i, n;

  PetscFunctionBegin;
  /* map the block row and the candidate block columns to the global ordering */
  PetscCall(ISLocalToGlobalMappingApplyBlock(ltog, 1, row, row));
  PetscCall(ISLocalToGlobalMappingApplyBlock(ltog, *cnt, col, col));
  /* compact col[] in place, keeping only entries on or above the diagonal */
  for (i = 0, n = 0; i < *cnt; i++) {
    if (col[i] >= *row) col[n++] = col[i];
  }
  *cnt = n;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  DMCreateMatrix_DA_2d_MPISBAIJ - Preallocates and sets the (block) nonzero structure of the
  matrix for a 2d DMDA stored in symmetric SBAIJ format; only the upper-triangular part of
  each block row is kept (see L2GFilterUpperTriangular()).
*/
PetscErrorCode DMCreateMatrix_DA_2d_MPISBAIJ(DM da, Mat J)
{
  PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
  PetscInt               m, n, dim, s, *cols, nc, col, cnt, *dnz, *onz;
  PetscInt               istart, iend, jstart, jend, ii, jj;
  MPI_Comm               comm;
  PetscScalar           *values;
  DMBoundaryType         bx, by;
  DMDAStencilType        st;
  ISLocalToGlobalMapping ltog;

  PetscFunctionBegin;
  /*
         nc - number of components per grid point
         col - number of colors needed in one direction for single component problem
  */
  PetscCall(DMDAGetInfo(da, &dim, &m, &n, NULL, NULL, NULL, NULL, &nc, &s, &bx, &by, NULL, &st));
  col = 2 * s + 1;

  PetscCall(DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL));
  PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL));
  PetscCall(PetscObjectGetComm((PetscObject)da, &comm));

  PetscCall(PetscMalloc1(col * col * nc * nc, &cols));

  PetscCall(DMGetLocalToGlobalMapping(da, &ltog));

  /* determine the matrix preallocation information */
  MatPreallocateBegin(comm, nx * ny, nx * ny, dnz, onz);
  for (i = xs; i < xs + nx; i++) {
    /* periodic boundaries always have the full stencil; otherwise clip at the physical boundary */
    istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
    iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
    for (j = ys; j < ys + ny; j++) {
      jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
      jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
      slot   = i - gxs + gnx * (j - gys); /* local (ghosted) block index of this grid point */

      /* Find block columns in block row */
      cnt = 0;
      for (ii = istart; ii < iend + 1; ii++) {
        for (jj = jstart; jj < jend + 1; jj++) {
          if (st == DMDA_STENCIL_BOX || !ii || !jj) cols[cnt++] = slot + ii + gnx * jj;
        }
      }
      /* converts slot/cols to global indices and drops the strictly lower-triangular entries */
      PetscCall(L2GFilterUpperTriangular(ltog, &slot, &cnt, cols));
      PetscCall(MatPreallocateSymmetricSetBlock(slot, cnt, cols, dnz, onz));
    }
  }
  PetscCall(MatSeqSBAIJSetPreallocation(J, nc, 0, dnz));
  PetscCall(MatMPISBAIJSetPreallocation(J, nc, 0, dnz, 0, onz));
  MatPreallocateEnd(dnz, onz);

  PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    /* insert zero blocks so the nonzero pattern is locked in before assembly */
    PetscCall(PetscCalloc1(col * col * nc * nc, &values));
    for (i = xs; i < xs + nx; i++) {
      istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
      iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
      for (j = ys; j < ys + ny; j++) {
        jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
        jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
        slot   = i - gxs + gnx * (j - gys);

        /* Find block columns in block row */
        cnt = 0;
        for (ii = istart; ii < iend + 1; ii++) {
          for (jj = jstart; jj < jend + 1; jj++) {
            if (st == DMDA_STENCIL_BOX || !ii || !jj) cols[cnt++] = slot + ii + gnx * jj;
          }
        }
        PetscCall(L2GFilterUpperTriangular(ltog, &slot, &cnt, cols));
        /* indices are already global after the filter, hence MatSetValuesBlocked (not ...Local) */
        PetscCall(MatSetValuesBlocked(J, 1, &slot, cnt, cols, values, INSERT_VALUES));
      }
    }
    PetscCall(PetscFree(values));
    /* do not copy values to GPU since they are all zero and not yet needed there */
    PetscCall(MatBindToCPU(J, PETSC_TRUE));
    PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatBindToCPU(J, PETSC_FALSE));
    PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  }
  PetscCall(PetscFree(cols));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  DMCreateMatrix_DA_3d_MPISBAIJ - Preallocates and sets the (block) nonzero structure of the
  matrix for a 3d DMDA stored in symmetric SBAIJ format; only the upper-triangular part of
  each block row is kept (see L2GFilterUpperTriangular()).
*/
PetscErrorCode DMCreateMatrix_DA_3d_MPISBAIJ(DM da, Mat J)
{
  PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
  PetscInt               m, n, dim, s, *cols, k, nc, col, cnt, p, *dnz, *onz;
  PetscInt               istart, iend, jstart, jend, kstart, kend, zs, nz, gzs, gnz, ii, jj, kk;
  MPI_Comm               comm;
  PetscScalar           *values;
  DMBoundaryType         bx, by, bz;
  DMDAStencilType        st;
  ISLocalToGlobalMapping ltog;

  PetscFunctionBegin;
  /*
         nc - number of components per grid point
         col - number of colors needed in one direction for single component problem
  */
  PetscCall(DMDAGetInfo(da, &dim, &m, &n, &p, NULL, NULL, NULL, &nc, &s, &bx, &by, &bz, &st));
  col = 2 * s + 1;

  PetscCall(DMDAGetCorners(da, &xs, &ys, &zs, &nx, &ny, &nz));
  PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, &gzs, &gnx, &gny, &gnz));
  PetscCall(PetscObjectGetComm((PetscObject)da, &comm));

  /* create the matrix */
  PetscCall(PetscMalloc1(col * col * col, &cols));

  PetscCall(DMGetLocalToGlobalMapping(da, &ltog));

  /* determine the matrix preallocation information */
  MatPreallocateBegin(comm, nx * ny * nz, nx * ny * nz, dnz, onz);
  for (i = xs; i < xs + nx; i++) {
    /* periodic boundaries always have the full stencil; otherwise clip at the physical boundary */
    istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
    iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
    for (j = ys; j < ys + ny; j++) {
      jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
      jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
      for (k = zs; k < zs + nz; k++) {
        kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
        kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));

        slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs); /* local (ghosted) block index */

        /* Find block columns in block row */
        cnt = 0;
        for (ii = istart; ii < iend + 1; ii++) {
          for (jj = jstart; jj < jend + 1; jj++) {
            for (kk = kstart; kk < kend + 1; kk++) {
              if ((st == DMDA_STENCIL_BOX) || (!ii && !jj) || (!jj && !kk) || (!ii && !kk)) cols[cnt++] = slot + ii + gnx * jj + gnx * gny * kk;
            }
          }
        }
        /* converts slot/cols to global indices and drops the strictly lower-triangular entries */
        PetscCall(L2GFilterUpperTriangular(ltog, &slot, &cnt, cols));
        PetscCall(MatPreallocateSymmetricSetBlock(slot, cnt, cols, dnz, onz));
      }
    }
  }
  PetscCall(MatSeqSBAIJSetPreallocation(J, nc, 0, dnz));
  PetscCall(MatMPISBAIJSetPreallocation(J, nc, 0, dnz, 0, onz));
  MatPreallocateEnd(dnz, onz);

  PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    /* insert zero blocks so the nonzero pattern is locked in before assembly */
    PetscCall(PetscCalloc1(col * col * col * nc * nc, &values));
    for (i = xs; i < xs + nx; i++) {
      istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
      iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
      for (j = ys; j < ys + ny; j++) {
        jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
        jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
        for (k = zs; k < zs + nz; k++) {
          kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
          kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));

          slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);

          cnt = 0;
          for (ii = istart; ii < iend + 1; ii++) {
            for (jj = jstart; jj < jend + 1; jj++) {
              for (kk = kstart; kk < kend + 1; kk++) {
                if ((st == DMDA_STENCIL_BOX) || (!ii && !jj) || (!jj && !kk) || (!ii && !kk)) cols[cnt++] = slot + ii + gnx * jj + gnx * gny * kk;
              }
            }
          }
          PetscCall(L2GFilterUpperTriangular(ltog, &slot, &cnt, cols));
          /* indices are already global after the filter, hence MatSetValuesBlocked (not ...Local) */
          PetscCall(MatSetValuesBlocked(J, 1, &slot, cnt, cols, values, INSERT_VALUES));
        }
      }
    }
    PetscCall(PetscFree(values));
    /* do not copy values to GPU since they are all zero and not yet needed there */
    PetscCall(MatBindToCPU(J, PETSC_TRUE));
    PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatBindToCPU(J, PETSC_FALSE));
    PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  }
  PetscCall(PetscFree(cols));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* ---------------------------------------------------------------------------------*/

/*
  DMCreateMatrix_DA_3d_MPIAIJ_Fill - Preallocates and sets the nonzero structure of the
  matrix for a 3d DMDA stored in AIJ format, honoring the per-block fill patterns set
  with DMDASetBlockFills() (dd->dfill for the diagonal block, dd->ofill for off-diagonal
  blocks, both in the modified-CSR form built by DMDASetBlockFills_Private()).
*/
PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ_Fill(DM da, Mat J)
{
  PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
  PetscInt               m, n, dim, s, *cols, k, nc, row, col, cnt, maxcnt = 0, l, p, *dnz, *onz;
  PetscInt               istart, iend, jstart, jend, kstart, kend, zs, nz, gzs, gnz, ii, jj, kk, M, N, P;
  DM_DA                 *dd = (DM_DA *)da->data;
  PetscInt               ifill_col, *dfill = dd->dfill, *ofill = dd->ofill;
  MPI_Comm               comm;
  PetscScalar           *values;
  DMBoundaryType         bx, by, bz;
  ISLocalToGlobalMapping ltog;
  DMDAStencilType        st;
  PetscBool              removedups = PETSC_FALSE;

  PetscFunctionBegin;
  /*
         nc - number of components per grid point
         col - number of colors needed in one direction for single component problem
  */
  PetscCall(DMDAGetInfo(da, &dim, &m, &n, &p, &M, &N, &P, &nc, &s, &bx, &by, &bz, &st));
  col = 2 * s + 1;
  PetscCheck(bx != DM_BOUNDARY_PERIODIC || (m % col) == 0, PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "For coloring efficiency ensure number of grid points in X is divisible\n\
                 by 2*stencil_width + 1\n");
  PetscCheck(by != DM_BOUNDARY_PERIODIC || (n % col) == 0, PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "For coloring efficiency ensure number of grid points in Y is divisible\n\
                 by 2*stencil_width + 1\n");
  PetscCheck(bz != DM_BOUNDARY_PERIODIC || (p % col) == 0, PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "For coloring efficiency ensure number of grid points in Z is divisible\n\
                 by 2*stencil_width + 1\n");

  /*
       With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times
       because of "wrapping" around the end of the domain hitting an entry already counted in the other direction.
  */
  if (M == 1 && 2 * s >= m) removedups = PETSC_TRUE;
  if (N == 1 && 2 * s >= n) removedups = PETSC_TRUE;
  if (P == 1 && 2 * s >= p) removedups = PETSC_TRUE;

  PetscCall(DMDAGetCorners(da, &xs, &ys, &zs, &nx, &ny, &nz));
  PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, &gzs, &gnx, &gny, &gnz));
  PetscCall(PetscObjectGetComm((PetscObject)da, &comm));

  PetscCall(PetscMalloc1(col * col * col * nc, &cols));
  PetscCall(DMGetLocalToGlobalMapping(da, &ltog));

  /* determine the matrix preallocation information */
  MatPreallocateBegin(comm, nc * nx * ny * nz, nc * nx * ny * nz, dnz, onz);

  PetscCall(MatSetBlockSize(J, nc));
  for (i = xs; i < xs + nx; i++) {
    istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
    iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
    for (j = ys; j < ys + ny; j++) {
      jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
      jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
      for (k = zs; k < zs + nz; k++) {
        kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
        kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));

        slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs); /* local (ghosted) grid-point index */

        /* one scalar row per component l of this grid point */
        for (l = 0; l < nc; l++) {
          cnt = 0;
          for (ii = istart; ii < iend + 1; ii++) {
            for (jj = jstart; jj < jend + 1; jj++) {
              for (kk = kstart; kk < kend + 1; kk++) {
                if (ii || jj || kk) {
                  if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/
                    /* off-diagonal block: columns given by row l of the ofill pattern */
                    for (ifill_col = ofill[l]; ifill_col < ofill[l + 1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc * (slot + ii + gnx * jj + gnx * gny * kk);
                  }
                } else {
                  if (dfill) {
                    /* diagonal block: columns given by row l of the dfill pattern */
                    for (ifill_col = dfill[l]; ifill_col < dfill[l + 1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc * (slot + ii + gnx * jj + gnx * gny * kk);
                  } else {
                    /* no dfill pattern given: diagonal block is dense */
                    for (ifill_col = 0; ifill_col < nc; ifill_col++) cols[cnt++] = ifill_col + nc * (slot + ii + gnx * jj + gnx * gny * kk);
                  }
                }
              }
            }
          }
          row    = l + nc * (slot);
          maxcnt = PetscMax(maxcnt, cnt); /* remembered to size the zero-values buffer below */
          if (removedups) PetscCall(MatPreallocateSetLocalRemoveDups(ltog, 1, &row, ltog, cnt, cols, dnz, onz));
          else PetscCall(MatPreallocateSetLocal(ltog, 1, &row, ltog, cnt, cols, dnz, onz));
        }
      }
    }
  }
  PetscCall(MatSeqAIJSetPreallocation(J, 0, dnz));
  PetscCall(MatMPIAIJSetPreallocation(J, 0, dnz, 0, onz));
  MatPreallocateEnd(dnz, onz);
  PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    /* insert zeros (maxcnt is the widest row seen above) to lock in the nonzero pattern */
    PetscCall(PetscCalloc1(maxcnt, &values));
    for (i = xs; i < xs + nx; i++) {
      istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
      iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
      for (j = ys; j < ys + ny; j++) {
        jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
        jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
        for (k = zs; k < zs + nz; k++) {
          kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
          kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));

          slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);

          for (l = 0; l < nc; l++) {
            cnt = 0;
            for (ii = istart; ii < iend + 1; ii++) {
              for (jj = jstart; jj < jend + 1; jj++) {
                for (kk = kstart; kk < kend + 1; kk++) {
                  if (ii || jj || kk) {
                    if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/
                      for (ifill_col = ofill[l]; ifill_col < ofill[l + 1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc * (slot + ii + gnx * jj + gnx * gny * kk);
                    }
                  } else {
                    if (dfill) {
                      for (ifill_col = dfill[l]; ifill_col < dfill[l + 1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc * (slot + ii + gnx * jj + gnx * gny * kk);
                    } else {
                      for (ifill_col = 0; ifill_col < nc; ifill_col++) cols[cnt++] = ifill_col + nc * (slot + ii + gnx * jj + gnx * gny * kk);
                    }
                  }
                }
              }
            }
            row = l + nc * (slot);
            PetscCall(MatSetValuesLocal(J, 1, &row, cnt, cols, values, INSERT_VALUES));
          }
        }
      }
    }
    PetscCall(PetscFree(values));
    /* do not copy values to GPU since they are all zero and not yet needed there */
    PetscCall(MatBindToCPU(J, PETSC_TRUE));
    PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatBindToCPU(J, PETSC_FALSE));
    PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  }
  PetscCall(PetscFree(cols));
  PetscFunctionReturn(PETSC_SUCCESS);
}