#include <petsc/private/dmdaimpl.h> /*I "petscdmda.h" I*/
#include <petscmat.h>
#include <petscbt.h>

/* Jacobian colorings for DMDA grids, implemented per spatial dimension below */
extern PetscErrorCode DMCreateColoring_DA_1d_MPIAIJ(DM, ISColoringType, ISColoring *);
extern PetscErrorCode DMCreateColoring_DA_2d_MPIAIJ(DM, ISColoringType, ISColoring *);
extern PetscErrorCode DMCreateColoring_DA_2d_5pt_MPIAIJ(DM, ISColoringType, ISColoring *);
extern PetscErrorCode DMCreateColoring_DA_3d_MPIAIJ(DM, ISColoringType, ISColoring *);

/*
   For ghost i that may be negative or greater than the upper bound this
   maps it into the 0:m-1 range using periodicity.

   NOTE(review): the macro arguments are not parenthesized in the expansion,
   so only pass simple expressions (all current call sites pass plain variables).
*/
#define SetInRange(i, m) ((i < 0) ? m + i : ((i >= m) ? i - m : i))

/*
   Converts the dense w-by-w 0/1 coupling pattern dfill into the modified CSR
   form used internally by DMDA and returns a freshly malloc'ed copy in rfill.
   A NULL dfill means "use dense block"; nothing is allocated in that case.
*/
static PetscErrorCode DMDASetBlockFills_Private(const PetscInt *dfill, PetscInt w, PetscInt **rfill)
{
  PetscInt i, j, nz, *fill;

  PetscFunctionBegin;
  if (!dfill) PetscFunctionReturn(PETSC_SUCCESS);

  /* count number of nonzeros */
  nz = 0;
  for (i = 0; i < w; i++) {
    for (j = 0; j < w; j++) {
      if (dfill[w * i + j]) nz++;
    }
  }
  PetscCall(PetscMalloc1(nz + w + 1, &fill));
  /* construct modified CSR storage of nonzero structure */
  /* fill[0 -- w] marks starts of each row of column indices (and end of last row)
     so fill[1] - fill[0] gives number of nonzeros in first row etc */
  nz = w + 1;
  for (i = 0; i < w; i++) {
    fill[i] = nz;
    for (j = 0; j < w; j++) {
      if (dfill[w * i + j]) {
        fill[nz] = j;
        nz++;
      }
    }
  }
  fill[w] = nz;

  *rfill = fill;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
   Copies a user-provided sparse (modified CSR, see DMDASetBlockFillsSparse())
   fill pattern so the DMDA owns its own storage. A NULL input means "use
   dense block"; nothing is allocated in that case.
*/
static PetscErrorCode DMDASetBlockFillsSparse_Private(const PetscInt *dfillsparse, PetscInt w, PetscInt **rfill)
{
  PetscInt nz;

  PetscFunctionBegin;
  if (!dfillsparse) PetscFunctionReturn(PETSC_SUCCESS);

  /* Determine number of non-zeros: entry w of the input holds the total array length nz + w + 1 */
  nz = (dfillsparse[w] - w - 1);

  /* Allocate space for our copy of the given sparse matrix representation. */
  PetscCall(PetscMalloc1(nz + w + 1, rfill));
  PetscCall(PetscArraycpy(*rfill, dfillsparse, nz + w + 1));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Builds dd->ofillcols from dd->ofill (used by the 1d coloring to count coupled columns) */
static PetscErrorCode DMDASetBlockFills_Private2(DM_DA *dd)
{
  PetscInt i, k, cnt = 1;

  PetscFunctionBegin;

  /* ofillcols tracks the columns of ofill that have any nonzero in them; the value in each location is the number of
     columns to the left with any nonzeros in them plus 1 */
  PetscCall(PetscCalloc1(dd->w, &dd->ofillcols));
  for (i = 0; i < dd->w; i++) {
    for (k = dd->ofill[i]; k < dd->ofill[i + 1]; k++) dd->ofillcols[dd->ofill[k]] = 1;
  }
  for (i = 0; i < dd->w; i++) {
    if (dd->ofillcols[i]) dd->ofillcols[i] = cnt++;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  DMDASetBlockFills - Sets the fill pattern in each block for a multi-component problem
  of the matrix returned by `DMCreateMatrix()`.

  Logically Collective

  Input Parameters:
+ da    - the distributed array
. dfill - the fill pattern in the diagonal block (may be NULL, means use dense block)
- ofill - the fill pattern in the off-diagonal blocks

  Level: developer

  Notes:
  This only makes sense when you are doing multicomponent problems but using the
  `MATMPIAIJ` matrix format

  The format for dfill and ofill is a 2 dimensional dof by dof matrix with 1 entries
  representing coupling and 0 entries for missing coupling. For example
.vb
  dfill[9] = {1, 0, 0,
              1, 1, 0,
              0, 1, 1}
.ve
  means that row 0 is coupled with only itself in the diagonal block, row 1 is coupled with
  itself and row 0 (in the diagonal block) and row 2 is coupled with itself and row 1 (in the
  diagonal block).
  `DMDASetGetMatrix()` allows you to provide general code for those more complicated nonzero patterns than
  can be represented in the dfill, ofill format

  Contributed by Glenn Hammond

.seealso: `DM`, `DMDA`, `DMCreateMatrix()`, `DMDASetGetMatrix()`, `DMSetMatrixPreallocateOnly()`
@*/
PetscErrorCode DMDASetBlockFills(DM da, const PetscInt *dfill, const PetscInt *ofill)
{
  DM_DA *dd = (DM_DA *)da->data;

  PetscFunctionBegin;
  /* save the given dfill and ofill information */
  PetscCall(DMDASetBlockFills_Private(dfill, dd->w, &dd->dfill));
  PetscCall(DMDASetBlockFills_Private(ofill, dd->w, &dd->ofill));

  /* count nonzeros in ofill columns */
  PetscCall(DMDASetBlockFills_Private2(dd));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  DMDASetBlockFillsSparse - Sets the fill pattern in each block for a multi-component problem
  of the matrix returned by `DMCreateMatrix()`, using sparse representations
  of fill patterns.

  Logically Collective

  Input Parameters:
+ da          - the distributed array
. dfillsparse - the sparse fill pattern in the diagonal block (may be `NULL`, means use dense block)
- ofillsparse - the sparse fill pattern in the off-diagonal blocks

  Level: developer

  Notes:
  This only makes sense when you are doing multicomponent problems but using the
  `MATMPIAIJ` matrix format

  The format for `dfill` and `ofill` is a sparse representation of a
  dof-by-dof matrix with 1 entries representing coupling and 0 entries
  for missing coupling. The sparse representation is a 1 dimensional
  array of length nz + dof + 1, where nz is the number of non-zeros in
  the matrix. The first dof entries in the array give the
  starting array indices of each row's items in the rest of the array,
  the dof+1st item contains the value nz + dof + 1 (i.e. the entire length of the array)
  and the remaining nz items give the column indices of each of
  the 1s within the logical 2D matrix. Each row's items within
  the array are the column indices of the 1s within that row
  of the 2D matrix. PETSc developers may recognize that this is the
  same format as that computed by the `DMDASetBlockFills_Private()`
  function from a dense 2D matrix representation.

  `DMDASetGetMatrix()` allows you to provide general code for those more complicated nonzero patterns than
  can be represented in the `dfill`, `ofill` format

  Contributed by Philip C. Roth

.seealso: `DM`, `DMDA`, `DMDASetBlockFills()`, `DMCreateMatrix()`, `DMDASetGetMatrix()`, `DMSetMatrixPreallocateOnly()`
@*/
PetscErrorCode DMDASetBlockFillsSparse(DM da, const PetscInt *dfillsparse, const PetscInt *ofillsparse)
{
  DM_DA *dd = (DM_DA *)da->data;

  PetscFunctionBegin;
  /* save the given dfill and ofill information */
  PetscCall(DMDASetBlockFillsSparse_Private(dfillsparse, dd->w, &dd->dfill));
  PetscCall(DMDASetBlockFillsSparse_Private(ofillsparse, dd->w, &dd->ofill));

  /* count nonzeros in ofill columns */
  PetscCall(DMDASetBlockFills_Private2(dd));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Dispatches to the dimension-specific coloring routine; handles the BAIJ block trick */
PetscErrorCode DMCreateColoring_DA(DM da, ISColoringType ctype, ISColoring *coloring)
{
  PetscInt       dim, m, n, p, nc;
  DMBoundaryType bx, by, bz;
  MPI_Comm       comm;
  PetscMPIInt    size;
  PetscBool      isBAIJ;
  DM_DA         *dd = (DM_DA *)da->data;

  PetscFunctionBegin;
  /*
                                                  m
          ------------------------------------------------------
         |                                                      |
         |                                                      |
         |               ----------------------                 |
         |               |                    |                 |
      n  |           yn  |                    |                 |
         |               |                    |                 |
         |               .---------------------                 |
         |             (xs,ys)     xn                           |
         |            .
         |          (gxs,gys)                                   |
         |                                                      |
          -----------------------------------------------------
  */

  /*
     nc - number of components per grid point
     col - number of colors needed in one direction for single component problem
  */
  PetscCall(DMDAGetInfo(da, &dim, NULL, NULL, NULL, &m, &n, &p, &nc, NULL, &bx, &by, &bz, NULL));

  PetscCall(PetscObjectGetComm((PetscObject)da, &comm));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  if (ctype == IS_COLORING_LOCAL) {
    if (size == 1) {
      ctype = IS_COLORING_GLOBAL;
    } else {
      PetscCheck((dim == 1) || !((m == 1 && bx == DM_BOUNDARY_PERIODIC) || (n == 1 && by == DM_BOUNDARY_PERIODIC) || (p == 1 && bz == DM_BOUNDARY_PERIODIC)), PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "IS_COLORING_LOCAL cannot be used for periodic boundary condition having both ends of the domain on the same process");
    }
  }

  /* Tell the DMDA it has 1 degree of freedom per grid point so that the coloring for BAIJ
     matrices is for the blocks, not the individual matrix elements */
  PetscCall(PetscStrbeginswith(da->mattype, MATBAIJ, &isBAIJ));
  if (!isBAIJ) PetscCall(PetscStrbeginswith(da->mattype, MATMPIBAIJ, &isBAIJ));
  if (!isBAIJ) PetscCall(PetscStrbeginswith(da->mattype, MATSEQBAIJ, &isBAIJ));
  if (isBAIJ) {
    dd->w  = 1;
    dd->xs = dd->xs / nc;
    dd->xe = dd->xe / nc;
    dd->Xs = dd->Xs / nc;
    dd->Xe = dd->Xe / nc;
  }

  /*
     We do not provide a getcoloring function in the DMDA operations because
     the basic DMDA does not know about matrices. We think of DMDA as being
     more low-level than matrices.
  */
  if (dim == 1) PetscCall(DMCreateColoring_DA_1d_MPIAIJ(da, ctype, coloring));
  else if (dim == 2) PetscCall(DMCreateColoring_DA_2d_MPIAIJ(da, ctype, coloring));
  else if (dim == 3) PetscCall(DMCreateColoring_DA_3d_MPIAIJ(da, ctype, coloring));
  else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "Not done for %" PetscInt_FMT " dimension, send us mail petsc-maint@mcs.anl.gov for code", dim);
  /* restore the original dof-based index bounds modified above for BAIJ */
  if (isBAIJ) {
    dd->w  = nc;
    dd->xs = dd->xs * nc;
    dd->xe = dd->xe * nc;
    dd->Xs = dd->Xs * nc;
    dd->Xe = dd->Xe * nc;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* 2d coloring: colors form a col-by-col tile (col = 2*s+1) replicated over the grid */
PetscErrorCode DMCreateColoring_DA_2d_MPIAIJ(DM da, ISColoringType ctype, ISColoring *coloring)
{
  PetscInt         xs, ys, nx, ny, i, j, ii, gxs, gys, gnx, gny, m, n, M, N, dim, s, k, nc, col;
  PetscInt         ncolors = 0;
  MPI_Comm         comm;
  DMBoundaryType   bx, by;
  DMDAStencilType  st;
  ISColoringValue *colors;
  DM_DA           *dd = (DM_DA *)da->data;

  PetscFunctionBegin;
  /*
     nc - number of components per grid point
     col - number of colors needed in one direction for single component problem
  */
  PetscCall(DMDAGetInfo(da, &dim, &m, &n, NULL, &M, &N, NULL, &nc, &s, &bx, &by, NULL, &st));
  col = 2 * s + 1;
  PetscCall(DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL));
  PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL));
  PetscCall(PetscObjectGetComm((PetscObject)da, &comm));

  /* special case as taught to us by Paul Hovland */
  if (st == DMDA_STENCIL_STAR && s == 1) {
    PetscCall(DMCreateColoring_DA_2d_5pt_MPIAIJ(da, ctype, coloring));
  } else {
    if (ctype == IS_COLORING_GLOBAL) {
      if (!dd->localcoloring) {
        PetscCall(PetscMalloc1(nc * nx * ny, &colors));
        ii = 0;
        for (j = ys; j < ys + ny; j++) {
          for (i = xs; i < xs + nx; i++) {
            for (k = 0; k < nc; k++) colors[ii++] = k + nc * ((i % col) + col * (j % col));
          }
        }
        /* ncolors = nc*col*col, the size of one full tile of colors */
        ncolors = nc + nc * (col - 1 + col *
(col - 1));
        PetscCall(ISColoringCreate(comm, ncolors, nc * nx * ny, colors, PETSC_OWN_POINTER, &dd->localcoloring));
      }
      *coloring = dd->localcoloring;
    } else if (ctype == IS_COLORING_LOCAL) {
      if (!dd->ghostedcoloring) {
        PetscCall(PetscMalloc1(nc * gnx * gny, &colors));
        ii = 0;
        for (j = gys; j < gys + gny; j++) {
          for (i = gxs; i < gxs + gnx; i++) {
            for (k = 0; k < nc; k++) {
              /* the complicated stuff is to handle periodic boundaries */
              colors[ii++] = k + nc * ((SetInRange(i, m) % col) + col * (SetInRange(j, n) % col));
            }
          }
        }
        ncolors = nc + nc * (col - 1 + col * (col - 1));
        PetscCall(ISColoringCreate(comm, ncolors, nc * gnx * gny, colors, PETSC_OWN_POINTER, &dd->ghostedcoloring));
        /* PetscIntView(ncolors,(PetscInt*)colors,0); */

        PetscCall(ISColoringSetType(dd->ghostedcoloring, IS_COLORING_LOCAL));
      }
      *coloring = dd->ghostedcoloring;
    } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_ARG_WRONG, "Unknown ISColoringType %d", (int)ctype);
  }
  PetscCall(ISColoringReference(*coloring));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* 3d coloring: colors form a col-by-col-by-col tile (col = 2*s+1) replicated over the grid */
PetscErrorCode DMCreateColoring_DA_3d_MPIAIJ(DM da, ISColoringType ctype, ISColoring *coloring)
{
  PetscInt         xs, ys, nx, ny, i, j, gxs, gys, gnx, gny, m, n, p, dim, s, k, nc, col, zs, gzs, ii, l, nz, gnz, M, N, P;
  PetscInt         ncolors;
  MPI_Comm         comm;
  DMBoundaryType   bx, by, bz;
  DMDAStencilType  st;
  ISColoringValue *colors;
  DM_DA           *dd = (DM_DA *)da->data;

  PetscFunctionBegin;
  /*
     nc - number of components per grid point
     col - number of colors needed in one direction for single component problem
  */
  PetscCall(DMDAGetInfo(da, &dim, &m, &n, &p, &M, &N, &P, &nc, &s, &bx, &by, &bz, &st));
  col = 2 * s + 1;
  /* periodic wrap-around only lines up with the color tile when the grid size is a multiple of col */
  PetscCheck(bx != DM_BOUNDARY_PERIODIC || (m % col) == 0, PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "For coloring efficiency ensure number of grid points in X is divisible\n\
by 2*stencil_width + 1\n");
  PetscCheck(by != DM_BOUNDARY_PERIODIC || (n % col) == 0, PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "For coloring efficiency ensure number of grid points in Y is divisible\n\
by 2*stencil_width + 1\n");
  PetscCheck(bz != DM_BOUNDARY_PERIODIC || (p % col) == 0, PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "For coloring efficiency ensure number of grid points in Z is divisible\n\
by 2*stencil_width + 1\n");

  PetscCall(DMDAGetCorners(da, &xs, &ys, &zs, &nx, &ny, &nz));
  PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, &gzs, &gnx, &gny, &gnz));
  PetscCall(PetscObjectGetComm((PetscObject)da, &comm));

  /* create the coloring */
  if (ctype == IS_COLORING_GLOBAL) {
    if (!dd->localcoloring) {
      PetscCall(PetscMalloc1(nc * nx * ny * nz, &colors));
      ii = 0;
      for (k = zs; k < zs + nz; k++) {
        for (j = ys; j < ys + ny; j++) {
          for (i = xs; i < xs + nx; i++) {
            for (l = 0; l < nc; l++) colors[ii++] = l + nc * ((i % col) + col * (j % col) + col * col * (k % col));
          }
        }
      }
      ncolors = nc + nc * (col - 1 + col * (col - 1) + col * col * (col - 1));
      PetscCall(ISColoringCreate(comm, ncolors, nc * nx * ny * nz, colors, PETSC_OWN_POINTER, &dd->localcoloring));
    }
    *coloring = dd->localcoloring;
  } else if (ctype == IS_COLORING_LOCAL) {
    if (!dd->ghostedcoloring) {
      PetscCall(PetscMalloc1(nc * gnx * gny * gnz, &colors));
      ii = 0;
      for (k = gzs; k < gzs + gnz; k++) {
        for (j = gys; j < gys + gny; j++) {
          for (i = gxs; i < gxs + gnx; i++) {
            for (l = 0; l < nc; l++) {
              /* the complicated stuff is to handle periodic boundaries */
              colors[ii++] = l + nc * ((SetInRange(i, m) % col) + col * (SetInRange(j, n) % col) + col * col * (SetInRange(k, p) % col));
            }
          }
        }
      }
      ncolors = nc + nc * (col - 1 + col * (col - 1) + col * col * (col - 1));
      PetscCall(ISColoringCreate(comm, ncolors, nc * gnx * gny * gnz, colors, PETSC_OWN_POINTER,
&dd->ghostedcoloring));
      PetscCall(ISColoringSetType(dd->ghostedcoloring, IS_COLORING_LOCAL));
    }
    *coloring = dd->ghostedcoloring;
  } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_ARG_WRONG, "Unknown ISColoringType %d", (int)ctype);
  PetscCall(ISColoringReference(*coloring));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* 1d coloring: when a block fill pattern was set (dd->ofillcols) only coupled
   components get distinct off-block colors, shrinking the total color count */
PetscErrorCode DMCreateColoring_DA_1d_MPIAIJ(DM da, ISColoringType ctype, ISColoring *coloring)
{
  PetscInt         xs, nx, i, i1, gxs, gnx, l, m, M, dim, s, nc, col;
  PetscInt         ncolors;
  MPI_Comm         comm;
  DMBoundaryType   bx;
  ISColoringValue *colors;
  DM_DA           *dd = (DM_DA *)da->data;

  PetscFunctionBegin;
  /*
     nc - number of components per grid point
     col - number of colors needed in one direction for single component problem
  */
  PetscCall(DMDAGetInfo(da, &dim, &m, NULL, NULL, &M, NULL, NULL, &nc, &s, &bx, NULL, NULL, NULL));
  col = 2 * s + 1;
  PetscCall(DMDAGetCorners(da, &xs, NULL, NULL, &nx, NULL, NULL));
  PetscCall(DMDAGetGhostCorners(da, &gxs, NULL, NULL, &gnx, NULL, NULL));
  PetscCall(PetscObjectGetComm((PetscObject)da, &comm));

  /* create the coloring */
  if (ctype == IS_COLORING_GLOBAL) {
    if (!dd->localcoloring) {
      PetscCall(PetscMalloc1(nc * nx, &colors));
      if (dd->ofillcols) {
        PetscInt tc = 0;
        /* tc = number of components with any off-diagonal-block coupling */
        for (i = 0; i < nc; i++) tc += (PetscInt)(dd->ofillcols[i] > 0);
        i1 = 0;
        for (i = xs; i < xs + nx; i++) {
          for (l = 0; l < nc; l++) {
            if (dd->ofillcols[l] && (i % col)) {
              colors[i1++] = nc - 1 + tc * ((i % col) - 1) + dd->ofillcols[l];
            } else {
              colors[i1++] = l;
            }
          }
        }
        ncolors = nc + 2 * s * tc;
      } else {
        i1 = 0;
        for (i = xs; i < xs + nx; i++) {
          for (l = 0; l < nc; l++) colors[i1++] = l + nc * (i % col);
        }
        ncolors = nc + nc * (col - 1);
      }
      PetscCall(ISColoringCreate(comm, ncolors, nc * nx, colors, PETSC_OWN_POINTER, &dd->localcoloring));
    }
    *coloring = dd->localcoloring;
  } else if (ctype == IS_COLORING_LOCAL) {
    if (!dd->ghostedcoloring) {
      PetscCall(PetscMalloc1(nc * gnx, &colors));
      i1 = 0;
      for (i = gxs; i < gxs + gnx; i++) {
        for (l = 0; l < nc; l++) {
          /* the complicated stuff is to handle periodic boundaries */
          colors[i1++] = l + nc * (SetInRange(i, m) % col);
        }
      }
      ncolors = nc + nc * (col - 1);
      PetscCall(ISColoringCreate(comm, ncolors, nc * gnx, colors, PETSC_OWN_POINTER, &dd->ghostedcoloring));
      PetscCall(ISColoringSetType(dd->ghostedcoloring, IS_COLORING_LOCAL));
    }
    *coloring = dd->ghostedcoloring;
  } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_ARG_WRONG, "Unknown ISColoringType %d", (int)ctype);
  PetscCall(ISColoringReference(*coloring));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* 5-point star stencil with s = 1: a diagonal (3*j + i) % 5 pattern needs only 5 colors per component */
PetscErrorCode DMCreateColoring_DA_2d_5pt_MPIAIJ(DM da, ISColoringType ctype, ISColoring *coloring)
{
  PetscInt         xs, ys, nx, ny, i, j, ii, gxs, gys, gnx, gny, m, n, dim, s, k, nc;
  PetscInt         ncolors;
  MPI_Comm         comm;
  DMBoundaryType   bx, by;
  ISColoringValue *colors;
  DM_DA           *dd = (DM_DA *)da->data;

  PetscFunctionBegin;
  /*
     nc - number of components per grid point
     col - number of colors needed in one direction for single component problem
  */
  PetscCall(DMDAGetInfo(da, &dim, &m, &n, NULL, NULL, NULL, NULL, &nc, &s, &bx, &by, NULL, NULL));
  PetscCall(DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL));
  PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL));
  PetscCall(PetscObjectGetComm((PetscObject)da, &comm));
  /* create the coloring */
  if (ctype == IS_COLORING_GLOBAL) {
    if (!dd->localcoloring) {
      PetscCall(PetscMalloc1(nc * nx * ny, &colors));
      ii = 0;
      for (j = ys; j < ys + ny; j++) {
        for (i = xs; i < xs + nx; i++) {
          for (k = 0; k < nc; k++) colors[ii++] = k + nc * ((3 * j + i) % 5);
        }
      }
      ncolors = 5 * nc;
      PetscCall(ISColoringCreate(comm, ncolors, nc * nx * ny, colors, PETSC_OWN_POINTER, &dd->localcoloring));
    }
    *coloring = dd->localcoloring;
  } else if (ctype == IS_COLORING_LOCAL) {
    if (!dd->ghostedcoloring) {
      PetscCall(PetscMalloc1(nc * gnx * gny, &colors));
      ii = 0;
      for (j = gys; j < gys + gny; j++) {
        for (i = gxs; i < gxs + gnx; i++) {
          /* SetInRange() folds periodic ghost indices back into the owned range */
          for (k = 0; k < nc; k++) colors[ii++] = k + nc * ((3 * SetInRange(j, n) + SetInRange(i, m)) % 5);
        }
      }
      ncolors = 5 * nc;
      PetscCall(ISColoringCreate(comm, ncolors, nc * gnx * gny, colors, PETSC_OWN_POINTER, &dd->ghostedcoloring));
      PetscCall(ISColoringSetType(dd->ghostedcoloring, IS_COLORING_LOCAL));
    }
    *coloring = dd->ghostedcoloring;
  } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_ARG_WRONG, "Unknown ISColoringType %d", (int)ctype);
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* =========================================================================== */
/* matrix-creation kernels, implemented per dimension and matrix type below */
extern PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ(DM, Mat);
extern PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ_Fill(DM, Mat);
extern PetscErrorCode DMCreateMatrix_DA_1d_SeqAIJ_NoPreallocation(DM, Mat);
extern PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ(DM, Mat);
extern PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ_Fill(DM, Mat);
extern PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ(DM, Mat);
extern PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ_Fill(DM, Mat);
extern PetscErrorCode DMCreateMatrix_DA_2d_MPIBAIJ(DM, Mat);
extern PetscErrorCode DMCreateMatrix_DA_3d_MPIBAIJ(DM, Mat);
extern PetscErrorCode DMCreateMatrix_DA_2d_MPISBAIJ(DM, Mat);
extern PetscErrorCode DMCreateMatrix_DA_3d_MPISBAIJ(DM, Mat);
extern PetscErrorCode DMCreateMatrix_DA_2d_MPISELL(DM, Mat);
extern PetscErrorCode DMCreateMatrix_DA_3d_MPISELL(DM, Mat);
extern PetscErrorCode DMCreateMatrix_DA_IS(DM, Mat);

/* Views a DMDA-generated matrix in the natural (application) ordering by
   permuting it with the DMDA's AO before calling the underlying MatView() */
static PetscErrorCode MatView_MPI_DA(Mat A, PetscViewer viewer)
{
  DM                da;
  const char       *prefix;
  Mat               AA, Anatural;
  AO                ao;
  PetscInt          rstart, rend, *petsc, i;
  IS                is;
  MPI_Comm          comm;
  PetscViewerFormat format;
  PetscBool         flag;

  PetscFunctionBegin;
  /* Check whether we are just printing info, in which case MatView() already viewed everything we wanted to view */
  PetscCall(PetscViewerGetFormat(viewer, &format));
  if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) PetscFunctionReturn(PETSC_SUCCESS);

  PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
  PetscCall(MatGetDM(A, &da));
  PetscCheck(da, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Matrix not generated from a DMDA");

  /* build the IS that maps the PETSc ordering to the natural (application) ordering */
  PetscCall(DMDAGetAO(da, &ao));
  PetscCall(MatGetOwnershipRange(A, &rstart, &rend));
  PetscCall(PetscMalloc1(rend - rstart, &petsc));
  for (i = rstart; i < rend; i++) petsc[i - rstart] = i;
  PetscCall(AOApplicationToPetsc(ao, rend - rstart, petsc));
  PetscCall(ISCreateGeneral(comm, rend - rstart, petsc, PETSC_OWN_POINTER, &is));

  /* call viewer on natural ordering */
  PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATMPISELL, &flag));
  if (flag) {
    /* SELL cannot be submatrix-extracted directly; view through an AIJ copy */
    PetscCall(MatConvert(A, MATAIJ, MAT_INITIAL_MATRIX, &AA));
    A = AA;
  }
  PetscCall(MatCreateSubMatrix(A, is, is, MAT_INITIAL_MATRIX, &Anatural));
  PetscCall(ISDestroy(&is));
  PetscCall(PetscObjectGetOptionsPrefix((PetscObject)A, &prefix));
  PetscCall(PetscObjectSetOptionsPrefix((PetscObject)Anatural, prefix));
  PetscCall(PetscObjectSetName((PetscObject)Anatural, ((PetscObject)A)->name));
  ((PetscObject)Anatural)->donotPetscObjectPrintClassNamePrefixType = PETSC_TRUE;
  PetscCall(MatView(Anatural, viewer));
  ((PetscObject)Anatural)->donotPetscObjectPrintClassNamePrefixType = PETSC_FALSE;
  PetscCall(MatDestroy(&Anatural));
  if (flag) PetscCall(MatDestroy(&AA));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode
MatLoad_MPI_DA(Mat A, PetscViewer viewer)
{
  DM       da;
  Mat      Anatural, Aapp;
  AO       ao;
  PetscInt rstart, rend, *app, i, m, n, M, N;
  IS       is;
  MPI_Comm comm;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
  PetscCall(MatGetDM(A, &da));
  PetscCheck(da, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Matrix not generated from a DMDA");

  /* Load the matrix in natural ordering */
  PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &Anatural));
  PetscCall(MatSetType(Anatural, ((PetscObject)A)->type_name));
  PetscCall(MatGetSize(A, &M, &N));
  PetscCall(MatGetLocalSize(A, &m, &n));
  PetscCall(MatSetSizes(Anatural, m, n, M, N));
  PetscCall(MatLoad(Anatural, viewer));

  /* Map natural ordering to application ordering and create IS */
  PetscCall(DMDAGetAO(da, &ao));
  PetscCall(MatGetOwnershipRange(Anatural, &rstart, &rend));
  PetscCall(PetscMalloc1(rend - rstart, &app));
  for (i = rstart; i < rend; i++) app[i - rstart] = i;
  PetscCall(AOPetscToApplication(ao, rend - rstart, app));
  PetscCall(ISCreateGeneral(comm, rend - rstart, app, PETSC_OWN_POINTER, &is));

  /* Do permutation and replace header */
  PetscCall(MatCreateSubMatrix(Anatural, is, is, MAT_INITIAL_MATRIX, &Aapp));
  PetscCall(MatHeaderReplace(A, &Aapp));
  PetscCall(ISDestroy(&is));
  PetscCall(MatDestroy(&Anatural));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Creates and preallocates the Jacobian matrix for a DMDA, dispatching on the
   preallocation routine the matrix type actually provides (AIJ/BAIJ/SBAIJ/SELL/IS) */
PetscErrorCode DMCreateMatrix_DA(DM da, Mat *J)
{
  PetscInt    dim, dof, nx, ny, nz, dims[3], starts[3], M, N, P;
  Mat         A;
  MPI_Comm    comm;
  MatType     Atype;
  MatType     mtype;
  PetscMPIInt size;
  DM_DA      *dd = (DM_DA *)da->data;
  void (*aij)(void) = NULL, (*baij)(void) = NULL, (*sbaij)(void) = NULL, (*sell)(void) = NULL, (*is)(void) = NULL;

  PetscFunctionBegin;
  PetscCall(MatInitializePackage());
  mtype = da->mattype;

  /*
                                                  m
          ------------------------------------------------------
         |                                                      |
         |                                                      |
         |               ----------------------                 |
         |               |                    |                 |
      n  |           ny  |                    |                 |
         |               |                    |                 |
         |               .---------------------                 |
         |             (xs,ys)     nx                           |
         |            .                                         |
         |          (gxs,gys)                                   |
         |                                                      |
          -----------------------------------------------------
  */

  /*
     nc - number of components per grid point
     col - number of colors needed in one direction for single component problem
  */
  M   = dd->M;
  N   = dd->N;
  P   = dd->P;
  dim = da->dim;
  dof = dd->w;
  /* PetscCall(DMDAGetInfo(da,&dim,&M,&N,&P,NULL,NULL,NULL,&dof,NULL,NULL,NULL,NULL,NULL)); */
  PetscCall(DMDAGetCorners(da, NULL, NULL, NULL, &nx, &ny, &nz));
  PetscCall(PetscObjectGetComm((PetscObject)da, &comm));
  PetscCall(MatCreate(comm, &A));
  PetscCall(MatSetSizes(A, dof * nx * ny * nz, dof * nx * ny * nz, dof * M * N * P, dof * M * N * P));
  PetscCall(MatSetType(A, mtype));
  PetscCall(MatSetFromOptions(A));
  if (dof * nx * ny * nz < da->bind_below) {
    PetscCall(MatSetBindingPropagates(A, PETSC_TRUE));
    PetscCall(MatBindToCPU(A, PETSC_TRUE));
  }
  PetscCall(MatSetDM(A, da));
  if (da->structure_only) PetscCall(MatSetOption(A, MAT_STRUCTURE_ONLY, PETSC_TRUE));
  PetscCall(MatGetType(A, &Atype));
  /*
     We do not provide a getmatrix function in the DMDA operations because
     the basic DMDA does not know about matrices. We think of DMDA as being
     more low-level than matrices (though sometimes it is treated as higher
     level than matrices).

     We could switch based on Atype (or mtype), but we do not since the
     specialized setting routines depend only on the particular preallocation
     details of the matrix, not the type itself.
  */
  /* probe for the preallocation routine the concrete matrix type provides */
  PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatMPIAIJSetPreallocation_C", &aij));
  if (!aij) PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatSeqAIJSetPreallocation_C", &aij));
  if (!aij) {
    PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatMPIBAIJSetPreallocation_C", &baij));
    if (!baij) PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatSeqBAIJSetPreallocation_C", &baij));
    if (!baij) {
      PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatMPISBAIJSetPreallocation_C", &sbaij));
      if (!sbaij) PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatSeqSBAIJSetPreallocation_C", &sbaij));
      if (!sbaij) {
        PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatMPISELLSetPreallocation_C", &sell));
        if (!sell) PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatSeqSELLSetPreallocation_C", &sell));
      }
      if (!sell) PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatISSetPreallocation_C", &is));
    }
  }
  if (aij) {
    if (dim == 1) {
      if (dd->ofill) {
        PetscCall(DMCreateMatrix_DA_1d_MPIAIJ_Fill(da, A));
      } else {
        DMBoundaryType bx;
        PetscMPIInt    size;
        PetscCall(DMDAGetInfo(da, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, &bx, NULL, NULL, NULL));
        PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)da), &size));
        if (size == 1 && bx == DM_BOUNDARY_NONE) {
          PetscCall(DMCreateMatrix_DA_1d_SeqAIJ_NoPreallocation(da, A));
        } else {
          PetscCall(DMCreateMatrix_DA_1d_MPIAIJ(da, A));
        }
      }
    } else if (dim == 2) {
      if (dd->ofill) {
        PetscCall(DMCreateMatrix_DA_2d_MPIAIJ_Fill(da, A));
      } else {
        PetscCall(DMCreateMatrix_DA_2d_MPIAIJ(da, A));
      }
    } else if (dim == 3) {
      if (dd->ofill) {
        PetscCall(DMCreateMatrix_DA_3d_MPIAIJ_Fill(da, A));
      } else {
        PetscCall(DMCreateMatrix_DA_3d_MPIAIJ(da, A));
      }
    }
  } else if (baij) {
    if (dim == 2) {
      PetscCall(DMCreateMatrix_DA_2d_MPIBAIJ(da, A));
    } else if (dim == 3) {
      PetscCall(DMCreateMatrix_DA_3d_MPIBAIJ(da, A));
    } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "Not implemented for %" PetscInt_FMT " dimension and Matrix Type: %s in %" PetscInt_FMT " dimension! Send mail to petsc-maint@mcs.anl.gov for code", dim, Atype, dim);
  } else if (sbaij) {
    if (dim == 2) {
      PetscCall(DMCreateMatrix_DA_2d_MPISBAIJ(da, A));
    } else if (dim == 3) {
      PetscCall(DMCreateMatrix_DA_3d_MPISBAIJ(da, A));
    } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "Not implemented for %" PetscInt_FMT " dimension and Matrix Type: %s in %" PetscInt_FMT " dimension! Send mail to petsc-maint@mcs.anl.gov for code", dim, Atype, dim);
  } else if (sell) {
    if (dim == 2) {
      PetscCall(DMCreateMatrix_DA_2d_MPISELL(da, A));
    } else if (dim == 3) {
      PetscCall(DMCreateMatrix_DA_3d_MPISELL(da, A));
    } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "Not implemented for %" PetscInt_FMT " dimension and Matrix Type: %s in %" PetscInt_FMT " dimension! Send mail to petsc-maint@mcs.anl.gov for code", dim, Atype, dim);
  } else if (is) {
    PetscCall(DMCreateMatrix_DA_IS(da, A));
  } else {
    /* no recognized preallocation routine: just set up with the l2g mapping */
    ISLocalToGlobalMapping ltog;

    PetscCall(MatSetBlockSize(A, dof));
    PetscCall(MatSetUp(A));
    PetscCall(DMGetLocalToGlobalMapping(da, &ltog));
    PetscCall(MatSetLocalToGlobalMapping(A, ltog, ltog));
  }
  PetscCall(DMDAGetGhostCorners(da, &starts[0], &starts[1], &starts[2], &dims[0], &dims[1], &dims[2]));
  PetscCall(MatSetStencil(A, dim, dims, starts, dof));
  PetscCall(MatSetDM(A, da));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  if (size > 1) {
    /* change viewer to display matrix in natural ordering */
    PetscCall(MatSetOperation(A, MATOP_VIEW, (void (*)(void))MatView_MPI_DA));
    PetscCall(MatSetOperation(A, MATOP_LOAD, (void (*)(void))MatLoad_MPI_DA));
  }
  *J = A;
  PetscFunctionReturn(PETSC_SUCCESS);
}

PETSC_EXTERN PetscErrorCode MatISSetPreallocation_IS(Mat, PetscInt, const PetscInt[], PetscInt, const PetscInt[]);

/* Creates/preallocates a MATIS matrix for a DMDA using the element decomposition */
PetscErrorCode DMCreateMatrix_DA_IS(DM dm, Mat J)
{
  DM_DA                 *da = (DM_DA *)dm->data;
  Mat                    lJ, P;
  ISLocalToGlobalMapping ltog;
  IS                     is;
  PetscBT                bt;
  const PetscInt        *e_loc, *idx;
  PetscInt               i, nel, nen, nv, dof, *gidx, n, N;

  /* The l2g map of DMDA has all ghosted nodes, and e_loc is a subset of all the local nodes (including the ghosted)
     We need to filter out the local indices that are not represented through the DMDAGetElements decomposition */
  PetscFunctionBegin;
  dof = da->w;
  PetscCall(MatSetBlockSize(J, dof));
  PetscCall(DMGetLocalToGlobalMapping(dm, &ltog));

  /* flag local elements indices in local DMDA numbering */
  PetscCall(ISLocalToGlobalMappingGetSize(ltog, &nv));
  PetscCall(PetscBTCreate(nv / dof, &bt));
  PetscCall(DMDAGetElements(dm, &nel, &nen, &e_loc)); /* this will throw an error if the stencil type is not DMDA_STENCIL_BOX */
  for (i = 0; i < nel * nen; i++)
PetscCall(PetscBTSet(bt, e_loc[i]));

  /* filter out (set to -1) the global indices not used by the local elements */
  PetscCall(PetscMalloc1(nv / dof, &gidx));
  PetscCall(ISLocalToGlobalMappingGetBlockIndices(ltog, &idx));
  PetscCall(PetscArraycpy(gidx, idx, nv / dof));
  PetscCall(ISLocalToGlobalMappingRestoreBlockIndices(ltog, &idx));
  for (i = 0; i < nv / dof; i++)
    if (!PetscBTLookup(bt, i)) gidx[i] = -1;
  PetscCall(PetscBTDestroy(&bt));
  PetscCall(ISCreateBlock(PetscObjectComm((PetscObject)dm), dof, nv / dof, gidx, PETSC_OWN_POINTER, &is));
  PetscCall(ISLocalToGlobalMappingCreateIS(is, &ltog));
  PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));
  PetscCall(ISLocalToGlobalMappingDestroy(&ltog));
  PetscCall(ISDestroy(&is));

  /* Preallocation */
  if (dm->prealloc_skip) {
    PetscCall(MatSetUp(J));
  } else {
    /* use a MATPREALLOCATOR of the same local shape to compute the nonzero pattern */
    PetscCall(MatISGetLocalMat(J, &lJ));
    PetscCall(MatGetLocalToGlobalMapping(lJ, &ltog, NULL));
    PetscCall(MatCreate(PetscObjectComm((PetscObject)lJ), &P));
    PetscCall(MatSetType(P, MATPREALLOCATOR));
    PetscCall(MatSetLocalToGlobalMapping(P, ltog, ltog));
    PetscCall(MatGetSize(lJ, &N, NULL));
    PetscCall(MatGetLocalSize(lJ, &n, NULL));
    PetscCall(MatSetSizes(P, n, n, N, N));
    PetscCall(MatSetBlockSize(P, dof));
    PetscCall(MatSetUp(P));
    for (i = 0; i < nel; i++) PetscCall(MatSetValuesBlockedLocal(P, nen, e_loc + i * nen, nen, e_loc + i * nen, NULL, INSERT_VALUES));
    PetscCall(MatPreallocatorPreallocate(P, (PetscBool)!da->prealloc_only, lJ));
    PetscCall(MatISRestoreLocalMat(J, &lJ));
    PetscCall(DMDARestoreElements(dm, &nel, &nen, &e_loc));
    PetscCall(MatDestroy(&P));

    PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Preallocates (and optionally zero-fills) a 2d SELL matrix for a box or star stencil */
PetscErrorCode DMCreateMatrix_DA_2d_MPISELL(DM da, Mat J)
{
  PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny, m, n, dim, s, *cols = NULL, k, nc, *rows = NULL, col, cnt, l, p;
  PetscInt               lstart, lend, pstart, pend, *dnz, *onz;
  MPI_Comm               comm;
  PetscScalar           *values;
  DMBoundaryType         bx, by;
  ISLocalToGlobalMapping ltog;
  DMDAStencilType        st;

  PetscFunctionBegin;
  /*
     nc - number of components per grid point
     col - number of colors needed in one direction for single component problem
  */
  PetscCall(DMDAGetInfo(da, &dim, &m, &n, NULL, NULL, NULL, NULL, &nc, &s, &bx, &by, NULL, &st));
  col = 2 * s + 1;
  PetscCall(DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL));
  PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL));
  PetscCall(PetscObjectGetComm((PetscObject)da, &comm));

  PetscCall(PetscMalloc2(nc, &rows, col * col * nc * nc, &cols));
  PetscCall(DMGetLocalToGlobalMapping(da, &ltog));

  PetscCall(MatSetBlockSize(J, nc));
  /* determine the matrix preallocation information */
  MatPreallocateBegin(comm, nc * nx * ny, nc * nx * ny, dnz, onz);
  for (i = xs; i < xs + nx; i++) {
    /* stencil extent in x, clipped at physical boundaries unless periodic */
    pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
    pend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));

    for (j = ys; j < ys + ny; j++) {
      slot = i - gxs + gnx * (j - gys);

      /* stencil extent in y, clipped at physical boundaries unless periodic */
      lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
      lend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));

      cnt = 0;
      for (k = 0; k < nc; k++) {
        for (l = lstart; l < lend + 1; l++) {
          for (p = pstart; p < pend + 1; p++) {
            if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star have either l = 0 or p = 0 */
              cols[cnt++] = k + nc * (slot + gnx * l + p);
            }
          }
        }
        rows[k] = k + nc * (slot);
      }
      PetscCall(MatPreallocateSetLocal(ltog, nc, rows, ltog, cnt, cols, dnz, onz));
    }
  }
  PetscCall(MatSetBlockSize(J, nc));
  PetscCall(MatSeqSELLSetPreallocation(J, 0, dnz));
  PetscCall(MatMPISELLSetPreallocation(J, 0, dnz, 0, onz));
  MatPreallocateEnd(dnz, onz);

  PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    PetscCall(PetscCalloc1(col * col * nc * nc, &values));
    for (i = xs; i < xs + nx; i++) {
      pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
      pend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));

      for (j = ys; j < ys + ny; j++) {
        slot = i - gxs + gnx * (j - gys);

        lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
        lend = (by == DM_BOUNDARY_PERIODIC) ?
s : (PetscMin(s, n - j - 1)); 901 902 cnt = 0; 903 for (k = 0; k < nc; k++) { 904 for (l = lstart; l < lend + 1; l++) { 905 for (p = pstart; p < pend + 1; p++) { 906 if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star have either l = 0 or p = 0 */ 907 cols[cnt++] = k + nc * (slot + gnx * l + p); 908 } 909 } 910 } 911 rows[k] = k + nc * (slot); 912 } 913 PetscCall(MatSetValuesLocal(J, nc, rows, cnt, cols, values, INSERT_VALUES)); 914 } 915 } 916 PetscCall(PetscFree(values)); 917 /* do not copy values to GPU since they are all zero and not yet needed there */ 918 PetscCall(MatBindToCPU(J, PETSC_TRUE)); 919 PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY)); 920 PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY)); 921 PetscCall(MatBindToCPU(J, PETSC_FALSE)); 922 PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE)); 923 } 924 PetscCall(PetscFree2(rows, cols)); 925 PetscFunctionReturn(PETSC_SUCCESS); 926 } 927 928 PetscErrorCode DMCreateMatrix_DA_3d_MPISELL(DM da, Mat J) 929 { 930 PetscInt xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny; 931 PetscInt m, n, dim, s, *cols = NULL, k, nc, *rows = NULL, col, cnt, l, p, *dnz = NULL, *onz = NULL; 932 PetscInt istart, iend, jstart, jend, kstart, kend, zs, nz, gzs, gnz, ii, jj, kk, M, N, P; 933 MPI_Comm comm; 934 PetscScalar *values; 935 DMBoundaryType bx, by, bz; 936 ISLocalToGlobalMapping ltog; 937 DMDAStencilType st; 938 939 PetscFunctionBegin; 940 /* 941 nc - number of components per grid point 942 col - number of colors needed in one direction for single component problem 943 */ 944 PetscCall(DMDAGetInfo(da, &dim, &m, &n, &p, &M, &N, &P, &nc, &s, &bx, &by, &bz, &st)); 945 col = 2 * s + 1; 946 PetscCall(DMDAGetCorners(da, &xs, &ys, &zs, &nx, &ny, &nz)); 947 PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, &gzs, &gnx, &gny, &gnz)); 948 PetscCall(PetscObjectGetComm((PetscObject)da, &comm)); 949 950 PetscCall(PetscMalloc2(nc, &rows, col * col * col * nc * nc, &cols)); 951 
  PetscCall(DMGetLocalToGlobalMapping(da, &ltog));

  PetscCall(MatSetBlockSize(J, nc));
  /* determine the matrix preallocation information */
  MatPreallocateBegin(comm, nc * nx * ny * nz, nc * nx * ny * nz, dnz, onz);
  for (i = xs; i < xs + nx; i++) {
    /* clamp each stencil direction at physical boundaries; periodic directions wrap */
    istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
    iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
    for (j = ys; j < ys + ny; j++) {
      jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
      jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
      for (k = zs; k < zs + nz; k++) {
        kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
        kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));

        slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs); /* local (ghosted) index of (i,j,k) */

        cnt = 0;
        for (l = 0; l < nc; l++) {
          for (ii = istart; ii < iend + 1; ii++) {
            for (jj = jstart; jj < jend + 1; jj++) {
              for (kk = kstart; kk < kend + 1; kk++) {
                if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/
                  cols[cnt++] = l + nc * (slot + ii + gnx * jj + gnx * gny * kk);
                }
              }
            }
          }
          rows[l] = l + nc * (slot);
        }
        PetscCall(MatPreallocateSetLocal(ltog, nc, rows, ltog, cnt, cols, dnz, onz));
      }
    }
  }
  PetscCall(MatSetBlockSize(J, nc));
  PetscCall(MatSeqSELLSetPreallocation(J, 0, dnz));
  PetscCall(MatMPISELLSetPreallocation(J, 0, dnz, 0, onz));
  MatPreallocateEnd(dnz, onz);
  PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    /* NOTE(review): allocates col^3*nc^3 zero entries while an insert uses at most nc*cnt <= col^3*nc^2;
       presumably deliberate over-allocation mirroring older code - confirm before shrinking */
    PetscCall(PetscCalloc1(col * col * col * nc * nc * nc, &values));
    for (i = xs; i < xs + nx; i++) {
      istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
      iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
      for (j = ys; j < ys + ny; j++) {
        jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
        jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
        for (k = zs; k < zs + nz; k++) {
          kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
          kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));

          slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);

          cnt = 0;
          for (l = 0; l < nc; l++) {
            for (ii = istart; ii < iend + 1; ii++) {
              for (jj = jstart; jj < jend + 1; jj++) {
                for (kk = kstart; kk < kend + 1; kk++) {
                  if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/
                    cols[cnt++] = l + nc * (slot + ii + gnx * jj + gnx * gny * kk);
                  }
                }
              }
            }
            rows[l] = l + nc * (slot);
          }
          PetscCall(MatSetValuesLocal(J, nc, rows, cnt, cols, values, INSERT_VALUES));
        }
      }
    }
    PetscCall(PetscFree(values));
    /* do not copy values to GPU since they are all zero and not yet needed there */
    PetscCall(MatBindToCPU(J, PETSC_TRUE));
    PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatBindToCPU(J, PETSC_FALSE));
    PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  }
  PetscCall(PetscFree2(rows, cols));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  DMCreateMatrix_DA_2d_MPIAIJ - Preallocates (and, unless da->prealloc_only, zero-fills)
  an AIJ matrix for a 2d DMDA with dense coupling between the nc components of a node.
*/
PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ(DM da, Mat J)
{
  PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny, m, n, dim, s, *cols = NULL, k, nc, *rows = NULL, col, cnt, l, p, M, N;
  PetscInt               lstart, lend, pstart, pend, *dnz, *onz;
  MPI_Comm               comm;
  DMBoundaryType         bx, by;
  ISLocalToGlobalMapping ltog, mltog;
  DMDAStencilType        st;
  PetscBool              removedups = PETSC_FALSE, alreadyboundtocpu =
PETSC_TRUE;

  PetscFunctionBegin;
  /*
         nc - number of components per grid point
         col - number of colors needed in one direction for single component problem
  */
  PetscCall(DMDAGetInfo(da, &dim, &m, &n, NULL, &M, &N, NULL, &nc, &s, &bx, &by, NULL, &st));
  /* with no wrapping the insertion loops below generate strictly increasing column indices,
     so MAT_SORTED_FULL can be used to speed up MatSetValuesLocal() */
  if (bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE) PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_TRUE));
  col = 2 * s + 1;
  /*
     With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times
     because of "wrapping" around the end of the domain hitting an entry already counted in the other direction.
  */
  if (M == 1 && 2 * s >= m) removedups = PETSC_TRUE;
  if (N == 1 && 2 * s >= n) removedups = PETSC_TRUE;
  PetscCall(DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL));
  PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL));
  PetscCall(PetscObjectGetComm((PetscObject)da, &comm));

  PetscCall(PetscMalloc2(nc, &rows, col * col * nc * nc, &cols));
  PetscCall(DMGetLocalToGlobalMapping(da, &ltog));

  PetscCall(MatSetBlockSize(J, nc));
  /* determine the matrix preallocation information */
  MatPreallocateBegin(comm, nc * nx * ny, nc * nx * ny, dnz, onz);
  for (i = xs; i < xs + nx; i++) {
    /* clamp the stencil at physical boundaries; periodic directions wrap */
    pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
    pend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));

    for (j = ys; j < ys + ny; j++) {
      slot = i - gxs + gnx * (j - gys); /* local (ghosted) index of grid point (i,j) */

      lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
      lend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));

      cnt = 0;
      for (k = 0; k < nc; k++) {
        for (l = lstart; l < lend + 1; l++) {
          for (p = pstart; p < pend + 1; p++) {
            if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star have either l = 0 or p = 0 */
              cols[cnt++] = k + nc * (slot + gnx * l + p);
            }
          }
        }
        rows[k] = k + nc * (slot);
      }
      if (removedups) PetscCall(MatPreallocateSetLocalRemoveDups(ltog, nc, rows, ltog, cnt, cols, dnz, onz));
      else PetscCall(MatPreallocateSetLocal(ltog, nc, rows, ltog, cnt, cols, dnz, onz));
    }
  }
  PetscCall(MatSetBlockSize(J, nc));
  PetscCall(MatSeqAIJSetPreallocation(J, 0, dnz));
  PetscCall(MatMPIAIJSetPreallocation(J, 0, dnz, 0, onz));
  MatPreallocateEnd(dnz, onz);
  /* do not override a mapping the matrix may already have */
  PetscCall(MatGetLocalToGlobalMapping(J, &mltog, NULL));
  if (!mltog) PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    for (i = xs; i < xs + nx; i++) {
      pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
      pend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));

      for (j = ys; j < ys + ny; j++) {
        slot = i - gxs + gnx * (j - gys);

        lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
        lend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));

        cnt = 0;
        /* geometric loops outermost, components innermost, so columns come out sorted */
        for (l = lstart; l < lend + 1; l++) {
          for (p = pstart; p < pend + 1; p++) {
            if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star have either l = 0 or p = 0 */
              cols[cnt++] = nc * (slot + gnx * l + p);
              for (k = 1; k < nc; k++) {
                cols[cnt] = 1 + cols[cnt - 1];
                cnt++;
              }
            }
          }
        }
        for (k = 0; k < nc; k++) rows[k] = k + nc * (slot);
        PetscCall(MatSetValuesLocal(J, nc, rows, cnt, cols, NULL, INSERT_VALUES));
      }
    }
    /* do not copy values to GPU since they are all zero and not yet needed there */
    PetscCall(MatBoundToCPU(J, &alreadyboundtocpu));
    PetscCall(MatBindToCPU(J, PETSC_TRUE));
    PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
    if (!alreadyboundtocpu) PetscCall(MatBindToCPU(J, PETSC_FALSE));
    PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
    if (bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE) PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_FALSE));
  }
  PetscCall(PetscFree2(rows, cols));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  DMCreateMatrix_DA_2d_MPIAIJ_Fill - Like DMCreateMatrix_DA_2d_MPIAIJ but uses the per-block
  fill patterns dd->dfill (diagonal block) and dd->ofill (off-diagonal blocks), stored in the
  sparse CSR-like layout built by DMDASetBlockFills(), instead of dense nc x nc coupling.
*/
PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ_Fill(DM da, Mat J)
{
  PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
  PetscInt               m, n, dim, s, *cols, k, nc, row, col, cnt, maxcnt = 0, l, p, M, N;
  PetscInt               lstart, lend, pstart, pend, *dnz, *onz;
  DM_DA                 *dd = (DM_DA *)da->data;
  PetscInt               ifill_col, *ofill = dd->ofill, *dfill = dd->dfill;
  MPI_Comm               comm;
  DMBoundaryType         bx, by;
  ISLocalToGlobalMapping ltog;
  DMDAStencilType        st;
  PetscBool              removedups = PETSC_FALSE;

  PetscFunctionBegin;
  /*
         nc - number of components per grid point
         col - number of colors needed in one direction for single component problem
  */
  PetscCall(DMDAGetInfo(da, &dim, &m, &n, NULL, &M, &N, NULL, &nc, &s, &bx, &by, NULL, &st));
  col =
2 * s + 1;
  /*
     With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times
     because of "wrapping" around the end of the domain hitting an entry already counted in the other direction.
  */
  if (M == 1 && 2 * s >= m) removedups = PETSC_TRUE;
  if (N == 1 && 2 * s >= n) removedups = PETSC_TRUE;
  PetscCall(DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL));
  PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL));
  PetscCall(PetscObjectGetComm((PetscObject)da, &comm));

  /* one row at a time is assembled here, so only col*col*nc columns are ever needed */
  PetscCall(PetscMalloc1(col * col * nc, &cols));
  PetscCall(DMGetLocalToGlobalMapping(da, &ltog));

  PetscCall(MatSetBlockSize(J, nc));
  /* determine the matrix preallocation information */
  MatPreallocateBegin(comm, nc * nx * ny, nc * nx * ny, dnz, onz);
  for (i = xs; i < xs + nx; i++) {
    pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
    pend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));

    for (j = ys; j < ys + ny; j++) {
      slot = i - gxs + gnx * (j - gys);

      lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
      lend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));

      for (k = 0; k < nc; k++) {
        cnt = 0;
        for (l = lstart; l < lend + 1; l++) {
          for (p = pstart; p < pend + 1; p++) {
            if (l || p) {
              /* off-node neighbor: couple only through the ofill pattern of component k */
              if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star */
                for (ifill_col = ofill[k]; ifill_col < ofill[k + 1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc * (slot + gnx * l + p);
              }
            } else {
              /* the node itself: couple through dfill, or densely if no dfill was given */
              if (dfill) {
                for (ifill_col = dfill[k]; ifill_col < dfill[k + 1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc * (slot + gnx * l + p);
              } else {
                for (ifill_col = 0; ifill_col < nc; ifill_col++) cols[cnt++] = ifill_col + nc * (slot + gnx * l + p);
              }
            }
          }
        }
        row    = k + nc * (slot);
        maxcnt = PetscMax(maxcnt, cnt);
        if (removedups) PetscCall(MatPreallocateSetLocalRemoveDups(ltog, 1, &row, ltog, cnt, cols, dnz, onz));
        else PetscCall(MatPreallocateSetLocal(ltog, 1, &row, ltog, cnt, cols, dnz, onz));
      }
    }
  }
  PetscCall(MatSeqAIJSetPreallocation(J, 0, dnz));
  PetscCall(MatMPIAIJSetPreallocation(J, 0, dnz, 0, onz));
  MatPreallocateEnd(dnz, onz);
  PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    for (i = xs; i < xs + nx; i++) {
      pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
      pend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));

      for (j = ys; j < ys + ny; j++) {
        slot = i - gxs + gnx * (j - gys);

        lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
        lend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));

        for (k = 0; k < nc; k++) {
          cnt = 0;
          for (l = lstart; l < lend + 1; l++) {
            for (p = pstart; p < pend + 1; p++) {
              if (l || p) {
                if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star */
                  for (ifill_col = ofill[k]; ifill_col < ofill[k + 1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc * (slot + gnx * l + p);
                }
              } else {
                if (dfill) {
                  for (ifill_col = dfill[k]; ifill_col < dfill[k + 1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc * (slot + gnx * l + p);
                } else {
                  for (ifill_col = 0; ifill_col < nc; ifill_col++) cols[cnt++] = ifill_col + nc * (slot + gnx * l + p);
                }
              }
            }
          }
          row = k + nc * (slot);
          PetscCall(MatSetValuesLocal(J, 1, &row, cnt, cols, NULL, INSERT_VALUES));
        }
      }
    }
    /* do not copy values to GPU since they are all zero and not yet needed there */
    PetscCall(MatBindToCPU(J, PETSC_TRUE));
    PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatBindToCPU(J, PETSC_FALSE));
    PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  }
  PetscCall(PetscFree(cols));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  DMCreateMatrix_DA_3d_MPIAIJ - Preallocates (and, unless da->prealloc_only, zero-fills)
  an AIJ matrix for a 3d DMDA with dense coupling between the nc components of a node.
*/
PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ(DM da, Mat J)
{
  PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
  PetscInt               m, n, dim, s, *cols = NULL, k, nc, *rows = NULL, col, cnt, l, p, *dnz = NULL, *onz = NULL;
  PetscInt               istart, iend, jstart, jend, kstart, kend, zs, nz, gzs, gnz, ii, jj, kk, M, N, P;
  MPI_Comm               comm;
  DMBoundaryType         bx, by, bz;
  ISLocalToGlobalMapping ltog, mltog;
  DMDAStencilType        st;
  PetscBool              removedups = PETSC_FALSE;

  PetscFunctionBegin;
  /*
         nc - number of components per grid point
         col - number of colors needed in one direction for single component problem
  */
  PetscCall(DMDAGetInfo(da, &dim, &m,
&n, &p, &M, &N, &P, &nc, &s, &bx, &by, &bz, &st));
  /* with no wrapping the insertion loops below generate strictly increasing column indices,
     so MAT_SORTED_FULL can be used to speed up MatSetValuesLocal() */
  if (bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE && bz == DM_BOUNDARY_NONE) PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_TRUE));
  col = 2 * s + 1;

  /*
     With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times
     because of "wrapping" around the end of the domain hitting an entry already counted in the other direction.
  */
  if (M == 1 && 2 * s >= m) removedups = PETSC_TRUE;
  if (N == 1 && 2 * s >= n) removedups = PETSC_TRUE;
  if (P == 1 && 2 * s >= p) removedups = PETSC_TRUE;

  PetscCall(DMDAGetCorners(da, &xs, &ys, &zs, &nx, &ny, &nz));
  PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, &gzs, &gnx, &gny, &gnz));
  PetscCall(PetscObjectGetComm((PetscObject)da, &comm));

  PetscCall(PetscMalloc2(nc, &rows, col * col * col * nc * nc, &cols));
  PetscCall(DMGetLocalToGlobalMapping(da, &ltog));

  PetscCall(MatSetBlockSize(J, nc));
  /* determine the matrix preallocation information */
  MatPreallocateBegin(comm, nc * nx * ny * nz, nc * nx * ny * nz, dnz, onz);
  for (i = xs; i < xs + nx; i++) {
    istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
    iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
    for (j = ys; j < ys + ny; j++) {
      jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
      jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
      for (k = zs; k < zs + nz; k++) {
        kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
        kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));

        slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs); /* local (ghosted) index of (i,j,k) */

        cnt = 0;
        for (l = 0; l < nc; l++) {
          for (ii = istart; ii < iend + 1; ii++) {
            for (jj = jstart; jj < jend + 1; jj++) {
              for (kk = kstart; kk < kend + 1; kk++) {
                if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/
                  cols[cnt++] = l + nc * (slot + ii + gnx * jj + gnx * gny * kk);
                }
              }
            }
          }
          rows[l] = l + nc * (slot);
        }
        if (removedups) PetscCall(MatPreallocateSetLocalRemoveDups(ltog, nc, rows, ltog, cnt, cols, dnz, onz));
        else PetscCall(MatPreallocateSetLocal(ltog, nc, rows, ltog, cnt, cols, dnz, onz));
      }
    }
  }
  PetscCall(MatSetBlockSize(J, nc));
  PetscCall(MatSeqAIJSetPreallocation(J, 0, dnz));
  PetscCall(MatMPIAIJSetPreallocation(J, 0, dnz, 0, onz));
  MatPreallocateEnd(dnz, onz);
  /* do not override a mapping the matrix may already have */
  PetscCall(MatGetLocalToGlobalMapping(J, &mltog, NULL));
  if (!mltog) PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    for (i = xs; i < xs + nx; i++) {
      istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
      iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
      for (j = ys; j < ys + ny; j++) {
        jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
        jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
        for (k = zs; k < zs + nz; k++) {
          kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
          kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));

          slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);

          cnt = 0;
          /* note the kk/jj/ii (slowest-to-fastest varying) loop order with components innermost:
             this produces the column indices in ascending order */
          for (kk = kstart; kk < kend + 1; kk++) {
            for (jj = jstart; jj < jend + 1; jj++) {
              for (ii = istart; ii < iend + 1; ii++) {
                if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/
                  cols[cnt++] = nc * (slot + ii + gnx * jj + gnx * gny * kk);
                  for (l = 1; l < nc; l++) {
                    cols[cnt] = 1 + cols[cnt - 1];
                    cnt++;
                  }
                }
              }
            }
          }
          rows[0] = nc * (slot);
          for (l = 1; l < nc; l++) rows[l] = 1 + rows[l - 1];
          PetscCall(MatSetValuesLocal(J, nc, rows, cnt, cols, NULL, INSERT_VALUES));
        }
      }
    }
    /* do not copy values to GPU since they are all zero and not yet needed there */
    PetscCall(MatBindToCPU(J, PETSC_TRUE));
    PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
    if (bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE && bz == DM_BOUNDARY_NONE) PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_FALSE));
    PetscCall(MatBindToCPU(J, PETSC_FALSE));
    PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  }
  PetscCall(PetscFree2(rows, cols));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  DMCreateMatrix_DA_1d_MPIAIJ_Fill - Preallocates (and, unless da->prealloc_only, zero-fills)
  an AIJ matrix for a 1d DMDA using the dd->dfill/dd->ofill block fill patterns.
  Only stencil width s <= 1 is supported (checked below).
*/
PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ_Fill(DM da, Mat J)
{
  DM_DA                 *dd = (DM_DA *)da->data;
  PetscInt               xs, nx, i, j, gxs, gnx, row, k, l;
  PetscInt               m, dim, s, *cols = NULL, nc, cnt, maxcnt = 0, *ocols;
  PetscInt              *ofill = dd->ofill, *dfill = dd->dfill;
  DMBoundaryType         bx;
  ISLocalToGlobalMapping ltog;
  PetscMPIInt            rank, size;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)da), &rank));
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)da), &size));

  /*
         nc - number of components per grid point
  */
  PetscCall(DMDAGetInfo(da, &dim, &m, NULL,
NULL, NULL, NULL, NULL, &nc, &s, &bx, NULL, NULL, NULL));
  PetscCheck(s <= 1, PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "Matrix creation for 1d not implemented correctly for stencil width larger than 1");
  PetscCall(DMDAGetCorners(da, &xs, NULL, NULL, &nx, NULL, NULL));
  PetscCall(DMDAGetGhostCorners(da, &gxs, NULL, NULL, &gnx, NULL, NULL));

  PetscCall(MatSetBlockSize(J, nc));
  /* cols/ocols are (ab)used here as the per-row diagonal/off-diagonal nonzero COUNTS */
  PetscCall(PetscCalloc2(nx * nc, &cols, nx * nc, &ocols));

  /*
        note should be smaller for first and last process with no periodic
        does not handle dfill
  */
  cnt = 0;
  /* coupling with process to the left */
  for (i = 0; i < s; i++) {
    for (j = 0; j < nc; j++) {
      ocols[cnt] = ((rank == 0) ? 0 : (s - i) * (ofill[j + 1] - ofill[j]));
      cols[cnt]  = dfill[j + 1] - dfill[j] + (s + i) * (ofill[j + 1] - ofill[j]);
      if (rank == 0 && (dd->bx == DM_BOUNDARY_PERIODIC)) {
        /* periodic wrap of rank 0's left neighbors: off-process unless there is only one rank */
        if (size > 1) ocols[cnt] += (s - i) * (ofill[j + 1] - ofill[j]);
        else cols[cnt] += (s - i) * (ofill[j + 1] - ofill[j]);
      }
      maxcnt = PetscMax(maxcnt, ocols[cnt] + cols[cnt]);
      cnt++;
    }
  }
  /* interior rows: full 2*s-wide off-node coupling plus the diagonal-block fill */
  for (i = s; i < nx - s; i++) {
    for (j = 0; j < nc; j++) {
      cols[cnt] = dfill[j + 1] - dfill[j] + 2 * s * (ofill[j + 1] - ofill[j]);
      maxcnt    = PetscMax(maxcnt, ocols[cnt] + cols[cnt]);
      cnt++;
    }
  }
  /* coupling with process to the right */
  for (i = nx - s; i < nx; i++) {
    for (j = 0; j < nc; j++) {
      ocols[cnt] = ((rank == (size - 1)) ? 0 : (i - nx + s + 1) * (ofill[j + 1] - ofill[j]));
      cols[cnt]  = dfill[j + 1] - dfill[j] + (s + nx - i - 1) * (ofill[j + 1] - ofill[j]);
      if ((rank == size - 1) && (dd->bx == DM_BOUNDARY_PERIODIC)) {
        if (size > 1) ocols[cnt] += (i - nx + s + 1) * (ofill[j + 1] - ofill[j]);
        else cols[cnt] += (i - nx + s + 1) * (ofill[j + 1] - ofill[j]);
      }
      maxcnt = PetscMax(maxcnt, ocols[cnt] + cols[cnt]);
      cnt++;
    }
  }

  PetscCall(MatSeqAIJSetPreallocation(J, 0, cols));
  PetscCall(MatMPIAIJSetPreallocation(J, 0, cols, 0, ocols));
  PetscCall(PetscFree2(cols, ocols));

  PetscCall(DMGetLocalToGlobalMapping(da, &ltog));
  PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    /* cols is re-used below to hold GLOBAL column indices; rows are global too, hence MatSetValues() */
    PetscCall(PetscMalloc1(maxcnt, &cols));
    row = xs * nc;
    /* coupling with process to the left */
    for (i = xs; i < xs + s; i++) {
      for (j = 0; j < nc; j++) {
        cnt = 0;
        if (rank) {
          for (l = 0; l < s; l++) {
            for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i - s + l) * nc + ofill[k];
          }
        }
        if (rank == 0 && (dd->bx == DM_BOUNDARY_PERIODIC)) {
          /* wrapped left neighbor: global point m + i - s - l */
          for (l = 0; l < s; l++) {
            for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (m + i - s - l) * nc + ofill[k];
          }
        }
        if (dfill) {
          for (k = dfill[j]; k < dfill[j + 1]; k++) cols[cnt++] = i * nc + dfill[k];
        } else {
          for (k = 0; k < nc; k++) cols[cnt++] = i * nc + k;
        }
        for (l = 0; l < s; l++) {
          for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i + s - l) * nc + ofill[k];
        }
        PetscCall(MatSetValues(J, 1, &row, cnt, cols, NULL, INSERT_VALUES));
        row++;
      }
    }
    for (i = xs + s; i < xs + nx - s; i++) {
      for (j = 0; j < nc; j++) {
        cnt = 0;
        for (l = 0; l < s; l++) {
          for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i - s + l) * nc + ofill[k];
        }
        if (dfill) {
          for (k = dfill[j]; k < dfill[j + 1]; k++) cols[cnt++] = i * nc + dfill[k];
        } else {
          for (k = 0; k < nc; k++) cols[cnt++] = i * nc + k;
        }
        for (l = 0; l < s; l++) {
          for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i + s - l) * nc + ofill[k];
        }
        PetscCall(MatSetValues(J, 1, &row, cnt, cols, NULL, INSERT_VALUES));
        row++;
      }
    }
    /* coupling with process to the right */
    for (i = xs + nx - s; i < xs + nx; i++) {
      for (j = 0; j < nc; j++) {
        cnt = 0;
        for (l = 0; l < s; l++) {
          for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i - s + l) * nc + ofill[k];
        }
        if (dfill) {
          for (k = dfill[j]; k < dfill[j + 1]; k++) cols[cnt++] = i * nc + dfill[k];
        } else {
          for (k = 0; k < nc; k++) cols[cnt++] = i * nc + k;
        }
        if (rank < size - 1) {
          for (l = 0; l < s; l++) {
            for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i + s - l) * nc + ofill[k];
          }
        }
        if ((rank == size - 1) && (dd->bx == DM_BOUNDARY_PERIODIC)) {
          /* wrapped right neighbor; with s == 1 (checked above) and l == 0 this is i + 1 - m */
          for (l = 0; l < s; l++) {
            for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i - s - l - m + 2) * nc + ofill[k];
          }
        }
        PetscCall(MatSetValues(J, 1, &row, cnt, cols, NULL, INSERT_VALUES));
        row++;
      }
    }
    PetscCall(PetscFree(cols));
    /* do not copy values to GPU since they are all zero and not yet needed there */
    PetscCall(MatBindToCPU(J, PETSC_TRUE));
    PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatBindToCPU(J, PETSC_FALSE));
    PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  DMCreateMatrix_DA_1d_MPIAIJ - Preallocates (and, unless da->prealloc_only, zero-fills)
  an AIJ matrix for a 1d DMDA with dense nc x nc coupling, using a simple uniform
  per-row estimate of col*nc nonzeros rather than exact counts.
*/
PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ(DM da, Mat J)
{
  PetscInt               xs, nx, i, i1, slot, gxs, gnx;
  PetscInt               m, dim, s, *cols = NULL, nc, *rows = NULL, col, cnt, l;
  PetscInt               istart, iend;
  DMBoundaryType         bx;
  ISLocalToGlobalMapping ltog, mltog;

  PetscFunctionBegin;
  /*
         nc - number of components per grid point
         col - number of colors needed in one direction for single component problem
  */
  PetscCall(DMDAGetInfo(da, &dim, &m, NULL, NULL, NULL, NULL, NULL, &nc, &s, &bx, NULL, NULL, NULL));
  /* with no wrapping the insertion loop below generates strictly increasing column indices,
     so MAT_SORTED_FULL can be used to speed up MatSetValuesLocal() */
  if (bx == DM_BOUNDARY_NONE) PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_TRUE));
  col = 2 * s + 1;

  PetscCall(DMDAGetCorners(da, &xs, NULL, NULL, &nx, NULL, NULL));
  PetscCall(DMDAGetGhostCorners(da, &gxs, NULL, NULL, &gnx, NULL, NULL));

  PetscCall(MatSetBlockSize(J, nc));
  PetscCall(MatSeqAIJSetPreallocation(J, col * nc, NULL));
  PetscCall(MatMPIAIJSetPreallocation(J, col * nc, NULL, col * nc, NULL));

  PetscCall(DMGetLocalToGlobalMapping(da, &ltog));
  /* do not override a mapping the matrix may already have */
  PetscCall(MatGetLocalToGlobalMapping(J, &mltog, NULL));
  if (!mltog) PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    PetscCall(PetscMalloc2(nc, &rows, col * nc * nc, &cols));
    for (i = xs; i < xs + nx; i++) {
      /* clamp the stencil to the locally available ghost region */
      istart = PetscMax(-s, gxs - i);
      iend   = PetscMin(s, gxs + gnx - i - 1);
      slot   = i - gxs; /* local (ghosted) index of grid point i */

      cnt = 0;
      for (i1 = istart; i1 < iend + 1; i1++) {
        cols[cnt++] = nc * (slot + i1);
        for (l = 1; l < nc; l++) {
          cols[cnt] = 1 + cols[cnt - 1];
          cnt++;
        }
      }
      rows[0] = nc * (slot);
      for (l = 1; l < nc; l++) rows[l] = 1 + rows[l - 1];
      PetscCall(MatSetValuesLocal(J, nc, rows, cnt, cols, NULL, INSERT_VALUES));
    }
    /* do not copy values to GPU since they are all zero and not yet needed there */
    PetscCall(MatBindToCPU(J, PETSC_TRUE));
    PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
    if (bx == DM_BOUNDARY_NONE) PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_FALSE));
    PetscCall(MatBindToCPU(J, PETSC_FALSE));
    PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
    PetscCall(PetscFree2(rows, cols));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  DMCreateMatrix_DA_1d_SeqAIJ_NoPreallocation - Variant of DMCreateMatrix_DA_1d_MPIAIJ
  that uses MatSeqAIJSetTotalPreallocation() (one total count for the whole matrix)
  instead of per-row preallocation.
*/
PetscErrorCode DMCreateMatrix_DA_1d_SeqAIJ_NoPreallocation(DM da, Mat J)
{
  PetscInt               xs, nx, i, i1, slot, gxs, gnx;
  PetscInt               m, dim, s, *cols = NULL, nc, *rows = NULL, col, cnt, l;
  PetscInt               istart, iend;
  DMBoundaryType         bx;
  ISLocalToGlobalMapping ltog, mltog;

  PetscFunctionBegin;
  /*
         nc - number of components per grid point
         col - number of colors needed in one direction for single component problem
  */
  PetscCall(DMDAGetInfo(da, &dim, &m, NULL, NULL, NULL, NULL, NULL, &nc, &s, &bx, NULL, NULL, NULL));
  col = 2 * s + 1;

  PetscCall(DMDAGetCorners(da, &xs, NULL, NULL, &nx, NULL, NULL));
  PetscCall(DMDAGetGhostCorners(da, &gxs, NULL, NULL, &gnx, NULL, NULL));

  PetscCall(MatSetBlockSize(J, nc));
PetscCall(MatSeqAIJSetTotalPreallocation(J, nx * nc * col * nc)); 1644 1645 PetscCall(DMGetLocalToGlobalMapping(da, <og)); 1646 PetscCall(MatGetLocalToGlobalMapping(J, &mltog, NULL)); 1647 if (!mltog) PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog)); 1648 1649 /* 1650 For each node in the grid: we get the neighbors in the local (on processor ordering 1651 that includes the ghost points) then MatSetValuesLocal() maps those indices to the global 1652 PETSc ordering. 1653 */ 1654 if (!da->prealloc_only) { 1655 PetscCall(PetscMalloc2(nc, &rows, col * nc * nc, &cols)); 1656 for (i = xs; i < xs + nx; i++) { 1657 istart = PetscMax(-s, gxs - i); 1658 iend = PetscMin(s, gxs + gnx - i - 1); 1659 slot = i - gxs; 1660 1661 cnt = 0; 1662 for (i1 = istart; i1 < iend + 1; i1++) { 1663 cols[cnt++] = nc * (slot + i1); 1664 for (l = 1; l < nc; l++) { 1665 cols[cnt] = 1 + cols[cnt - 1]; 1666 cnt++; 1667 } 1668 } 1669 rows[0] = nc * (slot); 1670 for (l = 1; l < nc; l++) rows[l] = 1 + rows[l - 1]; 1671 PetscCall(MatSetValuesLocal(J, nc, rows, cnt, cols, NULL, INSERT_VALUES)); 1672 } 1673 /* do not copy values to GPU since they are all zero and not yet needed there */ 1674 PetscCall(MatBindToCPU(J, PETSC_TRUE)); 1675 PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY)); 1676 PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY)); 1677 if (bx == DM_BOUNDARY_NONE) PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_FALSE)); 1678 PetscCall(MatBindToCPU(J, PETSC_FALSE)); 1679 PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE)); 1680 PetscCall(PetscFree2(rows, cols)); 1681 } 1682 PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_FALSE)); 1683 PetscFunctionReturn(PETSC_SUCCESS); 1684 } 1685 1686 PetscErrorCode DMCreateMatrix_DA_2d_MPIBAIJ(DM da, Mat J) 1687 { 1688 PetscInt xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny; 1689 PetscInt m, n, dim, s, *cols, nc, col, cnt, *dnz, *onz; 1690 PetscInt istart, iend, jstart, jend, ii, jj; 1691 MPI_Comm comm; 1692 PetscScalar *values; 1693 
  DMBoundaryType         bx, by;
  DMDAStencilType        st;
  ISLocalToGlobalMapping ltog;

  PetscFunctionBegin;
  /*
         nc - number of components per grid point
         col - number of colors needed in one direction for single component problem
  */
  PetscCall(DMDAGetInfo(da, &dim, &m, &n, NULL, NULL, NULL, NULL, &nc, &s, &bx, &by, NULL, &st));
  col = 2 * s + 1;

  PetscCall(DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL));
  PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL));
  PetscCall(PetscObjectGetComm((PetscObject)da, &comm));

  PetscCall(PetscMalloc1(col * col * nc * nc, &cols));

  PetscCall(DMGetLocalToGlobalMapping(da, &ltog));

  /* determine the matrix preallocation information */
  MatPreallocateBegin(comm, nx * ny, nx * ny, dnz, onz);
  for (i = xs; i < xs + nx; i++) {
    /* clip the stencil at the physical boundary unless that direction is periodic */
    istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
    iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
    for (j = ys; j < ys + ny; j++) {
      jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
      jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
      slot   = i - gxs + gnx * (j - gys);

      /* Find block columns in block row */
      cnt = 0;
      for (ii = istart; ii < iend + 1; ii++) {
        for (jj = jstart; jj < jend + 1; jj++) {
          if (st == DMDA_STENCIL_BOX || !ii || !jj) { /* BOX or on the STAR */
            cols[cnt++] = slot + ii + gnx * jj;
          }
        }
      }
      PetscCall(MatPreallocateSetLocalBlock(ltog, 1, &slot, ltog, cnt, cols, dnz, onz));
    }
  }
  PetscCall(MatSeqBAIJSetPreallocation(J, nc, 0, dnz));
  PetscCall(MatMPIBAIJSetPreallocation(J, nc, 0, dnz, 0, onz));
  MatPreallocateEnd(dnz, onz);

  PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    /* values is calloc'ed: all-zero entries, only the nonzero structure is inserted */
    PetscCall(PetscCalloc1(col * col * nc * nc, &values));
    for (i = xs; i < xs + nx; i++) {
      istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
      iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
      for (j = ys; j < ys + ny; j++) {
        jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
        jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
        slot   = i - gxs + gnx * (j - gys);
        cnt    = 0;
        for (ii = istart; ii < iend + 1; ii++) {
          for (jj = jstart; jj < jend + 1; jj++) {
            if (st == DMDA_STENCIL_BOX || !ii || !jj) { /* BOX or on the STAR */
              cols[cnt++] = slot + ii + gnx * jj;
            }
          }
        }
        PetscCall(MatSetValuesBlockedLocal(J, 1, &slot, cnt, cols, values, INSERT_VALUES));
      }
    }
    PetscCall(PetscFree(values));
    /* do not copy values to GPU since they are all zero and not yet needed there */
    PetscCall(MatBindToCPU(J, PETSC_TRUE));
    PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatBindToCPU(J, PETSC_FALSE));
    PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  }
  PetscCall(PetscFree(cols));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  Creates and preallocates a 3d BAIJ matrix for a DMDA; the 3d analogue of
  DMCreateMatrix_DA_2d_MPIBAIJ() above.
*/
PetscErrorCode DMCreateMatrix_DA_3d_MPIBAIJ(DM da, Mat J)
{
  PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
  PetscInt               m, n, dim, s, *cols, k, nc, col, cnt, p, *dnz, *onz;
  PetscInt               istart, iend, jstart, jend, kstart, kend, zs, nz, gzs, gnz, ii, jj, kk;
  MPI_Comm               comm;
  PetscScalar           *values;
  DMBoundaryType         bx, by, bz;
  DMDAStencilType        st;
  ISLocalToGlobalMapping ltog;

  PetscFunctionBegin;
  /*
         nc - number of components per grid point
         col - number of colors needed in one direction for single component problem
  */
  PetscCall(DMDAGetInfo(da, &dim, &m, &n, &p, NULL, NULL, NULL, &nc, &s, &bx, &by, &bz, &st));
  col = 2 * s + 1;

  PetscCall(DMDAGetCorners(da, &xs, &ys, &zs, &nx, &ny, &nz));
  PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, &gzs, &gnx, &gny, &gnz));
  PetscCall(PetscObjectGetComm((PetscObject)da, &comm));

  PetscCall(PetscMalloc1(col * col * col, &cols));

  PetscCall(DMGetLocalToGlobalMapping(da, &ltog));

  /* determine the matrix preallocation information */
  MatPreallocateBegin(comm, nx * ny * nz, nx * ny * nz, dnz, onz);
  for (i = xs; i < xs + nx; i++) {
    /* clip the stencil at the physical boundary unless that direction is periodic */
    istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
    iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
    for (j = ys; j < ys + ny; j++) {
      jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
      jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
      for (k = zs; k < zs + nz; k++) {
        kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
        kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));

        slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);

        /* Find block columns in block row */
        cnt = 0;
        for (ii = istart; ii < iend + 1; ii++) {
          for (jj = jstart; jj < jend + 1; jj++) {
            for (kk = kstart; kk < kend + 1; kk++) {
              if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/
                cols[cnt++] = slot + ii + gnx * jj + gnx * gny * kk;
              }
            }
          }
        }
        PetscCall(MatPreallocateSetLocalBlock(ltog, 1, &slot, ltog, cnt, cols, dnz, onz));
      }
    }
  }
  PetscCall(MatSeqBAIJSetPreallocation(J, nc, 0, dnz));
  PetscCall(MatMPIBAIJSetPreallocation(J, nc, 0, dnz, 0, onz));
  MatPreallocateEnd(dnz, onz);

  PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    /* values is calloc'ed: all-zero entries, only the nonzero structure is inserted */
    PetscCall(PetscCalloc1(col * col * col * nc * nc, &values));
    for (i = xs; i < xs + nx; i++) {
      istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
      iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
      for (j = ys; j < ys + ny; j++) {
        jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
        jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
        for (k = zs; k < zs + nz; k++) {
          kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
          kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));

          slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);

          cnt = 0;
          for (ii = istart; ii < iend + 1; ii++) {
            for (jj = jstart; jj < jend + 1; jj++) {
              for (kk = kstart; kk < kend + 1; kk++) {
                if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/
                  cols[cnt++] = slot + ii + gnx * jj + gnx * gny * kk;
                }
              }
            }
          }
          PetscCall(MatSetValuesBlockedLocal(J, 1, &slot, cnt, cols, values, INSERT_VALUES));
        }
      }
    }
    PetscCall(PetscFree(values));
    /* do not copy values to GPU since they are all zero and not yet needed there */
    PetscCall(MatBindToCPU(J, PETSC_TRUE));
    PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatBindToCPU(J, PETSC_FALSE));
    PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  }
  PetscCall(PetscFree(cols));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
   This helper is for SBAIJ preallocation, to discard the lower-triangular values which are difficult to
   identify in the local ordering with periodic domain.
*/
static PetscErrorCode L2GFilterUpperTriangular(ISLocalToGlobalMapping ltog, PetscInt *row, PetscInt *cnt, PetscInt col[])
{
  PetscInt i, n;

  PetscFunctionBegin;
  /* map the block row and its block columns to the global ordering in place,
     then keep only the columns on or above the diagonal */
  PetscCall(ISLocalToGlobalMappingApplyBlock(ltog, 1, row, row));
  PetscCall(ISLocalToGlobalMappingApplyBlock(ltog, *cnt, col, col));
  for (i = 0, n = 0; i < *cnt; i++) {
    if (col[i] >= *row) col[n++] = col[i];
  }
  *cnt = n;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  Creates and preallocates a 2d SBAIJ (symmetric block) matrix for a DMDA; only the
  upper-triangular part of each block row is kept (see L2GFilterUpperTriangular()).
*/
PetscErrorCode DMCreateMatrix_DA_2d_MPISBAIJ(DM da, Mat J)
{
  PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
  PetscInt               m, n, dim, s, *cols, nc, col, cnt, *dnz, *onz;
  PetscInt               istart, iend, jstart, jend, ii, jj;
  MPI_Comm               comm;
  PetscScalar           *values;
  DMBoundaryType         bx, by;
  DMDAStencilType        st;
  ISLocalToGlobalMapping ltog;

  PetscFunctionBegin;
  /*
         nc - number of components per grid point
         col - number of colors needed in one direction for single component problem
  */
  PetscCall(DMDAGetInfo(da, &dim, &m, &n, NULL, NULL, NULL, NULL, &nc, &s, &bx, &by, NULL, &st));
  col = 2 * s + 1;

  PetscCall(DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL));
  PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL));
  PetscCall(PetscObjectGetComm((PetscObject)da, &comm));

  PetscCall(PetscMalloc1(col * col * nc * nc, &cols));

  PetscCall(DMGetLocalToGlobalMapping(da, &ltog));

  /* determine the matrix preallocation information */
  MatPreallocateBegin(comm, nx * ny, nx * ny, dnz, onz);
  for (i = xs; i < xs + nx; i++) {
    /* clip the stencil at the physical boundary unless that direction is periodic */
    istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
    iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
    for (j = ys; j < ys + ny; j++) {
      jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
      jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
      slot   = i - gxs + gnx * (j - gys);

      /* Find block columns in block row */
      cnt = 0;
      for (ii = istart; ii < iend + 1; ii++) {
        for (jj = jstart; jj < jend + 1; jj++) {
          if (st == DMDA_STENCIL_BOX || !ii || !jj) cols[cnt++] = slot + ii + gnx * jj;
        }
      }
      PetscCall(L2GFilterUpperTriangular(ltog, &slot, &cnt, cols));
      PetscCall(MatPreallocateSymmetricSetBlock(slot, cnt, cols, dnz, onz));
    }
  }
  PetscCall(MatSeqSBAIJSetPreallocation(J, nc, 0, dnz));
  PetscCall(MatMPISBAIJSetPreallocation(J, nc, 0, dnz, 0, onz));
  MatPreallocateEnd(dnz, onz);

  PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    /* values is calloc'ed: all-zero entries, only the nonzero structure is inserted */
    PetscCall(PetscCalloc1(col * col * nc * nc, &values));
    for (i = xs; i < xs + nx; i++) {
      istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
      iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
      for (j = ys; j < ys + ny; j++) {
        jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
        jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
        slot   = i - gxs + gnx * (j - gys);

        /* Find block columns in block row */
        cnt = 0;
        for (ii = istart; ii < iend + 1; ii++) {
          for (jj = jstart; jj < jend + 1; jj++) {
            if (st == DMDA_STENCIL_BOX || !ii || !jj) cols[cnt++] = slot + ii + gnx * jj;
          }
        }
        PetscCall(L2GFilterUpperTriangular(ltog, &slot, &cnt, cols));
        /* the filter converted the indices to the global ordering, so the non-Local insert is used */
        PetscCall(MatSetValuesBlocked(J, 1, &slot, cnt, cols, values, INSERT_VALUES));
      }
    }
    PetscCall(PetscFree(values));
    /* do not copy values to GPU since they are all zero and not yet needed there */
    PetscCall(MatBindToCPU(J, PETSC_TRUE));
    PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatBindToCPU(J, PETSC_FALSE));
    PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  }
  PetscCall(PetscFree(cols));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  Creates and preallocates a 3d SBAIJ (symmetric block) matrix for a DMDA; the 3d
  analogue of DMCreateMatrix_DA_2d_MPISBAIJ() above.
*/
PetscErrorCode DMCreateMatrix_DA_3d_MPISBAIJ(DM da, Mat J)
{
  PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
  PetscInt               m, n, dim, s, *cols, k, nc, col, cnt, p, *dnz, *onz;
  PetscInt               istart, iend, jstart, jend, kstart, kend, zs, nz, gzs, gnz, ii, jj, kk;
  MPI_Comm               comm;
  PetscScalar           *values;
  DMBoundaryType         bx, by, bz;
  DMDAStencilType        st;
  ISLocalToGlobalMapping ltog;

  PetscFunctionBegin;
  /*
         nc - number of components per grid point
         col - number of colors needed in one direction for single component problem
  */
  PetscCall(DMDAGetInfo(da, &dim, &m, &n, &p, NULL, NULL, NULL, &nc, &s, &bx, &by, &bz, &st));
  col = 2 * s + 1;

  PetscCall(DMDAGetCorners(da, &xs, &ys, &zs, &nx, &ny, &nz));
  PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, &gzs, &gnx, &gny, &gnz));
  PetscCall(PetscObjectGetComm((PetscObject)da, &comm));

  /* create the matrix */
  PetscCall(PetscMalloc1(col * col * col, &cols));

  PetscCall(DMGetLocalToGlobalMapping(da, &ltog));

  /* determine the matrix preallocation information */
  MatPreallocateBegin(comm, nx * ny * nz, nx * ny * nz, dnz, onz);
  for (i = xs; i < xs + nx; i++) {
    /* clip the stencil at the physical boundary unless that direction is periodic */
    istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
    iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
    for (j = ys; j < ys + ny; j++) {
      jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
      jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
      for (k = zs; k < zs + nz; k++) {
        kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
        kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));

        slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);

        /* Find block columns in block row */
        cnt = 0;
        for (ii = istart; ii < iend + 1; ii++) {
          for (jj = jstart; jj < jend + 1; jj++) {
            for (kk = kstart; kk < kend + 1; kk++) {
              if ((st == DMDA_STENCIL_BOX) || (!ii && !jj) || (!jj && !kk) || (!ii && !kk)) cols[cnt++] = slot + ii + gnx * jj + gnx * gny * kk;
            }
          }
        }
        PetscCall(L2GFilterUpperTriangular(ltog, &slot, &cnt, cols));
        PetscCall(MatPreallocateSymmetricSetBlock(slot, cnt, cols, dnz, onz));
      }
    }
  }
  PetscCall(MatSeqSBAIJSetPreallocation(J, nc, 0, dnz));
  PetscCall(MatMPISBAIJSetPreallocation(J, nc, 0, dnz, 0, onz));
  MatPreallocateEnd(dnz, onz);

  PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    /* values is calloc'ed: all-zero entries, only the nonzero structure is inserted */
    PetscCall(PetscCalloc1(col * col * col * nc * nc, &values));
    for (i = xs; i < xs + nx; i++) {
      istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
      iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
      for (j = ys; j < ys + ny; j++) {
        jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
        jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
        for (k = zs; k < zs + nz; k++) {
          kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
          kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));

          slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);

          cnt = 0;
          for (ii = istart; ii < iend + 1; ii++) {
            for (jj = jstart; jj < jend + 1; jj++) {
              for (kk = kstart; kk < kend + 1; kk++) {
                if ((st == DMDA_STENCIL_BOX) || (!ii && !jj) || (!jj && !kk) || (!ii && !kk)) cols[cnt++] = slot + ii + gnx * jj + gnx * gny * kk;
              }
            }
          }
          PetscCall(L2GFilterUpperTriangular(ltog, &slot, &cnt, cols));
          /* the filter converted the indices to the global ordering, so the non-Local insert is used */
          PetscCall(MatSetValuesBlocked(J, 1, &slot, cnt, cols, values, INSERT_VALUES));
        }
      }
    }
    PetscCall(PetscFree(values));
    /* do not copy values to GPU since they are all zero and not yet needed there */
    PetscCall(MatBindToCPU(J, PETSC_TRUE));
    PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatBindToCPU(J, PETSC_FALSE));
    PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  }
  PetscCall(PetscFree(cols));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  Creates and preallocates a 3d AIJ matrix for a DMDA, honoring the per-block coupling
  patterns set with DMDASetBlockFills(): dd->dfill describes which components couple
  within the diagonal block and dd->ofill within the off-diagonal blocks (both in the
  sparse row format built by DMDASetBlockFills_Private()).
*/
PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ_Fill(DM da, Mat J)
{
  PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
  PetscInt               m, n, dim, s, *cols, k, nc, row, col, cnt, maxcnt = 0, l, p, *dnz, *onz;
  PetscInt               istart, iend, jstart, jend, kstart, kend, zs, nz, gzs, gnz, ii, jj, kk, M, N, P;
  DM_DA                 *dd = (DM_DA *)da->data;
  PetscInt               ifill_col, *dfill = dd->dfill, *ofill = dd->ofill;
  MPI_Comm               comm;
  PetscScalar           *values;
  DMBoundaryType         bx, by, bz;
  ISLocalToGlobalMapping ltog;
  DMDAStencilType        st;
  PetscBool              removedups = PETSC_FALSE;

  PetscFunctionBegin;
  /*
         nc - number of components per grid point
         col - number of colors needed in one direction for single component problem
  */
  PetscCall(DMDAGetInfo(da, &dim, &m, &n, &p, &M, &N, &P, &nc, &s, &bx, &by, &bz, &st));
  col = 2 * s + 1;

  /*
     With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times
     because of "wrapping" around the end of the domain hitting an entry already counted in the other direction.
  */
  if (M == 1 && 2 * s >= m) removedups = PETSC_TRUE;
  if (N == 1 && 2 * s >= n) removedups = PETSC_TRUE;
  if (P == 1 && 2 * s >= p) removedups = PETSC_TRUE;

  PetscCall(DMDAGetCorners(da, &xs, &ys, &zs, &nx, &ny, &nz));
  PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, &gzs, &gnx, &gny, &gnz));
  PetscCall(PetscObjectGetComm((PetscObject)da, &comm));

  PetscCall(PetscMalloc1(col * col * col * nc, &cols));
  PetscCall(DMGetLocalToGlobalMapping(da, &ltog));

  /* determine the matrix preallocation information */
  MatPreallocateBegin(comm, nc * nx * ny * nz, nc * nx * ny * nz, dnz, onz);

  PetscCall(MatSetBlockSize(J, nc));
  for (i = xs; i < xs + nx; i++) {
    /* clip the stencil at the physical boundary unless that direction is periodic */
    istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
    iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
    for (j = ys; j < ys + ny; j++) {
      jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
      jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
      for (k = zs; k < zs + nz; k++) {
        kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
        kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));

        slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);

        /* one scalar row per component l of this node */
        for (l = 0; l < nc; l++) {
          cnt = 0;
          for (ii = istart; ii < iend + 1; ii++) {
            for (jj = jstart; jj < jend + 1; jj++) {
              for (kk = kstart; kk < kend + 1; kk++) {
                if (ii || jj || kk) {
                  if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/
                    for (ifill_col = ofill[l]; ifill_col < ofill[l + 1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc * (slot + ii + gnx * jj + gnx * gny * kk);
                  }
                } else {
                  /* the center point: use the diagonal-block fill, or dense coupling if none given */
                  if (dfill) {
                    for (ifill_col = dfill[l]; ifill_col < dfill[l + 1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc * (slot + ii + gnx * jj + gnx * gny * kk);
                  } else {
                    for (ifill_col = 0; ifill_col < nc; ifill_col++) cols[cnt++] = ifill_col + nc * (slot + ii + gnx * jj + gnx * gny * kk);
                  }
                }
              }
            }
          }
          row    = l + nc * (slot);
          maxcnt = PetscMax(maxcnt, cnt);
          if (removedups) PetscCall(MatPreallocateSetLocalRemoveDups(ltog, 1, &row, ltog, cnt, cols, dnz, onz));
          else PetscCall(MatPreallocateSetLocal(ltog, 1, &row, ltog, cnt, cols, dnz, onz));
        }
      }
    }
  }
  PetscCall(MatSeqAIJSetPreallocation(J, 0, dnz));
  PetscCall(MatMPIAIJSetPreallocation(J, 0, dnz, 0, onz));
  MatPreallocateEnd(dnz, onz);
  PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    /* maxcnt zero values suffice since at most maxcnt columns are inserted per row */
    PetscCall(PetscCalloc1(maxcnt, &values));
    for (i = xs; i < xs + nx; i++) {
      istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
      iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
      for (j = ys; j < ys + ny; j++) {
        jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
        jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
        for (k = zs; k < zs + nz; k++) {
          kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
          kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));

          slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);

          for (l = 0; l < nc; l++) {
            cnt = 0;
            for (ii = istart; ii < iend + 1; ii++) {
              for (jj = jstart; jj < jend + 1; jj++) {
                for (kk = kstart; kk < kend + 1; kk++) {
                  if (ii || jj || kk) {
                    if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/
                      for (ifill_col = ofill[l]; ifill_col < ofill[l + 1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc * (slot + ii + gnx * jj + gnx * gny * kk);
                    }
                  } else {
                    if (dfill) {
                      for (ifill_col = dfill[l]; ifill_col < dfill[l + 1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc * (slot + ii + gnx * jj + gnx * gny * kk);
                    } else {
                      for (ifill_col = 0; ifill_col < nc; ifill_col++) cols[cnt++] = ifill_col + nc * (slot + ii + gnx * jj + gnx * gny * kk);
                    }
                  }
                }
              }
            }
            row = l + nc * (slot);
            PetscCall(MatSetValuesLocal(J, 1, &row, cnt, cols, values, INSERT_VALUES));
          }
        }
      }
    }
    PetscCall(PetscFree(values));
    /* do not copy values to GPU since they are all zero and not yet needed there */
    PetscCall(MatBindToCPU(J, PETSC_TRUE));
    PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatBindToCPU(J, PETSC_FALSE));
    PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  }
  PetscCall(PetscFree(cols));
  PetscFunctionReturn(PETSC_SUCCESS);
}