1 #include <petsc/private/dmdaimpl.h> /*I "petscdmda.h" I*/ 2 #include <petscmat.h> 3 #include <petscbt.h> 4 5 extern PetscErrorCode DMCreateColoring_DA_1d_MPIAIJ(DM, ISColoringType, ISColoring *); 6 extern PetscErrorCode DMCreateColoring_DA_2d_MPIAIJ(DM, ISColoringType, ISColoring *); 7 extern PetscErrorCode DMCreateColoring_DA_2d_5pt_MPIAIJ(DM, ISColoringType, ISColoring *); 8 extern PetscErrorCode DMCreateColoring_DA_3d_MPIAIJ(DM, ISColoringType, ISColoring *); 9 10 /* 11 For ghost i that may be negative or greater than the upper bound this 12 maps it into the 0:m-1 range using periodicity 13 */ 14 #define SetInRange(i, m) ((i < 0) ? m + i : ((i >= m) ? i - m : i)) 15 16 static PetscErrorCode DMDASetBlockFills_Private(const PetscInt *dfill, PetscInt w, PetscInt **rfill) 17 { 18 PetscInt i, j, nz, *fill; 19 20 PetscFunctionBegin; 21 if (!dfill) PetscFunctionReturn(PETSC_SUCCESS); 22 23 /* count number nonzeros */ 24 nz = 0; 25 for (i = 0; i < w; i++) { 26 for (j = 0; j < w; j++) { 27 if (dfill[w * i + j]) nz++; 28 } 29 } 30 PetscCall(PetscMalloc1(nz + w + 1, &fill)); 31 /* construct modified CSR storage of nonzero structure */ 32 /* fill[0 -- w] marks starts of each row of column indices (and end of last row) 33 so fill[1] - fill[0] gives number of nonzeros in first row etc */ 34 nz = w + 1; 35 for (i = 0; i < w; i++) { 36 fill[i] = nz; 37 for (j = 0; j < w; j++) { 38 if (dfill[w * i + j]) { 39 fill[nz] = j; 40 nz++; 41 } 42 } 43 } 44 fill[w] = nz; 45 46 *rfill = fill; 47 PetscFunctionReturn(PETSC_SUCCESS); 48 } 49 50 static PetscErrorCode DMDASetBlockFillsSparse_Private(const PetscInt *dfillsparse, PetscInt w, PetscInt **rfill) 51 { 52 PetscInt nz; 53 54 PetscFunctionBegin; 55 if (!dfillsparse) PetscFunctionReturn(PETSC_SUCCESS); 56 57 /* Determine number of non-zeros */ 58 nz = (dfillsparse[w] - w - 1); 59 60 /* Allocate space for our copy of the given sparse matrix representation. 
*/ 61 PetscCall(PetscMalloc1(nz + w + 1, rfill)); 62 PetscCall(PetscArraycpy(*rfill, dfillsparse, nz + w + 1)); 63 PetscFunctionReturn(PETSC_SUCCESS); 64 } 65 66 static PetscErrorCode DMDASetBlockFills_Private2(DM_DA *dd) 67 { 68 PetscInt i, k, cnt = 1; 69 70 PetscFunctionBegin; 71 72 /* ofillcount tracks the columns of ofill that have any nonzero in thems; the value in each location is the number of 73 columns to the left with any nonzeros in them plus 1 */ 74 PetscCall(PetscCalloc1(dd->w, &dd->ofillcols)); 75 for (i = 0; i < dd->w; i++) { 76 for (k = dd->ofill[i]; k < dd->ofill[i + 1]; k++) dd->ofillcols[dd->ofill[k]] = 1; 77 } 78 for (i = 0; i < dd->w; i++) { 79 if (dd->ofillcols[i]) dd->ofillcols[i] = cnt++; 80 } 81 PetscFunctionReturn(PETSC_SUCCESS); 82 } 83 84 /*@ 85 DMDASetBlockFills - Sets the fill pattern in each block for a multi-component problem 86 of the matrix returned by `DMCreateMatrix()`. 87 88 Logically Collective 89 90 Input Parameters: 91 + da - the distributed array 92 . dfill - the fill pattern in the diagonal block (may be NULL, means use dense block) 93 - ofill - the fill pattern in the off-diagonal blocks 94 95 Level: developer 96 97 Notes: 98 This only makes sense when you are doing multicomponent problems but using the 99 `MATMPIAIJ` matrix format 100 101 The format for dfill and ofill is a 2 dimensional dof by dof matrix with 1 entries 102 representing coupling and 0 entries for missing coupling. For example 103 .vb 104 dfill[9] = {1, 0, 0, 105 1, 1, 0, 106 0, 1, 1} 107 .ve 108 means that row 0 is coupled with only itself in the diagonal block, row 1 is coupled with 109 itself and row 0 (in the diagonal block) and row 2 is coupled with itself and row 1 (in the 110 diagonal block). 

  `DMDASetGetMatrix()` allows you to provide general code for those more complicated nonzero patterns than
  can be represented in the dfill, ofill format

  Contributed by Glenn Hammond

.seealso: `DM`, `DMDA`, `DMCreateMatrix()`, `DMDASetGetMatrix()`, `DMSetMatrixPreallocateOnly()`
@*/
PetscErrorCode DMDASetBlockFills(DM da, const PetscInt *dfill, const PetscInt *ofill)
{
  DM_DA *dd = (DM_DA *)da->data;

  PetscFunctionBegin;
  /* save the given dfill and ofill information; helpers convert the dense
     patterns into the compact CSR-like form stored on the DM_DA */
  PetscCall(DMDASetBlockFills_Private(dfill, dd->w, &dd->dfill));
  PetscCall(DMDASetBlockFills_Private(ofill, dd->w, &dd->ofill));

  /* count nonzeros in ofill columns */
  PetscCall(DMDASetBlockFills_Private2(dd));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  DMDASetBlockFillsSparse - Sets the fill pattern in each block for a multi-component problem
  of the matrix returned by `DMCreateMatrix()`, using sparse representations
  of fill patterns.

  Logically Collective

  Input Parameters:
+ da          - the distributed array
. dfillsparse - the sparse fill pattern in the diagonal block (may be `NULL`, means use dense block)
- ofillsparse - the sparse fill pattern in the off-diagonal blocks

  Level: developer

  Notes:
  This only makes sense when you are doing multicomponent problems but using the
  `MATMPIAIJ` matrix format

  The format for `dfill` and `ofill` is a sparse representation of a
  dof-by-dof matrix with 1 entries representing coupling and 0 entries
  for missing coupling. The sparse representation is a 1 dimensional
  array of length nz + dof + 1, where nz is the number of non-zeros in
  the matrix. The first dof entries in the array give the
  starting array indices of each row's items in the rest of the array,
  the dof+1st item contains the value nz + dof + 1 (i.e.
  the entire length of the array)
  and the remaining nz items give the column indices of each of
  the 1s within the logical 2D matrix. Each row's items within
  the array are the column indices of the 1s within that row
  of the 2D matrix. PETSc developers may recognize that this is the
  same format as that computed by the `DMDASetBlockFills_Private()`
  function from a dense 2D matrix representation.

  `DMDASetGetMatrix()` allows you to provide general code for those more complicated nonzero patterns than
  can be represented in the `dfill`, `ofill` format

  Contributed by Philip C. Roth

.seealso: `DM`, `DMDA`, `DMDASetBlockFills()`, `DMCreateMatrix()`, `DMDASetGetMatrix()`, `DMSetMatrixPreallocateOnly()`
@*/
PetscErrorCode DMDASetBlockFillsSparse(DM da, const PetscInt *dfillsparse, const PetscInt *ofillsparse)
{
  DM_DA *dd = (DM_DA *)da->data;

  PetscFunctionBegin;
  /* save the given dfill and ofill information (already in sparse form, so
     the helpers just validate and copy) */
  PetscCall(DMDASetBlockFillsSparse_Private(dfillsparse, dd->w, &dd->dfill));
  PetscCall(DMDASetBlockFillsSparse_Private(ofillsparse, dd->w, &dd->ofill));

  /* count nonzeros in ofill columns */
  PetscCall(DMDASetBlockFills_Private2(dd));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Dispatch coloring creation to the dimension-specific routine, after
   normalizing the coloring type and (for BAIJ formats) temporarily switching
   the DMDA to one degree of freedom per point so colors apply to blocks. */
PetscErrorCode DMCreateColoring_DA(DM da, ISColoringType ctype, ISColoring *coloring)
{
  PetscInt       dim, m, n, p, nc;
  DMBoundaryType bx, by, bz;
  MPI_Comm       comm;
  PetscMPIInt    size;
  PetscBool      isBAIJ;
  DM_DA         *dd = (DM_DA *)da->data;

  PetscFunctionBegin;
  /*
                                  m
          ------------------------------------------------------
         |                                                     |
         |                                                     |
         |               ----------------------               |
         |               |                    |               |
      n  |           yn  |                    |               |
         |               |                    |               |
         |               .---------------------               |
         |             (xs,ys)     xn                         |
         |            .
                                                      |
         |         (gxs,gys)                                  |
         |                                                    |
          -----------------------------------------------------
  */

  /*
     nc - number of components per grid point
     col - number of colors needed in one direction for single component problem

  */
  PetscCall(DMDAGetInfo(da, &dim, NULL, NULL, NULL, &m, &n, &p, &nc, NULL, &bx, &by, &bz, NULL));

  PetscCall(PetscObjectGetComm((PetscObject)da, &comm));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  if (ctype == IS_COLORING_LOCAL) {
    if (size == 1) {
      /* on one process a local coloring is the same as a global one */
      ctype = IS_COLORING_GLOBAL;
    } else {
      PetscCheck((dim == 1) || !((m == 1 && bx == DM_BOUNDARY_PERIODIC) || (n == 1 && by == DM_BOUNDARY_PERIODIC) || (p == 1 && bz == DM_BOUNDARY_PERIODIC)), PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "IS_COLORING_LOCAL cannot be used for periodic boundary condition having both ends of the domain on the same process");
    }
  }

  /* Tell the DMDA it has 1 degree of freedom per grid point so that the coloring for BAIJ
     matrices is for the blocks, not the individual matrix elements */
  PetscCall(PetscStrbeginswith(da->mattype, MATBAIJ, &isBAIJ));
  if (!isBAIJ) PetscCall(PetscStrbeginswith(da->mattype, MATMPIBAIJ, &isBAIJ));
  if (!isBAIJ) PetscCall(PetscStrbeginswith(da->mattype, MATSEQBAIJ, &isBAIJ));
  if (isBAIJ) {
    /* temporarily rescale the (component-scaled) ranges to block units;
       restored below after the dimension-specific routine runs */
    dd->w  = 1;
    dd->xs = dd->xs / nc;
    dd->xe = dd->xe / nc;
    dd->Xs = dd->Xs / nc;
    dd->Xe = dd->Xe / nc;
  }

  /*
     We do not provide a getcoloring function in the DMDA operations because
     the basic DMDA does not know about matrices. We think of DMDA as being
     more low-level than matrices.
  */
  if (dim == 1) PetscCall(DMCreateColoring_DA_1d_MPIAIJ(da, ctype, coloring));
  else if (dim == 2) PetscCall(DMCreateColoring_DA_2d_MPIAIJ(da, ctype, coloring));
  else if (dim == 3) PetscCall(DMCreateColoring_DA_3d_MPIAIJ(da, ctype, coloring));
  else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "Not done for %" PetscInt_FMT " dimension, send us mail petsc-maint@mcs.anl.gov for code", dim);
  if (isBAIJ) {
    /* undo the block-unit rescaling done above */
    dd->w  = nc;
    dd->xs = dd->xs * nc;
    dd->xe = dd->xe * nc;
    dd->Xs = dd->Xs * nc;
    dd->Xe = dd->Xe * nc;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* 2d coloring: colors points by (i mod col, j mod col) tiles so that any two
   points sharing a stencil get distinct colors; results are cached on the DM_DA. */
PetscErrorCode DMCreateColoring_DA_2d_MPIAIJ(DM da, ISColoringType ctype, ISColoring *coloring)
{
  PetscInt         xs, ys, nx, ny, i, j, ii, gxs, gys, gnx, gny, m, n, M, N, dim, s, k, nc, col;
  PetscInt         ncolors = 0;
  MPI_Comm         comm;
  DMBoundaryType   bx, by;
  DMDAStencilType  st;
  ISColoringValue *colors;
  DM_DA           *dd = (DM_DA *)da->data;

  PetscFunctionBegin;
  /*
     nc - number of components per grid point
     col - number of colors needed in one direction for single component problem

  */
  PetscCall(DMDAGetInfo(da, &dim, &m, &n, NULL, &M, &N, NULL, &nc, &s, &bx, &by, NULL, &st));
  col = 2 * s + 1;
  PetscCall(DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL));
  PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL));
  PetscCall(PetscObjectGetComm((PetscObject)da, &comm));

  /* special case as taught to us by Paul Hovland */
  if (st == DMDA_STENCIL_STAR && s == 1) {
    PetscCall(DMCreateColoring_DA_2d_5pt_MPIAIJ(da, ctype, coloring));
  } else {
    if (ctype == IS_COLORING_GLOBAL) {
      if (!dd->localcoloring) {
        PetscCall(PetscMalloc1(nc * nx * ny, &colors));
        ii = 0;
        for (j = ys; j < ys + ny; j++) {
          for (i = xs; i < xs + nx; i++) {
            for (k = 0; k < nc; k++) colors[ii++] = k + nc * ((i % col) + col * (j % col));
          }
        }
        ncolors = nc + nc * (col - 1 + col *
(col - 1));
        PetscCall(ISColoringCreate(comm, ncolors, nc * nx * ny, colors, PETSC_OWN_POINTER, &dd->localcoloring));
      }
      *coloring = dd->localcoloring;
    } else if (ctype == IS_COLORING_LOCAL) {
      if (!dd->ghostedcoloring) {
        PetscCall(PetscMalloc1(nc * gnx * gny, &colors));
        ii = 0;
        for (j = gys; j < gys + gny; j++) {
          for (i = gxs; i < gxs + gnx; i++) {
            for (k = 0; k < nc; k++) {
              /* the complicated stuff is to handle periodic boundaries */
              colors[ii++] = k + nc * ((SetInRange(i, m) % col) + col * (SetInRange(j, n) % col));
            }
          }
        }
        ncolors = nc + nc * (col - 1 + col * (col - 1));
        PetscCall(ISColoringCreate(comm, ncolors, nc * gnx * gny, colors, PETSC_OWN_POINTER, &dd->ghostedcoloring));
        /* PetscIntView(ncolors,(PetscInt*)colors,0); */

        PetscCall(ISColoringSetType(dd->ghostedcoloring, IS_COLORING_LOCAL));
      }
      *coloring = dd->ghostedcoloring;
    } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_ARG_WRONG, "Unknown ISColoringType %d", (int)ctype);
  }
  /* bump the reference count since the caller receives a borrowed, cached coloring */
  PetscCall(ISColoringReference(*coloring));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* 3d coloring: same tiling idea as 2d but over (i, j, k) mod col; periodic
   directions must have extents divisible by col for the tiling to wrap cleanly. */
PetscErrorCode DMCreateColoring_DA_3d_MPIAIJ(DM da, ISColoringType ctype, ISColoring *coloring)
{
  PetscInt         xs, ys, nx, ny, i, j, gxs, gys, gnx, gny, m, n, p, dim, s, k, nc, col, zs, gzs, ii, l, nz, gnz, M, N, P;
  PetscInt         ncolors;
  MPI_Comm         comm;
  DMBoundaryType   bx, by, bz;
  DMDAStencilType  st;
  ISColoringValue *colors;
  DM_DA           *dd = (DM_DA *)da->data;

  PetscFunctionBegin;
  /*
     nc - number of components per grid point
     col - number of colors needed in one direction for single component problem
  */
  PetscCall(DMDAGetInfo(da, &dim, &m, &n, &p, &M, &N, &P, &nc, &s, &bx, &by, &bz, &st));
  col = 2 * s + 1;
  PetscCheck(bx != DM_BOUNDARY_PERIODIC || (m % col) == 0, PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "For coloring efficiency ensure number of grid points in X is divisible\n\
by 2*stencil_width + 1\n");
  PetscCheck(by != DM_BOUNDARY_PERIODIC || (n % col) == 0, PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "For coloring efficiency ensure number of grid points in Y is divisible\n\
by 2*stencil_width + 1\n");
  PetscCheck(bz != DM_BOUNDARY_PERIODIC || (p % col) == 0, PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "For coloring efficiency ensure number of grid points in Z is divisible\n\
by 2*stencil_width + 1\n");

  PetscCall(DMDAGetCorners(da, &xs, &ys, &zs, &nx, &ny, &nz));
  PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, &gzs, &gnx, &gny, &gnz));
  PetscCall(PetscObjectGetComm((PetscObject)da, &comm));

  /* create the coloring */
  if (ctype == IS_COLORING_GLOBAL) {
    if (!dd->localcoloring) {
      PetscCall(PetscMalloc1(nc * nx * ny * nz, &colors));
      ii = 0;
      for (k = zs; k < zs + nz; k++) {
        for (j = ys; j < ys + ny; j++) {
          for (i = xs; i < xs + nx; i++) {
            for (l = 0; l < nc; l++) colors[ii++] = l + nc * ((i % col) + col * (j % col) + col * col * (k % col));
          }
        }
      }
      ncolors = nc + nc * (col - 1 + col * (col - 1) + col * col * (col - 1));
      PetscCall(ISColoringCreate(comm, ncolors, nc * nx * ny * nz, colors, PETSC_OWN_POINTER, &dd->localcoloring));
    }
    *coloring = dd->localcoloring;
  } else if (ctype == IS_COLORING_LOCAL) {
    if (!dd->ghostedcoloring) {
      PetscCall(PetscMalloc1(nc * gnx * gny * gnz, &colors));
      ii = 0;
      for (k = gzs; k < gzs + gnz; k++) {
        for (j = gys; j < gys + gny; j++) {
          for (i = gxs; i < gxs + gnx; i++) {
            for (l = 0; l < nc; l++) {
              /* the complicated stuff is to handle periodic boundaries */
              colors[ii++] = l + nc * ((SetInRange(i, m) % col) + col * (SetInRange(j, n) % col) + col * col * (SetInRange(k, p) % col));
            }
          }
        }
      }
      ncolors = nc + nc * (col - 1 + col * (col - 1) + col * col * (col - 1));
      PetscCall(ISColoringCreate(comm, ncolors, nc * gnx * gny * gnz, colors, PETSC_OWN_POINTER, &dd->ghostedcoloring));
      PetscCall(ISColoringSetType(dd->ghostedcoloring, IS_COLORING_LOCAL));
    }
    *coloring = dd->ghostedcoloring;
  } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_ARG_WRONG, "Unknown ISColoringType %d", (int)ctype);
  /* bump the reference count since the caller receives the cached coloring */
  PetscCall(ISColoringReference(*coloring));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* 1d coloring: colors by i mod col; when ofillcols is set (block fill pattern
   given via DMDASetBlockFills()), only components with off-diagonal coupling
   consume extra colors, reducing the total color count. */
PetscErrorCode DMCreateColoring_DA_1d_MPIAIJ(DM da, ISColoringType ctype, ISColoring *coloring)
{
  PetscInt         xs, nx, i, i1, gxs, gnx, l, m, M, dim, s, nc, col;
  PetscInt         ncolors;
  MPI_Comm         comm;
  DMBoundaryType   bx;
  ISColoringValue *colors;
  DM_DA           *dd = (DM_DA *)da->data;

  PetscFunctionBegin;
  /*
     nc - number of components per grid point
     col - number of colors needed in one direction for single component problem
  */
  PetscCall(DMDAGetInfo(da, &dim, &m, NULL, NULL, &M, NULL, NULL, &nc, &s, &bx, NULL, NULL, NULL));
  col = 2 * s + 1;
  PetscCall(DMDAGetCorners(da, &xs, NULL, NULL, &nx, NULL, NULL));
  PetscCall(DMDAGetGhostCorners(da, &gxs, NULL, NULL, &gnx, NULL, NULL));
  PetscCall(PetscObjectGetComm((PetscObject)da, &comm));

  /* create the coloring */
  if (ctype == IS_COLORING_GLOBAL) {
    if (!dd->localcoloring) {
      PetscCall(PetscMalloc1(nc * nx, &colors));
      if (dd->ofillcols) {
        PetscInt tc = 0;
        /* tc = number of components that couple off-diagonally */
        for (i = 0; i < nc; i++) tc += (PetscInt)(dd->ofillcols[i] > 0);
        i1 = 0;
        for (i = xs; i < xs + nx; i++) {
          for (l = 0; l < nc; l++) {
            if (dd->ofillcols[l] && (i % col)) {
              colors[i1++] = nc - 1 + tc * ((i % col) - 1) + dd->ofillcols[l];
            } else {
              colors[i1++] = l;
            }
          }
        }
        ncolors = nc + 2 * s * tc;
      } else {
        i1 = 0;
        for (i = xs; i < xs + nx; i++) {
          for (l = 0; l < nc; l++) colors[i1++] = l + nc * (i % col);
        }
        ncolors = nc + nc * (col - 1);
      }
      PetscCall(ISColoringCreate(comm, ncolors, nc * nx, colors, PETSC_OWN_POINTER, &dd->localcoloring));
    }
    *coloring =
dd->localcoloring;
  } else if (ctype == IS_COLORING_LOCAL) {
    if (!dd->ghostedcoloring) {
      PetscCall(PetscMalloc1(nc * gnx, &colors));
      i1 = 0;
      for (i = gxs; i < gxs + gnx; i++) {
        for (l = 0; l < nc; l++) {
          /* the complicated stuff is to handle periodic boundaries */
          colors[i1++] = l + nc * (SetInRange(i, m) % col);
        }
      }
      ncolors = nc + nc * (col - 1);
      PetscCall(ISColoringCreate(comm, ncolors, nc * gnx, colors, PETSC_OWN_POINTER, &dd->ghostedcoloring));
      PetscCall(ISColoringSetType(dd->ghostedcoloring, IS_COLORING_LOCAL));
    }
    *coloring = dd->ghostedcoloring;
  } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_ARG_WRONG, "Unknown ISColoringType %d", (int)ctype);
  /* bump the reference count since the caller receives the cached coloring */
  PetscCall(ISColoringReference(*coloring));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* 5-point-star special case (stencil width 1): the diagonal coloring
   (3*j + i) mod 5 needs only 5*nc colors instead of 9*nc from the generic
   tile scheme. Callers obtain the reference through DMCreateColoring_DA_2d_MPIAIJ,
   which takes the reference on the returned coloring. */
PetscErrorCode DMCreateColoring_DA_2d_5pt_MPIAIJ(DM da, ISColoringType ctype, ISColoring *coloring)
{
  PetscInt         xs, ys, nx, ny, i, j, ii, gxs, gys, gnx, gny, m, n, dim, s, k, nc;
  PetscInt         ncolors;
  MPI_Comm         comm;
  DMBoundaryType   bx, by;
  ISColoringValue *colors;
  DM_DA           *dd = (DM_DA *)da->data;

  PetscFunctionBegin;
  /*
     nc - number of components per grid point
     col - number of colors needed in one direction for single component problem
  */
  PetscCall(DMDAGetInfo(da, &dim, &m, &n, NULL, NULL, NULL, NULL, &nc, &s, &bx, &by, NULL, NULL));
  PetscCall(DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL));
  PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL));
  PetscCall(PetscObjectGetComm((PetscObject)da, &comm));
  /* create the coloring */
  if (ctype == IS_COLORING_GLOBAL) {
    if (!dd->localcoloring) {
      PetscCall(PetscMalloc1(nc * nx * ny, &colors));
      ii = 0;
      for (j = ys; j < ys + ny; j++) {
        for (i = xs; i < xs + nx; i++) {
          for (k = 0; k < nc; k++) colors[ii++] = k + nc * ((3 * j + i) % 5);
        }
      }
      ncolors = 5 * nc;
      PetscCall(ISColoringCreate(comm, ncolors, nc * nx * ny, colors, PETSC_OWN_POINTER, &dd->localcoloring));
    }
    *coloring = dd->localcoloring;
  } else if (ctype == IS_COLORING_LOCAL) {
    if (!dd->ghostedcoloring) {
      PetscCall(PetscMalloc1(nc * gnx * gny, &colors));
      ii = 0;
      for (j = gys; j < gys + gny; j++) {
        for (i = gxs; i < gxs + gnx; i++) {
          for (k = 0; k < nc; k++) colors[ii++] = k + nc * ((3 * SetInRange(j, n) + SetInRange(i, m)) % 5);
        }
      }
      ncolors = 5 * nc;
      PetscCall(ISColoringCreate(comm, ncolors, nc * gnx * gny, colors, PETSC_OWN_POINTER, &dd->ghostedcoloring));
      PetscCall(ISColoringSetType(dd->ghostedcoloring, IS_COLORING_LOCAL));
    }
    *coloring = dd->ghostedcoloring;
  } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_ARG_WRONG, "Unknown ISColoringType %d", (int)ctype);
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* =========================================================================== */
extern PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ(DM, Mat);
extern PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ_Fill(DM, Mat);
extern PetscErrorCode DMCreateMatrix_DA_1d_SeqAIJ_NoPreallocation(DM, Mat);
extern PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ(DM, Mat);
extern PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ_Fill(DM, Mat);
extern PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ(DM, Mat);
extern PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ_Fill(DM, Mat);
extern PetscErrorCode DMCreateMatrix_DA_2d_MPIBAIJ(DM, Mat);
extern PetscErrorCode DMCreateMatrix_DA_3d_MPIBAIJ(DM, Mat);
extern PetscErrorCode DMCreateMatrix_DA_2d_MPISBAIJ(DM, Mat);
extern PetscErrorCode DMCreateMatrix_DA_3d_MPISBAIJ(DM, Mat);
extern PetscErrorCode DMCreateMatrix_DA_2d_MPISELL(DM, Mat);
extern PetscErrorCode DMCreateMatrix_DA_3d_MPISELL(DM, Mat);
extern PetscErrorCode DMCreateMatrix_DA_IS(DM, Mat);

/* View a DMDA-generated parallel matrix in the natural (application) ordering
   by permuting it through the DMDA's AO before calling the underlying viewer. */
static PetscErrorCode MatView_MPI_DA(Mat A, PetscViewer viewer)
{
  DM
                    da;
  const char       *prefix;
  Mat               AA, Anatural;
  AO                ao;
  PetscInt          rstart, rend, *petsc, i;
  IS                is;
  MPI_Comm          comm;
  PetscViewerFormat format;
  PetscBool         flag;

  PetscFunctionBegin;
  /* Check whether we are just printing info, in which case MatView() already viewed everything we wanted to view */
  PetscCall(PetscViewerGetFormat(viewer, &format));
  if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) PetscFunctionReturn(PETSC_SUCCESS);

  PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
  PetscCall(MatGetDM(A, &da));
  PetscCheck(da, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Matrix not generated from a DMDA");

  /* build an IS that maps this process's rows to the natural ordering */
  PetscCall(DMDAGetAO(da, &ao));
  PetscCall(MatGetOwnershipRange(A, &rstart, &rend));
  PetscCall(PetscMalloc1(rend - rstart, &petsc));
  for (i = rstart; i < rend; i++) petsc[i - rstart] = i;
  PetscCall(AOApplicationToPetsc(ao, rend - rstart, petsc));
  PetscCall(ISCreateGeneral(comm, rend - rstart, petsc, PETSC_OWN_POINTER, &is));

  /* call viewer on natural ordering */
  PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATMPISELL, &flag));
  if (flag) {
    /* SELL has no submatrix extraction; view through an AIJ copy instead */
    PetscCall(MatConvert(A, MATAIJ, MAT_INITIAL_MATRIX, &AA));
    A = AA;
  }
  PetscCall(MatCreateSubMatrix(A, is, is, MAT_INITIAL_MATRIX, &Anatural));
  PetscCall(ISDestroy(&is));
  PetscCall(PetscObjectGetOptionsPrefix((PetscObject)A, &prefix));
  PetscCall(PetscObjectSetOptionsPrefix((PetscObject)Anatural, prefix));
  PetscCall(PetscObjectSetName((PetscObject)Anatural, ((PetscObject)A)->name));
  ((PetscObject)Anatural)->donotPetscObjectPrintClassNamePrefixType = PETSC_TRUE;
  PetscCall(MatView(Anatural, viewer));
  ((PetscObject)Anatural)->donotPetscObjectPrintClassNamePrefixType = PETSC_FALSE;
  PetscCall(MatDestroy(&Anatural));
  if (flag) PetscCall(MatDestroy(&AA));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Load a DMDA-generated matrix that was stored in the natural ordering:
   read it as-is, then permute into the DMDA (application) ordering and
   replace A's contents in place via MatHeaderReplace(). */
static PetscErrorCode MatLoad_MPI_DA(Mat A, PetscViewer viewer)
{
  DM       da;
  Mat      Anatural, Aapp;
  AO       ao;
  PetscInt rstart, rend, *app, i, m, n, M, N;
  IS       is;
  MPI_Comm comm;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
  PetscCall(MatGetDM(A, &da));
  PetscCheck(da, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Matrix not generated from a DMDA");

  /* Load the matrix in natural ordering */
  PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &Anatural));
  PetscCall(MatSetType(Anatural, ((PetscObject)A)->type_name));
  PetscCall(MatGetSize(A, &M, &N));
  PetscCall(MatGetLocalSize(A, &m, &n));
  PetscCall(MatSetSizes(Anatural, m, n, M, N));
  PetscCall(MatLoad(Anatural, viewer));

  /* Map natural ordering to application ordering and create IS */
  PetscCall(DMDAGetAO(da, &ao));
  PetscCall(MatGetOwnershipRange(Anatural, &rstart, &rend));
  PetscCall(PetscMalloc1(rend - rstart, &app));
  for (i = rstart; i < rend; i++) app[i - rstart] = i;
  PetscCall(AOPetscToApplication(ao, rend - rstart, app));
  PetscCall(ISCreateGeneral(comm, rend - rstart, app, PETSC_OWN_POINTER, &is));

  /* Do permutation and replace header */
  PetscCall(MatCreateSubMatrix(Anatural, is, is, MAT_INITIAL_MATRIX, &Aapp));
  PetscCall(MatHeaderReplace(A, &Aapp));
  PetscCall(ISDestroy(&is));
  PetscCall(MatDestroy(&Anatural));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Create (and preallocate, unless da->prealloc_skip) a matrix matching the
   DMDA's grid, dof, stencil and matrix type, dispatching to the
   type/dimension-specific preallocation routine. */
PetscErrorCode DMCreateMatrix_DA(DM da, Mat *J)
{
  PetscInt    dim, dof, nx, ny, nz, dims[3], starts[3], M, N, P;
  Mat         A;
  MPI_Comm    comm;
  MatType     Atype;
  MatType     mtype;
  PetscMPIInt size;
  DM_DA      *dd = (DM_DA *)da->data;
  void (*aij)(void) = NULL, (*baij)(void) = NULL, (*sbaij)(void) = NULL, (*sell)(void) = NULL, (*is)(void) = NULL;

  PetscFunctionBegin;
  PetscCall(MatInitializePackage());
  mtype = da->mattype;

  /*
                                                     m
          ------------------------------------------------------
         |                                                     |
         |                                                     |
         |               ----------------------               |
         |               |                    |               |
      n  |           ny  |                    |               |
         |               |                    |               |
         |               .---------------------               |
         |             (xs,ys)     nx                         |
         |            .                                       |
         |         (gxs,gys)                                  |
         |                                                    |
          -----------------------------------------------------
  */

  /*
     nc - number of components per grid point
     col - number of colors needed in one direction for single component problem
  */
  M   = dd->M;
  N   = dd->N;
  P   = dd->P;
  dim = da->dim;
  dof = dd->w;
  /* PetscCall(DMDAGetInfo(da,&dim,&M,&N,&P,NULL,NULL,NULL,&dof,NULL,NULL,NULL,NULL,NULL)); */
  PetscCall(DMDAGetCorners(da, NULL, NULL, NULL, &nx, &ny, &nz));
  PetscCall(PetscObjectGetComm((PetscObject)da, &comm));
  PetscCall(MatCreate(comm, &A));
  PetscCall(MatSetSizes(A, dof * nx * ny * nz, dof * nx * ny * nz, dof * M * N * P, dof * M * N * P));
  PetscCall(MatSetType(A, mtype));
  PetscCall(MatSetFromOptions(A));
  if (dof * nx * ny * nz < da->bind_below) {
    /* small local problems stay on the CPU even for GPU matrix types */
    PetscCall(MatSetBindingPropagates(A, PETSC_TRUE));
    PetscCall(MatBindToCPU(A, PETSC_TRUE));
  }
  PetscCall(MatSetDM(A, da));
  if (da->structure_only) PetscCall(MatSetOption(A, MAT_STRUCTURE_ONLY, PETSC_TRUE));
  PetscCall(MatGetType(A, &Atype));
  /*
     We do not provide a getmatrix function in the DMDA operations because
     the basic DMDA does not know about matrices. We think of DMDA as being
     more low-level than matrices. This is kind of cheating but, cause sometimes
     we think of DMDA as higher level than matrices.

     We could switch based on Atype (or mtype), but we do not since the
     specialized setting routines depend only on the particular preallocation
     details of the matrix, not the type itself.
  */
  if (!da->prealloc_skip) { // Flag is likely set when user intends to use MatSetPreallocationCOO()
    /* probe for the preallocation routine the concrete matrix type supports;
       the first family that answers determines the dispatch below */
    PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatMPIAIJSetPreallocation_C", &aij));
    if (!aij) PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatSeqAIJSetPreallocation_C", &aij));
    if (!aij) {
      PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatMPIBAIJSetPreallocation_C", &baij));
      if (!baij) PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatSeqBAIJSetPreallocation_C", &baij));
      if (!baij) {
        PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatMPISBAIJSetPreallocation_C", &sbaij));
        if (!sbaij) PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatSeqSBAIJSetPreallocation_C", &sbaij));
        if (!sbaij) {
          PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatMPISELLSetPreallocation_C", &sell));
          if (!sell) PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatSeqSELLSetPreallocation_C", &sell));
        }
        if (!sell) PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatISSetPreallocation_C", &is));
      }
    }
  }
  if (aij) {
    if (dim == 1) {
      if (dd->ofill) {
        PetscCall(DMCreateMatrix_DA_1d_MPIAIJ_Fill(da, A));
      } else {
        DMBoundaryType bx;
        PetscMPIInt    size;
        PetscCall(DMDAGetInfo(da, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, &bx, NULL, NULL, NULL));
        PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)da), &size));
        if (size == 1 && bx == DM_BOUNDARY_NONE) {
          PetscCall(DMCreateMatrix_DA_1d_SeqAIJ_NoPreallocation(da, A));
        } else {
          PetscCall(DMCreateMatrix_DA_1d_MPIAIJ(da, A));
        }
      }
    } else if (dim == 2) {
      if (dd->ofill) {
        PetscCall(DMCreateMatrix_DA_2d_MPIAIJ_Fill(da, A));
      } else {
        PetscCall(DMCreateMatrix_DA_2d_MPIAIJ(da, A));
      }
    } else if (dim == 3) {
      if (dd->ofill) {
        PetscCall(DMCreateMatrix_DA_3d_MPIAIJ_Fill(da, A));
      } else {
        PetscCall(DMCreateMatrix_DA_3d_MPIAIJ(da, A));
      }
    }
  } else if (baij) {
    if (dim == 2) {
      PetscCall(DMCreateMatrix_DA_2d_MPIBAIJ(da, A));
    } else if (dim == 3) {
      PetscCall(DMCreateMatrix_DA_3d_MPIBAIJ(da, A));
    } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "Not implemented for %" PetscInt_FMT " dimension and Matrix Type: %s in %" PetscInt_FMT " dimension! Send mail to petsc-maint@mcs.anl.gov for code", dim, Atype, dim);
  } else if (sbaij) {
    if (dim == 2) {
      PetscCall(DMCreateMatrix_DA_2d_MPISBAIJ(da, A));
    } else if (dim == 3) {
      PetscCall(DMCreateMatrix_DA_3d_MPISBAIJ(da, A));
    } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "Not implemented for %" PetscInt_FMT " dimension and Matrix Type: %s in %" PetscInt_FMT " dimension! Send mail to petsc-maint@mcs.anl.gov for code", dim, Atype, dim);
  } else if (sell) {
    if (dim == 2) {
      PetscCall(DMCreateMatrix_DA_2d_MPISELL(da, A));
    } else if (dim == 3) {
      PetscCall(DMCreateMatrix_DA_3d_MPISELL(da, A));
    } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "Not implemented for %" PetscInt_FMT " dimension and Matrix Type: %s in %" PetscInt_FMT " dimension! Send mail to petsc-maint@mcs.anl.gov for code", dim, Atype, dim);
  } else if (is) {
    PetscCall(DMCreateMatrix_DA_IS(da, A));
  } else { // unknown type or da->prealloc_skip so structural information may be needed, but no prealloc
    ISLocalToGlobalMapping ltog;

    PetscCall(MatSetBlockSize(A, dof));
    PetscCall(MatSetUp(A));
    PetscCall(DMGetLocalToGlobalMapping(da, &ltog));
    PetscCall(MatSetLocalToGlobalMapping(A, ltog, ltog));
  }
  PetscCall(DMDAGetGhostCorners(da, &starts[0], &starts[1], &starts[2], &dims[0], &dims[1], &dims[2]));
  PetscCall(MatSetStencil(A, dim, dims, starts, dof));
  PetscCall(MatSetDM(A, da));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  if (size > 1) {
    /* change viewer to display matrix in natural ordering */
    PetscCall(MatSetOperation(A, MATOP_VIEW, (void (*)(void))MatView_MPI_DA));
    PetscCall(MatSetOperation(A, MATOP_LOAD, (void (*)(void))MatLoad_MPI_DA));
  }
  *J = A;
  PetscFunctionReturn(PETSC_SUCCESS);
}

PETSC_EXTERN PetscErrorCode MatISSetPreallocation_IS(Mat, PetscInt, const PetscInt[], PetscInt, const PetscInt[]);

/* Build the local-to-global mapping and preallocation for a MATIS matrix on a
   DMDA, restricting the DMDA's ghosted l2g map to the nodes actually touched
   by the DMDAGetElements() decomposition. */
PetscErrorCode DMCreateMatrix_DA_IS(DM dm, Mat J)
{
  DM_DA                 *da = (DM_DA *)dm->data;
  Mat                    lJ, P;
  ISLocalToGlobalMapping ltog;
  IS                     is;
  PetscBT                bt;
  const PetscInt        *e_loc, *idx;
  PetscInt               i, nel, nen, nv, dof, *gidx, n, N;

  /* The l2g map of DMDA has all ghosted nodes, and e_loc is a subset of all the local nodes (including the ghosted)
     We need to filter out the local indices that are not represented through the DMDAGetElements decomposition */
  PetscFunctionBegin;
  dof = da->w;
  PetscCall(MatSetBlockSize(J, dof));
  PetscCall(DMGetLocalToGlobalMapping(dm, &ltog));

  /* flag local elements indices in local DMDA numbering */
  PetscCall(ISLocalToGlobalMappingGetSize(ltog, &nv));
  PetscCall(PetscBTCreate(nv / dof, &bt));
  PetscCall(DMDAGetElements(dm, &nel, &nen, &e_loc)); /* this will throw an
error if the stencil type is not DMDA_STENCIL_BOX */
  for (i = 0; i < nel * nen; i++) PetscCall(PetscBTSet(bt, e_loc[i]));

  /* filter out (set to -1) the global indices not used by the local elements */
  PetscCall(PetscMalloc1(nv / dof, &gidx));
  PetscCall(ISLocalToGlobalMappingGetBlockIndices(ltog, &idx));
  PetscCall(PetscArraycpy(gidx, idx, nv / dof));
  PetscCall(ISLocalToGlobalMappingRestoreBlockIndices(ltog, &idx));
  for (i = 0; i < nv / dof; i++)
    if (!PetscBTLookup(bt, i)) gidx[i] = -1;
  PetscCall(PetscBTDestroy(&bt));
  PetscCall(ISCreateBlock(PetscObjectComm((PetscObject)dm), dof, nv / dof, gidx, PETSC_OWN_POINTER, &is));
  PetscCall(ISLocalToGlobalMappingCreateIS(is, &ltog));
  PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));
  PetscCall(ISLocalToGlobalMappingDestroy(&ltog));
  PetscCall(ISDestroy(&is));

  /* Preallocation */
  if (dm->prealloc_skip) {
    PetscCall(MatSetUp(J));
  } else {
    /* run the element loop once through a MATPREALLOCATOR to size the local
       matrix, then (unless prealloc_only) fill it for real */
    PetscCall(MatISGetLocalMat(J, &lJ));
    PetscCall(MatGetLocalToGlobalMapping(lJ, &ltog, NULL));
    PetscCall(MatCreate(PetscObjectComm((PetscObject)lJ), &P));
    PetscCall(MatSetType(P, MATPREALLOCATOR));
    PetscCall(MatSetLocalToGlobalMapping(P, ltog, ltog));
    PetscCall(MatGetSize(lJ, &N, NULL));
    PetscCall(MatGetLocalSize(lJ, &n, NULL));
    PetscCall(MatSetSizes(P, n, n, N, N));
    PetscCall(MatSetBlockSize(P, dof));
    PetscCall(MatSetUp(P));
    for (i = 0; i < nel; i++) PetscCall(MatSetValuesBlockedLocal(P, nen, e_loc + i * nen, nen, e_loc + i * nen, NULL, INSERT_VALUES));
    PetscCall(MatPreallocatorPreallocate(P, (PetscBool)!da->prealloc_only, lJ));
    PetscCall(MatISRestoreLocalMat(J, &lJ));
    PetscCall(DMDARestoreElements(dm, &nel, &nen, &e_loc));
    PetscCall(MatDestroy(&P));

    PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode
DMCreateMatrix_DA_2d_MPISELL(DM da, Mat J)
{
  PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny, m, n, dim, s, *cols = NULL, k, nc, *rows = NULL, col, cnt, l, p;
  PetscInt               lstart, lend, pstart, pend, *dnz, *onz;
  MPI_Comm               comm;
  PetscScalar           *values;
  DMBoundaryType         bx, by;
  ISLocalToGlobalMapping ltog;
  DMDAStencilType        st;

  PetscFunctionBegin;
  /*
         nc - number of components per grid point
         col - number of colors needed in one direction for single component problem
  */
  PetscCall(DMDAGetInfo(da, &dim, &m, &n, NULL, NULL, NULL, NULL, &nc, &s, &bx, &by, NULL, &st));
  col = 2 * s + 1;
  PetscCall(DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL));
  PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL));
  PetscCall(PetscObjectGetComm((PetscObject)da, &comm));

  /* rows holds one block row (nc entries); cols holds at most col*col stencil points times nc*nc couplings */
  PetscCall(PetscMalloc2(nc, &rows, col * col * nc * nc, &cols));
  PetscCall(DMGetLocalToGlobalMapping(da, &ltog));

  PetscCall(MatSetBlockSize(J, nc));
  /* determine the matrix preallocation information */
  MatPreallocateBegin(comm, nc * nx * ny, nc * nx * ny, dnz, onz);
  for (i = xs; i < xs + nx; i++) {
    /* clip the stencil at physical boundaries unless the direction is periodic */
    pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
    pend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));

    for (j = ys; j < ys + ny; j++) {
      slot = i - gxs + gnx * (j - gys); /* node index in the local (ghosted) ordering */

      lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
      lend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));

      cnt = 0;
      for (k = 0; k < nc; k++) {
        for (l = lstart; l < lend + 1; l++) {
          for (p = pstart; p < pend + 1; p++) {
            if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star have either l = 0 or p = 0 */
              cols[cnt++] = k + nc * (slot + gnx * l + p);
            }
          }
        }
        rows[k] = k + nc * (slot);
      }
      PetscCall(MatPreallocateSetLocal(ltog, nc, rows, ltog, cnt, cols, dnz, onz));
    }
  }
  PetscCall(MatSetBlockSize(J, nc));
  PetscCall(MatSeqSELLSetPreallocation(J, 0, dnz));
  PetscCall(MatMPISELLSetPreallocation(J, 0, dnz, 0, onz));
  MatPreallocateEnd(dnz, onz);

  PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    PetscCall(PetscCalloc1(col * col * nc * nc, &values));
    for (i = xs; i < xs + nx; i++) {
      pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
      pend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));

      for (j = ys; j < ys + ny; j++) {
        slot = i - gxs + gnx * (j - gys);

        lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
        lend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));

        cnt = 0;
        for (k = 0; k < nc; k++) {
          for (l = lstart; l < lend + 1; l++) {
            for (p = pstart; p < pend + 1; p++) {
              if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star have either l = 0 or p = 0 */
                cols[cnt++] = k + nc * (slot + gnx * l + p);
              }
            }
          }
          rows[k] = k + nc * (slot);
        }
        /* insert explicit zeros so the nonzero structure is fixed by assembly */
        PetscCall(MatSetValuesLocal(J, nc, rows, cnt, cols, values, INSERT_VALUES));
      }
    }
    PetscCall(PetscFree(values));
    /* do not copy values to GPU since they are all zero and not yet needed there */
    PetscCall(MatBindToCPU(J, PETSC_TRUE));
    PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatBindToCPU(J, PETSC_FALSE));
    PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  }
  PetscCall(PetscFree2(rows, cols));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Preallocates and (unless da->prealloc_only) inserts the zero nonzero structure of a 3d DMDA operator stored as MATMPISELL */
PetscErrorCode DMCreateMatrix_DA_3d_MPISELL(DM da, Mat J)
{
  PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
  PetscInt               m, n, dim, s, *cols = NULL, k, nc, *rows = NULL, col, cnt, l, p, *dnz = NULL, *onz = NULL;
  PetscInt               istart, iend, jstart, jend, kstart, kend, zs, nz, gzs, gnz, ii, jj, kk, M, N, P;
  MPI_Comm               comm;
  PetscScalar           *values;
  DMBoundaryType         bx, by, bz;
  ISLocalToGlobalMapping ltog;
  DMDAStencilType        st;

  PetscFunctionBegin;
  /*
         nc - number of components per grid point
         col - number of colors needed in one direction for single component problem
  */
  PetscCall(DMDAGetInfo(da, &dim, &m, &n, &p, &M, &N, &P, &nc, &s, &bx, &by, &bz, &st));
  col = 2 * s + 1;
  PetscCall(DMDAGetCorners(da, &xs, &ys, &zs, &nx, &ny, &nz));
  PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, &gzs, &gnx, &gny, &gnz));
  PetscCall(PetscObjectGetComm((PetscObject)da, &comm));

  PetscCall(PetscMalloc2(nc, &rows, col * col * col * nc * nc, &cols));
PetscCall(DMGetLocalToGlobalMapping(da, <og)); 954 955 PetscCall(MatSetBlockSize(J, nc)); 956 /* determine the matrix preallocation information */ 957 MatPreallocateBegin(comm, nc * nx * ny * nz, nc * nx * ny * nz, dnz, onz); 958 for (i = xs; i < xs + nx; i++) { 959 istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i)); 960 iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1)); 961 for (j = ys; j < ys + ny; j++) { 962 jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j)); 963 jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1)); 964 for (k = zs; k < zs + nz; k++) { 965 kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k)); 966 kend = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1)); 967 968 slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs); 969 970 cnt = 0; 971 for (l = 0; l < nc; l++) { 972 for (ii = istart; ii < iend + 1; ii++) { 973 for (jj = jstart; jj < jend + 1; jj++) { 974 for (kk = kstart; kk < kend + 1; kk++) { 975 if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/ 976 cols[cnt++] = l + nc * (slot + ii + gnx * jj + gnx * gny * kk); 977 } 978 } 979 } 980 } 981 rows[l] = l + nc * (slot); 982 } 983 PetscCall(MatPreallocateSetLocal(ltog, nc, rows, ltog, cnt, cols, dnz, onz)); 984 } 985 } 986 } 987 PetscCall(MatSetBlockSize(J, nc)); 988 PetscCall(MatSeqSELLSetPreallocation(J, 0, dnz)); 989 PetscCall(MatMPISELLSetPreallocation(J, 0, dnz, 0, onz)); 990 MatPreallocateEnd(dnz, onz); 991 PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog)); 992 993 /* 994 For each node in the grid: we get the neighbors in the local (on processor ordering 995 that includes the ghost points) then MatSetValuesLocal() maps those indices to the global 996 PETSc ordering. 997 */ 998 if (!da->prealloc_only) { 999 PetscCall(PetscCalloc1(col * col * col * nc * nc * nc, &values)); 1000 for (i = xs; i < xs + nx; i++) { 1001 istart = (bx == DM_BOUNDARY_PERIODIC) ? 
-s : (PetscMax(-s, -i)); 1002 iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1)); 1003 for (j = ys; j < ys + ny; j++) { 1004 jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j)); 1005 jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1)); 1006 for (k = zs; k < zs + nz; k++) { 1007 kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k)); 1008 kend = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1)); 1009 1010 slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs); 1011 1012 cnt = 0; 1013 for (l = 0; l < nc; l++) { 1014 for (ii = istart; ii < iend + 1; ii++) { 1015 for (jj = jstart; jj < jend + 1; jj++) { 1016 for (kk = kstart; kk < kend + 1; kk++) { 1017 if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/ 1018 cols[cnt++] = l + nc * (slot + ii + gnx * jj + gnx * gny * kk); 1019 } 1020 } 1021 } 1022 } 1023 rows[l] = l + nc * (slot); 1024 } 1025 PetscCall(MatSetValuesLocal(J, nc, rows, cnt, cols, values, INSERT_VALUES)); 1026 } 1027 } 1028 } 1029 PetscCall(PetscFree(values)); 1030 /* do not copy values to GPU since they are all zero and not yet needed there */ 1031 PetscCall(MatBindToCPU(J, PETSC_TRUE)); 1032 PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY)); 1033 PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY)); 1034 PetscCall(MatBindToCPU(J, PETSC_FALSE)); 1035 PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE)); 1036 } 1037 PetscCall(PetscFree2(rows, cols)); 1038 PetscFunctionReturn(PETSC_SUCCESS); 1039 } 1040 1041 PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ(DM da, Mat J) 1042 { 1043 PetscInt xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny, m, n, dim, s, *cols = NULL, k, nc, *rows = NULL, col, cnt, l, p, M, N; 1044 PetscInt lstart, lend, pstart, pend, *dnz, *onz; 1045 MPI_Comm comm; 1046 DMBoundaryType bx, by; 1047 ISLocalToGlobalMapping ltog, mltog; 1048 DMDAStencilType st; 1049 PetscBool removedups = PETSC_FALSE, alreadyboundtocpu = 
PETSC_TRUE;

  PetscFunctionBegin;
  /*
         nc - number of components per grid point
         col - number of colors needed in one direction for single component problem
  */
  PetscCall(DMDAGetInfo(da, &dim, &m, &n, NULL, &M, &N, NULL, &nc, &s, &bx, &by, NULL, &st));
  if (bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE) PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_TRUE));
  col = 2 * s + 1;
  /*
       With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times
       because of "wrapping" around the end of the domain hitting an entry already counted in the other direction.
  */
  if (M == 1 && 2 * s >= m) removedups = PETSC_TRUE;
  if (N == 1 && 2 * s >= n) removedups = PETSC_TRUE;
  PetscCall(DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL));
  PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL));
  PetscCall(PetscObjectGetComm((PetscObject)da, &comm));

  PetscCall(PetscMalloc2(nc, &rows, col * col * nc * nc, &cols));
  PetscCall(DMGetLocalToGlobalMapping(da, &ltog));

  PetscCall(MatSetBlockSize(J, nc));
  /* determine the matrix preallocation information */
  MatPreallocateBegin(comm, nc * nx * ny, nc * nx * ny, dnz, onz);
  for (i = xs; i < xs + nx; i++) {
    /* clip the stencil at physical boundaries unless the direction is periodic */
    pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
    pend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));

    for (j = ys; j < ys + ny; j++) {
      slot = i - gxs + gnx * (j - gys); /* node index in the local (ghosted) ordering */

      lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
      lend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));

      cnt = 0;
      for (k = 0; k < nc; k++) {
        for (l = lstart; l < lend + 1; l++) {
          for (p = pstart; p < pend + 1; p++) {
            if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star have either l = 0 or p = 0 */
              cols[cnt++] = k + nc * (slot + gnx * l + p);
            }
          }
        }
        rows[k] = k + nc * (slot);
      }
      if (removedups) PetscCall(MatPreallocateSetLocalRemoveDups(ltog, nc, rows, ltog, cnt, cols, dnz, onz));
      else PetscCall(MatPreallocateSetLocal(ltog, nc, rows, ltog, cnt, cols, dnz, onz));
    }
  }
  PetscCall(MatSetBlockSize(J, nc));
  PetscCall(MatSeqAIJSetPreallocation(J, 0, dnz));
  PetscCall(MatMPIAIJSetPreallocation(J, 0, dnz, 0, onz));
  MatPreallocateEnd(dnz, onz);
  PetscCall(MatGetLocalToGlobalMapping(J, &mltog, NULL));
  if (!mltog) PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    for (i = xs; i < xs + nx; i++) {
      pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
      pend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));

      for (j = ys; j < ys + ny; j++) {
        slot = i - gxs + gnx * (j - gys);

        lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
        lend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));

        cnt = 0;
        for (l = lstart; l < lend + 1; l++) {
          for (p = pstart; p < pend + 1; p++) {
            if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star have either l = 0 or p = 0 */
              /* columns for the remaining nc - 1 components are consecutive after the first */
              cols[cnt++] = nc * (slot + gnx * l + p);
              for (k = 1; k < nc; k++) {
                cols[cnt] = 1 + cols[cnt - 1];
                cnt++;
              }
            }
          }
        }
        for (k = 0; k < nc; k++) rows[k] = k + nc * (slot);
        PetscCall(MatSetValuesLocal(J, nc, rows, cnt, cols, NULL, INSERT_VALUES));
      }
    }
    /* do not copy values to GPU since they are all zero and not yet needed there */
    PetscCall(MatBoundToCPU(J, &alreadyboundtocpu));
    PetscCall(MatBindToCPU(J, PETSC_TRUE));
    PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
    if (!alreadyboundtocpu) PetscCall(MatBindToCPU(J, PETSC_FALSE));
    PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
    if (bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE) PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_FALSE));
  }
  PetscCall(PetscFree2(rows, cols));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Like DMCreateMatrix_DA_2d_MPIAIJ() but restricts the per-block couplings to the dfill/ofill patterns set with DMDASetBlockFills() */
PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ_Fill(DM da, Mat J)
{
  PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
  PetscInt               m, n, dim, s, *cols, k, nc, row, col, cnt, maxcnt = 0, l, p, M, N;
  PetscInt               lstart, lend, pstart, pend, *dnz, *onz;
  DM_DA                 *dd = (DM_DA *)da->data;
  PetscInt               ifill_col, *ofill = dd->ofill, *dfill = dd->dfill;
  MPI_Comm               comm;
  DMBoundaryType         bx, by;
  ISLocalToGlobalMapping ltog;
  DMDAStencilType        st;
  PetscBool              removedups = PETSC_FALSE;

  PetscFunctionBegin;
  /*
         nc - number of components per grid point
         col - number of colors needed in one direction for single component problem
  */
  PetscCall(DMDAGetInfo(da, &dim, &m, &n, NULL, &M, &N, NULL, &nc, &s, &bx, &by, NULL, &st));
  col =
2 * s + 1;
  /*
       With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times
       because of "wrapping" around the end of the domain hitting an entry already counted in the other direction.
  */
  if (M == 1 && 2 * s >= m) removedups = PETSC_TRUE;
  if (N == 1 && 2 * s >= n) removedups = PETSC_TRUE;
  PetscCall(DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL));
  PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL));
  PetscCall(PetscObjectGetComm((PetscObject)da, &comm));

  PetscCall(PetscMalloc1(col * col * nc, &cols));
  PetscCall(DMGetLocalToGlobalMapping(da, &ltog));

  PetscCall(MatSetBlockSize(J, nc));
  /* determine the matrix preallocation information */
  MatPreallocateBegin(comm, nc * nx * ny, nc * nx * ny, dnz, onz);
  for (i = xs; i < xs + nx; i++) {
    /* clip the stencil at physical boundaries unless the direction is periodic */
    pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
    pend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));

    for (j = ys; j < ys + ny; j++) {
      slot = i - gxs + gnx * (j - gys); /* node index in the local (ghosted) ordering */

      lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
      lend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));

      for (k = 0; k < nc; k++) {
        cnt = 0;
        for (l = lstart; l < lend + 1; l++) {
          for (p = pstart; p < pend + 1; p++) {
            if (l || p) {
              /* off-diagonal block: couple only the components listed in ofill for component k */
              if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star */
                for (ifill_col = ofill[k]; ifill_col < ofill[k + 1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc * (slot + gnx * l + p);
              }
            } else {
              /* diagonal block: use dfill if given, otherwise dense coupling of all nc components */
              if (dfill) {
                for (ifill_col = dfill[k]; ifill_col < dfill[k + 1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc * (slot + gnx * l + p);
              } else {
                for (ifill_col = 0; ifill_col < nc; ifill_col++) cols[cnt++] = ifill_col + nc * (slot + gnx * l + p);
              }
            }
          }
        }
        row    = k + nc * (slot);
        maxcnt = PetscMax(maxcnt, cnt);
        if (removedups) PetscCall(MatPreallocateSetLocalRemoveDups(ltog, 1, &row, ltog, cnt, cols, dnz, onz));
        else PetscCall(MatPreallocateSetLocal(ltog, 1, &row, ltog, cnt, cols, dnz, onz));
      }
    }
  }
  PetscCall(MatSeqAIJSetPreallocation(J, 0, dnz));
  PetscCall(MatMPIAIJSetPreallocation(J, 0, dnz, 0, onz));
  MatPreallocateEnd(dnz, onz);
  PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    for (i = xs; i < xs + nx; i++) {
      pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
      pend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));

      for (j = ys; j < ys + ny; j++) {
        slot = i - gxs + gnx * (j - gys);

        lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
        lend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));

        for (k = 0; k < nc; k++) {
          cnt = 0;
          for (l = lstart; l < lend + 1; l++) {
            for (p = pstart; p < pend + 1; p++) {
              if (l || p) {
                if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star */
                  for (ifill_col = ofill[k]; ifill_col < ofill[k + 1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc * (slot + gnx * l + p);
                }
              } else {
                if (dfill) {
                  for (ifill_col = dfill[k]; ifill_col < dfill[k + 1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc * (slot + gnx * l + p);
                } else {
                  for (ifill_col = 0; ifill_col < nc; ifill_col++) cols[cnt++] = ifill_col + nc * (slot + gnx * l + p);
                }
              }
            }
          }
          row = k + nc * (slot);
          PetscCall(MatSetValuesLocal(J, 1, &row, cnt, cols, NULL, INSERT_VALUES));
        }
      }
    }
    /* do not copy values to GPU since they are all zero and not yet needed there */
    PetscCall(MatBindToCPU(J, PETSC_TRUE));
    PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatBindToCPU(J, PETSC_FALSE));
    PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  }
  PetscCall(PetscFree(cols));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Preallocates and (unless da->prealloc_only) inserts the nonzero structure of a 3d DMDA operator stored as MATMPIAIJ */
PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ(DM da, Mat J)
{
  PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
  PetscInt               m, n, dim, s, *cols = NULL, k, nc, *rows = NULL, col, cnt, l, p, *dnz = NULL, *onz = NULL;
  PetscInt               istart, iend, jstart, jend, kstart, kend, zs, nz, gzs, gnz, ii, jj, kk, M, N, P;
  MPI_Comm               comm;
  DMBoundaryType         bx, by, bz;
  ISLocalToGlobalMapping ltog, mltog;
  DMDAStencilType        st;
  PetscBool              removedups = PETSC_FALSE;

  PetscFunctionBegin;
  /*
         nc - number of components per grid point
         col - number of colors needed in one direction for single component problem
  */
  PetscCall(DMDAGetInfo(da, &dim, &m,
&n, &p, &M, &N, &P, &nc, &s, &bx, &by, &bz, &st)); 1293 if (bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE && bz == DM_BOUNDARY_NONE) PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_TRUE)); 1294 col = 2 * s + 1; 1295 1296 /* 1297 With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times 1298 because of "wrapping" around the end of the domain hitting an entry already counted in the other direction. 1299 */ 1300 if (M == 1 && 2 * s >= m) removedups = PETSC_TRUE; 1301 if (N == 1 && 2 * s >= n) removedups = PETSC_TRUE; 1302 if (P == 1 && 2 * s >= p) removedups = PETSC_TRUE; 1303 1304 PetscCall(DMDAGetCorners(da, &xs, &ys, &zs, &nx, &ny, &nz)); 1305 PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, &gzs, &gnx, &gny, &gnz)); 1306 PetscCall(PetscObjectGetComm((PetscObject)da, &comm)); 1307 1308 PetscCall(PetscMalloc2(nc, &rows, col * col * col * nc * nc, &cols)); 1309 PetscCall(DMGetLocalToGlobalMapping(da, <og)); 1310 1311 PetscCall(MatSetBlockSize(J, nc)); 1312 /* determine the matrix preallocation information */ 1313 MatPreallocateBegin(comm, nc * nx * ny * nz, nc * nx * ny * nz, dnz, onz); 1314 for (i = xs; i < xs + nx; i++) { 1315 istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i)); 1316 iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1)); 1317 for (j = ys; j < ys + ny; j++) { 1318 jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j)); 1319 jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1)); 1320 for (k = zs; k < zs + nz; k++) { 1321 kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k)); 1322 kend = (bz == DM_BOUNDARY_PERIODIC) ? 
s : (PetscMin(s, p - k - 1)); 1323 1324 slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs); 1325 1326 cnt = 0; 1327 for (l = 0; l < nc; l++) { 1328 for (ii = istart; ii < iend + 1; ii++) { 1329 for (jj = jstart; jj < jend + 1; jj++) { 1330 for (kk = kstart; kk < kend + 1; kk++) { 1331 if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/ 1332 cols[cnt++] = l + nc * (slot + ii + gnx * jj + gnx * gny * kk); 1333 } 1334 } 1335 } 1336 } 1337 rows[l] = l + nc * (slot); 1338 } 1339 if (removedups) PetscCall(MatPreallocateSetLocalRemoveDups(ltog, nc, rows, ltog, cnt, cols, dnz, onz)); 1340 else PetscCall(MatPreallocateSetLocal(ltog, nc, rows, ltog, cnt, cols, dnz, onz)); 1341 } 1342 } 1343 } 1344 PetscCall(MatSetBlockSize(J, nc)); 1345 PetscCall(MatSeqAIJSetPreallocation(J, 0, dnz)); 1346 PetscCall(MatMPIAIJSetPreallocation(J, 0, dnz, 0, onz)); 1347 MatPreallocateEnd(dnz, onz); 1348 PetscCall(MatGetLocalToGlobalMapping(J, &mltog, NULL)); 1349 if (!mltog) PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog)); 1350 1351 /* 1352 For each node in the grid: we get the neighbors in the local (on processor ordering 1353 that includes the ghost points) then MatSetValuesLocal() maps those indices to the global 1354 PETSc ordering. 1355 */ 1356 if (!da->prealloc_only) { 1357 for (i = xs; i < xs + nx; i++) { 1358 istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i)); 1359 iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1)); 1360 for (j = ys; j < ys + ny; j++) { 1361 jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j)); 1362 jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1)); 1363 for (k = zs; k < zs + nz; k++) { 1364 kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k)); 1365 kend = (bz == DM_BOUNDARY_PERIODIC) ? 
s : (PetscMin(s, p - k - 1)); 1366 1367 slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs); 1368 1369 cnt = 0; 1370 for (kk = kstart; kk < kend + 1; kk++) { 1371 for (jj = jstart; jj < jend + 1; jj++) { 1372 for (ii = istart; ii < iend + 1; ii++) { 1373 if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/ 1374 cols[cnt++] = nc * (slot + ii + gnx * jj + gnx * gny * kk); 1375 for (l = 1; l < nc; l++) { 1376 cols[cnt] = 1 + cols[cnt - 1]; 1377 cnt++; 1378 } 1379 } 1380 } 1381 } 1382 } 1383 rows[0] = nc * (slot); 1384 for (l = 1; l < nc; l++) rows[l] = 1 + rows[l - 1]; 1385 PetscCall(MatSetValuesLocal(J, nc, rows, cnt, cols, NULL, INSERT_VALUES)); 1386 } 1387 } 1388 } 1389 /* do not copy values to GPU since they are all zero and not yet needed there */ 1390 PetscCall(MatBindToCPU(J, PETSC_TRUE)); 1391 PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY)); 1392 PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY)); 1393 if (bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE && bz == DM_BOUNDARY_NONE) PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_FALSE)); 1394 PetscCall(MatBindToCPU(J, PETSC_FALSE)); 1395 PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE)); 1396 } 1397 PetscCall(PetscFree2(rows, cols)); 1398 PetscFunctionReturn(PETSC_SUCCESS); 1399 } 1400 1401 PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ_Fill(DM da, Mat J) 1402 { 1403 DM_DA *dd = (DM_DA *)da->data; 1404 PetscInt xs, nx, i, j, gxs, gnx, row, k, l; 1405 PetscInt m, dim, s, *cols = NULL, nc, cnt, maxcnt = 0, *ocols; 1406 PetscInt *ofill = dd->ofill, *dfill = dd->dfill; 1407 DMBoundaryType bx; 1408 ISLocalToGlobalMapping ltog; 1409 PetscMPIInt rank, size; 1410 1411 PetscFunctionBegin; 1412 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)da), &rank)); 1413 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)da), &size)); 1414 1415 /* 1416 nc - number of components per grid point 1417 */ 1418 PetscCall(DMDAGetInfo(da, &dim, &m, NULL, 
NULL, NULL, NULL, NULL, &nc, &s, &bx, NULL, NULL, NULL));
  PetscCheck(s <= 1, PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "Matrix creation for 1d not implemented correctly for stencil width larger than 1");
  PetscCall(DMDAGetCorners(da, &xs, NULL, NULL, &nx, NULL, NULL));
  PetscCall(DMDAGetGhostCorners(da, &gxs, NULL, NULL, &gnx, NULL, NULL));

  PetscCall(MatSetBlockSize(J, nc));
  /* cols/ocols hold the per-row diagonal and off-diagonal nonzero counts for preallocation */
  PetscCall(PetscCalloc2(nx * nc, &cols, nx * nc, &ocols));

  /*
        note should be smaller for first and last process with no periodic
        does not handle dfill
  */
  cnt = 0;
  /* coupling with process to the left */
  for (i = 0; i < s; i++) {
    for (j = 0; j < nc; j++) {
      ocols[cnt] = ((rank == 0) ? 0 : (s - i) * (ofill[j + 1] - ofill[j]));
      cols[cnt]  = dfill[j + 1] - dfill[j] + (s + i) * (ofill[j + 1] - ofill[j]);
      if (rank == 0 && (dd->bx == DM_BOUNDARY_PERIODIC)) {
        /* periodic wrap: the left neighbor lives on the last process (off-diagonal) unless we are the only process */
        if (size > 1) ocols[cnt] += (s - i) * (ofill[j + 1] - ofill[j]);
        else cols[cnt] += (s - i) * (ofill[j + 1] - ofill[j]);
      }
      maxcnt = PetscMax(maxcnt, ocols[cnt] + cols[cnt]);
      cnt++;
    }
  }
  /* interior rows: full stencil of width 2s entirely on this process */
  for (i = s; i < nx - s; i++) {
    for (j = 0; j < nc; j++) {
      cols[cnt] = dfill[j + 1] - dfill[j] + 2 * s * (ofill[j + 1] - ofill[j]);
      maxcnt    = PetscMax(maxcnt, ocols[cnt] + cols[cnt]);
      cnt++;
    }
  }
  /* coupling with process to the right */
  for (i = nx - s; i < nx; i++) {
    for (j = 0; j < nc; j++) {
      ocols[cnt] = ((rank == (size - 1)) ? 0 : (i - nx + s + 1) * (ofill[j + 1] - ofill[j]));
      cols[cnt]  = dfill[j + 1] - dfill[j] + (s + nx - i - 1) * (ofill[j + 1] - ofill[j]);
      if ((rank == size - 1) && (dd->bx == DM_BOUNDARY_PERIODIC)) {
        if (size > 1) ocols[cnt] += (i - nx + s + 1) * (ofill[j + 1] - ofill[j]);
        else cols[cnt] += (i - nx + s + 1) * (ofill[j + 1] - ofill[j]);
      }
      maxcnt = PetscMax(maxcnt, ocols[cnt] + cols[cnt]);
      cnt++;
    }
  }

  PetscCall(MatSeqAIJSetPreallocation(J, 0, cols));
  PetscCall(MatMPIAIJSetPreallocation(J, 0, cols, 0, ocols));
  PetscCall(PetscFree2(cols, ocols));

  PetscCall(DMGetLocalToGlobalMapping(da, &ltog));
  PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    PetscCall(PetscMalloc1(maxcnt, &cols));
    row = xs * nc;
    /* coupling with process to the left */
    for (i = xs; i < xs + s; i++) {
      for (j = 0; j < nc; j++) {
        cnt = 0;
        if (rank) {
          for (l = 0; l < s; l++) {
            for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i - s + l) * nc + ofill[k];
          }
        }
        if (rank == 0 && (dd->bx == DM_BOUNDARY_PERIODIC)) {
          /* periodic wrap onto the far end of the global grid */
          for (l = 0; l < s; l++) {
            for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (m + i - s - l) * nc + ofill[k];
          }
        }
        if (dfill) {
          for (k = dfill[j]; k < dfill[j + 1]; k++) cols[cnt++] = i * nc + dfill[k];
        } else {
          for (k = 0; k < nc; k++) cols[cnt++] = i * nc + k;
        }
        for (l = 0; l < s; l++) {
          for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i + s - l) * nc + ofill[k];
        }
        PetscCall(MatSetValues(J, 1, &row, cnt, cols, NULL, INSERT_VALUES));
        row++;
      }
    }
    /* interior rows */
    for (i = xs + s; i < xs + nx - s; i++) {
      for (j = 0; j < nc; j++) {
        cnt = 0;
        for (l = 0; l < s; l++) {
          for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i - s + l) * nc + ofill[k];
        }
        if (dfill) {
          for (k = dfill[j]; k < dfill[j + 1]; k++) cols[cnt++] = i * nc + dfill[k];
        } else {
          for (k = 0; k < nc; k++) cols[cnt++] = i * nc + k;
        }
        for (l = 0; l < s; l++) {
          for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i + s - l) * nc + ofill[k];
        }
        PetscCall(MatSetValues(J, 1, &row, cnt, cols, NULL, INSERT_VALUES));
        row++;
      }
    }
    /* coupling with process to the right */
    for (i = xs + nx - s; i < xs + nx; i++) {
      for (j = 0; j < nc; j++) {
        cnt = 0;
        for (l = 0; l < s; l++) {
          for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i - s + l) * nc + ofill[k];
        }
        if (dfill) {
          for (k = dfill[j]; k < dfill[j + 1]; k++) cols[cnt++] = i * nc + dfill[k];
        } else {
          for (k = 0; k < nc; k++) cols[cnt++] = i * nc + k;
        }
        if (rank < size - 1) {
          for (l = 0; l < s; l++) {
            for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i + s - l) * nc + ofill[k];
          }
        }
        if ((rank == size - 1) && (dd->bx == DM_BOUNDARY_PERIODIC)) {
          /* periodic wrap back to the start of the global grid; for s == 1 (enforced above)
             (i - s - l - m + 2) equals the wrapped index i + s - l - m */
          for (l = 0; l < s; l++) {
            for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i - s - l - m + 2) * nc + ofill[k];
          }
        }
        PetscCall(MatSetValues(J, 1, &row, cnt, cols, NULL, INSERT_VALUES));
        row++;
      }
    }
    PetscCall(PetscFree(cols));
    /* do not copy values to GPU since they are all zero and not yet needed there */
    PetscCall(MatBindToCPU(J, PETSC_TRUE));
    PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatBindToCPU(J, PETSC_FALSE));
    PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Preallocates and (unless da->prealloc_only) inserts the nonzero structure of a 1d DMDA operator stored as MATMPIAIJ */
PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ(DM da, Mat J)
{
  PetscInt               xs, nx, i, i1, slot, gxs, gnx;
  PetscInt               m, dim, s, *cols = NULL, nc, *rows = NULL, col, cnt, l;
  PetscInt               istart, iend;
  DMBoundaryType         bx;
  ISLocalToGlobalMapping ltog, mltog;

  PetscFunctionBegin;
  /*
         nc - number of components per grid point
         col - number of colors needed in one direction for single component problem
  */
  PetscCall(DMDAGetInfo(da, &dim, &m, NULL, NULL, NULL, NULL, NULL, &nc, &s, &bx, NULL, NULL, NULL));
  if (bx == DM_BOUNDARY_NONE) PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_TRUE));
  col = 2 * s + 1;

  PetscCall(DMDAGetCorners(da, &xs, NULL, NULL, &nx, NULL, NULL));
  PetscCall(DMDAGetGhostCorners(da, &gxs, NULL, NULL, &gnx, NULL, NULL));

  PetscCall(MatSetBlockSize(J, nc));
  /* simple uniform preallocation: every row gets the full stencil width of col*nc entries */
  PetscCall(MatSeqAIJSetPreallocation(J, col * nc, NULL));
  PetscCall(MatMPIAIJSetPreallocation(J, col * nc, NULL, col * nc, NULL));

  PetscCall(DMGetLocalToGlobalMapping(da, &ltog));
  PetscCall(MatGetLocalToGlobalMapping(J, &mltog, NULL));
  if (!mltog) PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    PetscCall(PetscMalloc2(nc, &rows, col * nc * nc, &cols));
    for (i = xs; i < xs + nx; i++) {
      /* clip the stencil to the ghosted region of this process */
      istart = PetscMax(-s, gxs - i);
      iend   = PetscMin(s, gxs + gnx - i - 1);
      slot   = i - gxs; /* node index in the local (ghosted) ordering */

      cnt = 0;
      for (i1 = istart; i1 < iend + 1; i1++) {
        /* columns for the remaining nc - 1 components are consecutive after the first */
        cols[cnt++] = nc * (slot + i1);
        for (l = 1; l < nc; l++) {
          cols[cnt] = 1 + cols[cnt - 1];
          cnt++;
        }
      }
      rows[0] = nc * (slot);
      for (l = 1; l < nc; l++) rows[l] = 1 + rows[l - 1];
      PetscCall(MatSetValuesLocal(J, nc, rows, cnt, cols, NULL, INSERT_VALUES));
    }
    /* do not copy values to GPU since they are all zero and not yet needed there */
    PetscCall(MatBindToCPU(J, PETSC_TRUE));
    PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
    if (bx == DM_BOUNDARY_NONE) PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_FALSE));
    PetscCall(MatBindToCPU(J, PETSC_FALSE));
    PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
    PetscCall(PetscFree2(rows, cols));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Same nonzero structure as DMCreateMatrix_DA_1d_MPIAIJ() but uses MatSeqAIJSetTotalPreallocation() instead of per-row counts */
PetscErrorCode DMCreateMatrix_DA_1d_SeqAIJ_NoPreallocation(DM da, Mat J)
{
  PetscInt               xs, nx, i, i1, slot, gxs, gnx;
  PetscInt               m, dim, s, *cols = NULL, nc, *rows = NULL, col, cnt, l;
  PetscInt               istart, iend;
  DMBoundaryType         bx;
  ISLocalToGlobalMapping ltog, mltog;

  PetscFunctionBegin;
  /*
         nc - number of components per grid point
         col - number of colors needed in one direction for single component problem
  */
  PetscCall(DMDAGetInfo(da, &dim, &m, NULL, NULL, NULL, NULL, NULL, &nc, &s, &bx, NULL, NULL, NULL));
  col = 2 * s + 1;

  PetscCall(DMDAGetCorners(da, &xs, NULL, NULL, &nx, NULL, NULL));
  PetscCall(DMDAGetGhostCorners(da, &gxs, NULL, NULL, &gnx, NULL, NULL));

  PetscCall(MatSetBlockSize(J, nc));
PetscCall(MatSeqAIJSetTotalPreallocation(J, nx * nc * col * nc)); 1646 1647 PetscCall(DMGetLocalToGlobalMapping(da, <og)); 1648 PetscCall(MatGetLocalToGlobalMapping(J, &mltog, NULL)); 1649 if (!mltog) PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog)); 1650 1651 /* 1652 For each node in the grid: we get the neighbors in the local (on processor ordering 1653 that includes the ghost points) then MatSetValuesLocal() maps those indices to the global 1654 PETSc ordering. 1655 */ 1656 if (!da->prealloc_only) { 1657 PetscCall(PetscMalloc2(nc, &rows, col * nc * nc, &cols)); 1658 for (i = xs; i < xs + nx; i++) { 1659 istart = PetscMax(-s, gxs - i); 1660 iend = PetscMin(s, gxs + gnx - i - 1); 1661 slot = i - gxs; 1662 1663 cnt = 0; 1664 for (i1 = istart; i1 < iend + 1; i1++) { 1665 cols[cnt++] = nc * (slot + i1); 1666 for (l = 1; l < nc; l++) { 1667 cols[cnt] = 1 + cols[cnt - 1]; 1668 cnt++; 1669 } 1670 } 1671 rows[0] = nc * (slot); 1672 for (l = 1; l < nc; l++) rows[l] = 1 + rows[l - 1]; 1673 PetscCall(MatSetValuesLocal(J, nc, rows, cnt, cols, NULL, INSERT_VALUES)); 1674 } 1675 /* do not copy values to GPU since they are all zero and not yet needed there */ 1676 PetscCall(MatBindToCPU(J, PETSC_TRUE)); 1677 PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY)); 1678 PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY)); 1679 if (bx == DM_BOUNDARY_NONE) PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_FALSE)); 1680 PetscCall(MatBindToCPU(J, PETSC_FALSE)); 1681 PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE)); 1682 PetscCall(PetscFree2(rows, cols)); 1683 } 1684 PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_FALSE)); 1685 PetscFunctionReturn(PETSC_SUCCESS); 1686 } 1687 1688 PetscErrorCode DMCreateMatrix_DA_2d_MPIBAIJ(DM da, Mat J) 1689 { 1690 PetscInt xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny; 1691 PetscInt m, n, dim, s, *cols, nc, col, cnt, *dnz, *onz; 1692 PetscInt istart, iend, jstart, jend, ii, jj; 1693 MPI_Comm comm; 1694 PetscScalar *values; 1695 
DMBoundaryType bx, by; 1696 DMDAStencilType st; 1697 ISLocalToGlobalMapping ltog; 1698 1699 PetscFunctionBegin; 1700 /* 1701 nc - number of components per grid point 1702 col - number of colors needed in one direction for single component problem 1703 */ 1704 PetscCall(DMDAGetInfo(da, &dim, &m, &n, NULL, NULL, NULL, NULL, &nc, &s, &bx, &by, NULL, &st)); 1705 col = 2 * s + 1; 1706 1707 PetscCall(DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL)); 1708 PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL)); 1709 PetscCall(PetscObjectGetComm((PetscObject)da, &comm)); 1710 1711 PetscCall(PetscMalloc1(col * col * nc * nc, &cols)); 1712 1713 PetscCall(DMGetLocalToGlobalMapping(da, <og)); 1714 1715 /* determine the matrix preallocation information */ 1716 MatPreallocateBegin(comm, nx * ny, nx * ny, dnz, onz); 1717 for (i = xs; i < xs + nx; i++) { 1718 istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i)); 1719 iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1)); 1720 for (j = ys; j < ys + ny; j++) { 1721 jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j)); 1722 jend = (by == DM_BOUNDARY_PERIODIC) ? 
s : (PetscMin(s, n - j - 1)); 1723 slot = i - gxs + gnx * (j - gys); 1724 1725 /* Find block columns in block row */ 1726 cnt = 0; 1727 for (ii = istart; ii < iend + 1; ii++) { 1728 for (jj = jstart; jj < jend + 1; jj++) { 1729 if (st == DMDA_STENCIL_BOX || !ii || !jj) { /* BOX or on the STAR */ 1730 cols[cnt++] = slot + ii + gnx * jj; 1731 } 1732 } 1733 } 1734 PetscCall(MatPreallocateSetLocalBlock(ltog, 1, &slot, ltog, cnt, cols, dnz, onz)); 1735 } 1736 } 1737 PetscCall(MatSeqBAIJSetPreallocation(J, nc, 0, dnz)); 1738 PetscCall(MatMPIBAIJSetPreallocation(J, nc, 0, dnz, 0, onz)); 1739 MatPreallocateEnd(dnz, onz); 1740 1741 PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog)); 1742 1743 /* 1744 For each node in the grid: we get the neighbors in the local (on processor ordering 1745 that includes the ghost points) then MatSetValuesLocal() maps those indices to the global 1746 PETSc ordering. 1747 */ 1748 if (!da->prealloc_only) { 1749 PetscCall(PetscCalloc1(col * col * nc * nc, &values)); 1750 for (i = xs; i < xs + nx; i++) { 1751 istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i)); 1752 iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1)); 1753 for (j = ys; j < ys + ny; j++) { 1754 jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j)); 1755 jend = (by == DM_BOUNDARY_PERIODIC) ? 
s : (PetscMin(s, n - j - 1)); 1756 slot = i - gxs + gnx * (j - gys); 1757 cnt = 0; 1758 for (ii = istart; ii < iend + 1; ii++) { 1759 for (jj = jstart; jj < jend + 1; jj++) { 1760 if (st == DMDA_STENCIL_BOX || !ii || !jj) { /* BOX or on the STAR */ 1761 cols[cnt++] = slot + ii + gnx * jj; 1762 } 1763 } 1764 } 1765 PetscCall(MatSetValuesBlockedLocal(J, 1, &slot, cnt, cols, values, INSERT_VALUES)); 1766 } 1767 } 1768 PetscCall(PetscFree(values)); 1769 /* do not copy values to GPU since they are all zero and not yet needed there */ 1770 PetscCall(MatBindToCPU(J, PETSC_TRUE)); 1771 PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY)); 1772 PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY)); 1773 PetscCall(MatBindToCPU(J, PETSC_FALSE)); 1774 PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE)); 1775 } 1776 PetscCall(PetscFree(cols)); 1777 PetscFunctionReturn(PETSC_SUCCESS); 1778 } 1779 1780 PetscErrorCode DMCreateMatrix_DA_3d_MPIBAIJ(DM da, Mat J) 1781 { 1782 PetscInt xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny; 1783 PetscInt m, n, dim, s, *cols, k, nc, col, cnt, p, *dnz, *onz; 1784 PetscInt istart, iend, jstart, jend, kstart, kend, zs, nz, gzs, gnz, ii, jj, kk; 1785 MPI_Comm comm; 1786 PetscScalar *values; 1787 DMBoundaryType bx, by, bz; 1788 DMDAStencilType st; 1789 ISLocalToGlobalMapping ltog; 1790 1791 PetscFunctionBegin; 1792 /* 1793 nc - number of components per grid point 1794 col - number of colors needed in one direction for single component problem 1795 */ 1796 PetscCall(DMDAGetInfo(da, &dim, &m, &n, &p, NULL, NULL, NULL, &nc, &s, &bx, &by, &bz, &st)); 1797 col = 2 * s + 1; 1798 1799 PetscCall(DMDAGetCorners(da, &xs, &ys, &zs, &nx, &ny, &nz)); 1800 PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, &gzs, &gnx, &gny, &gnz)); 1801 PetscCall(PetscObjectGetComm((PetscObject)da, &comm)); 1802 1803 PetscCall(PetscMalloc1(col * col * col, &cols)); 1804 1805 PetscCall(DMGetLocalToGlobalMapping(da, <og)); 1806 1807 /* determine the matrix preallocation 
information */ 1808 MatPreallocateBegin(comm, nx * ny * nz, nx * ny * nz, dnz, onz); 1809 for (i = xs; i < xs + nx; i++) { 1810 istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i)); 1811 iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1)); 1812 for (j = ys; j < ys + ny; j++) { 1813 jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j)); 1814 jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1)); 1815 for (k = zs; k < zs + nz; k++) { 1816 kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k)); 1817 kend = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1)); 1818 1819 slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs); 1820 1821 /* Find block columns in block row */ 1822 cnt = 0; 1823 for (ii = istart; ii < iend + 1; ii++) { 1824 for (jj = jstart; jj < jend + 1; jj++) { 1825 for (kk = kstart; kk < kend + 1; kk++) { 1826 if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/ 1827 cols[cnt++] = slot + ii + gnx * jj + gnx * gny * kk; 1828 } 1829 } 1830 } 1831 } 1832 PetscCall(MatPreallocateSetLocalBlock(ltog, 1, &slot, ltog, cnt, cols, dnz, onz)); 1833 } 1834 } 1835 } 1836 PetscCall(MatSeqBAIJSetPreallocation(J, nc, 0, dnz)); 1837 PetscCall(MatMPIBAIJSetPreallocation(J, nc, 0, dnz, 0, onz)); 1838 MatPreallocateEnd(dnz, onz); 1839 1840 PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog)); 1841 1842 /* 1843 For each node in the grid: we get the neighbors in the local (on processor ordering 1844 that includes the ghost points) then MatSetValuesLocal() maps those indices to the global 1845 PETSc ordering. 1846 */ 1847 if (!da->prealloc_only) { 1848 PetscCall(PetscCalloc1(col * col * col * nc * nc, &values)); 1849 for (i = xs; i < xs + nx; i++) { 1850 istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i)); 1851 iend = (bx == DM_BOUNDARY_PERIODIC) ? 
s : (PetscMin(s, m - i - 1)); 1852 for (j = ys; j < ys + ny; j++) { 1853 jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j)); 1854 jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1)); 1855 for (k = zs; k < zs + nz; k++) { 1856 kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k)); 1857 kend = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1)); 1858 1859 slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs); 1860 1861 cnt = 0; 1862 for (ii = istart; ii < iend + 1; ii++) { 1863 for (jj = jstart; jj < jend + 1; jj++) { 1864 for (kk = kstart; kk < kend + 1; kk++) { 1865 if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/ 1866 cols[cnt++] = slot + ii + gnx * jj + gnx * gny * kk; 1867 } 1868 } 1869 } 1870 } 1871 PetscCall(MatSetValuesBlockedLocal(J, 1, &slot, cnt, cols, values, INSERT_VALUES)); 1872 } 1873 } 1874 } 1875 PetscCall(PetscFree(values)); 1876 /* do not copy values to GPU since they are all zero and not yet needed there */ 1877 PetscCall(MatBindToCPU(J, PETSC_TRUE)); 1878 PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY)); 1879 PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY)); 1880 PetscCall(MatBindToCPU(J, PETSC_FALSE)); 1881 PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE)); 1882 } 1883 PetscCall(PetscFree(cols)); 1884 PetscFunctionReturn(PETSC_SUCCESS); 1885 } 1886 1887 /* 1888 This helper is for of SBAIJ preallocation, to discard the lower-triangular values which are difficult to 1889 identify in the local ordering with periodic domain. 
*/
/* Maps a block row index and its block column indices from the local (ghosted) numbering to the
   global block numbering in place, then compacts col[] to keep only the upper-triangular entries
   (col[i] >= *row).  *row and *cnt are updated in place. */
static PetscErrorCode L2GFilterUpperTriangular(ISLocalToGlobalMapping ltog, PetscInt *row, PetscInt *cnt, PetscInt col[])
{
  PetscInt i, n;

  PetscFunctionBegin;
  PetscCall(ISLocalToGlobalMappingApplyBlock(ltog, 1, row, row));
  PetscCall(ISLocalToGlobalMappingApplyBlock(ltog, *cnt, col, col));
  /* keep only columns at or above the diagonal; n compacts the survivors to the front */
  for (i = 0, n = 0; i < *cnt; i++) {
    if (col[i] >= *row) col[n++] = col[i];
  }
  *cnt = n;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  Builds the nonzero structure of a 2d DMDA matrix in symmetric-block (SBAIJ) format: only the
  upper-triangular block columns of each block row are preallocated and inserted, selected by
  L2GFilterUpperTriangular() in the global ordering (hence MatSetValuesBlocked, not ...Local, in
  the insertion pass).
*/
PetscErrorCode DMCreateMatrix_DA_2d_MPISBAIJ(DM da, Mat J)
{
  PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
  PetscInt               m, n, dim, s, *cols, nc, col, cnt, *dnz, *onz;
  PetscInt               istart, iend, jstart, jend, ii, jj;
  MPI_Comm               comm;
  PetscScalar           *values;
  DMBoundaryType         bx, by;
  DMDAStencilType        st;
  ISLocalToGlobalMapping ltog;

  PetscFunctionBegin;
  /*
         nc - number of components per grid point
         col - number of colors needed in one direction for single component problem
  */
  PetscCall(DMDAGetInfo(da, &dim, &m, &n, NULL, NULL, NULL, NULL, &nc, &s, &bx, &by, NULL, &st));
  col = 2 * s + 1;

  PetscCall(DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL));
  PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL));
  PetscCall(PetscObjectGetComm((PetscObject)da, &comm));

  PetscCall(PetscMalloc1(col * col * nc * nc, &cols));

  PetscCall(DMGetLocalToGlobalMapping(da, &ltog));

  /* determine the matrix preallocation information */
  MatPreallocateBegin(comm, nx * ny, nx * ny, dnz, onz);
  for (i = xs; i < xs + nx; i++) {
    /* periodic: full stencil wraps; otherwise clamp to the physical domain */
    istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
    iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
    for (j = ys; j < ys + ny; j++) {
      jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
      jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
      slot   = i - gxs + gnx * (j - gys); /* local (ghosted) index of grid point (i,j) */

      /* Find block columns in block row */
      cnt = 0;
      for (ii = istart; ii < iend + 1; ii++) {
        for (jj = jstart; jj < jend + 1; jj++) {
          if (st == DMDA_STENCIL_BOX || !ii || !jj) cols[cnt++] = slot + ii + gnx * jj;
        }
      }
      /* convert to global block indices and drop the strictly lower-triangular columns */
      PetscCall(L2GFilterUpperTriangular(ltog, &slot, &cnt, cols));
      PetscCall(MatPreallocateSymmetricSetBlock(slot, cnt, cols, dnz, onz));
    }
  }
  PetscCall(MatSeqSBAIJSetPreallocation(J, nc, 0, dnz));
  PetscCall(MatMPISBAIJSetPreallocation(J, nc, 0, dnz, 0, onz));
  MatPreallocateEnd(dnz, onz);

  PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    /* one zeroed nc x nc block, reused for every insertion */
    PetscCall(PetscCalloc1(col * col * nc * nc, &values));
    for (i = xs; i < xs + nx; i++) {
      istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
      iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
      for (j = ys; j < ys + ny; j++) {
        jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
        jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
        slot   = i - gxs + gnx * (j - gys);

        /* Find block columns in block row */
        cnt = 0;
        for (ii = istart; ii < iend + 1; ii++) {
          for (jj = jstart; jj < jend + 1; jj++) {
            if (st == DMDA_STENCIL_BOX || !ii || !jj) cols[cnt++] = slot + ii + gnx * jj;
          }
        }
        PetscCall(L2GFilterUpperTriangular(ltog, &slot, &cnt, cols));
        /* indices are already global after the filter, so use the non-Local blocked setter */
        PetscCall(MatSetValuesBlocked(J, 1, &slot, cnt, cols, values, INSERT_VALUES));
      }
    }
    PetscCall(PetscFree(values));
    /* do not copy values to GPU since they are all zero and not yet needed there */
    PetscCall(MatBindToCPU(J, PETSC_TRUE));
    PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatBindToCPU(J, PETSC_FALSE));
    PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  }
  PetscCall(PetscFree(cols));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  Builds the nonzero structure of a 3d DMDA matrix in symmetric-block (SBAIJ) format; the 3d
  analogue of DMCreateMatrix_DA_2d_MPISBAIJ above.
*/
PetscErrorCode DMCreateMatrix_DA_3d_MPISBAIJ(DM da, Mat J)
{
  PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
  PetscInt               m, n, dim, s, *cols, k, nc, col, cnt, p, *dnz, *onz;
  PetscInt               istart, iend, jstart, jend, kstart, kend, zs, nz, gzs, gnz, ii, jj, kk;
  MPI_Comm               comm;
  PetscScalar           *values;
  DMBoundaryType         bx, by, bz;
  DMDAStencilType        st;
  ISLocalToGlobalMapping ltog;

  PetscFunctionBegin;
  /*
         nc - number of components per grid point
         col - number of colors needed in one direction for single component problem
  */
  PetscCall(DMDAGetInfo(da, &dim, &m, &n, &p, NULL, NULL, NULL, &nc, &s, &bx, &by, &bz, &st));
  col = 2 * s + 1;

  PetscCall(DMDAGetCorners(da, &xs, &ys, &zs, &nx, &ny, &nz));
  PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, &gzs, &gnx, &gny, &gnz));
  PetscCall(PetscObjectGetComm((PetscObject)da, &comm));

  /* create the matrix */
  PetscCall(PetscMalloc1(col * col * col, &cols));

  PetscCall(DMGetLocalToGlobalMapping(da, &ltog));

  /* determine the matrix preallocation information */
  MatPreallocateBegin(comm, nx * ny * nz, nx * ny * nz, dnz, onz);
  for (i = xs; i < xs + nx; i++) {
    istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
    iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
    for (j = ys; j < ys + ny; j++) {
      jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
      jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
      for (k = zs; k < zs + nz; k++) {
        kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
        kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));

        slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs); /* local (ghosted) index */

        /* Find block columns in block row */
        cnt = 0;
        for (ii = istart; ii < iend + 1; ii++) {
          for (jj = jstart; jj < jend + 1; jj++) {
            for (kk = kstart; kk < kend + 1; kk++) {
              /* BOX stencil, or STAR: at least two of the three offsets are zero */
              if ((st == DMDA_STENCIL_BOX) || (!ii && !jj) || (!jj && !kk) || (!ii && !kk)) cols[cnt++] = slot + ii + gnx * jj + gnx * gny * kk;
            }
          }
        }
        PetscCall(L2GFilterUpperTriangular(ltog, &slot, &cnt, cols));
        PetscCall(MatPreallocateSymmetricSetBlock(slot, cnt, cols, dnz, onz));
      }
    }
  }
  PetscCall(MatSeqSBAIJSetPreallocation(J, nc, 0, dnz));
  PetscCall(MatMPISBAIJSetPreallocation(J, nc, 0, dnz, 0, onz));
  MatPreallocateEnd(dnz, onz);

  PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    PetscCall(PetscCalloc1(col * col * col * nc * nc, &values));
    for (i = xs; i < xs + nx; i++) {
      istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
      iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
      for (j = ys; j < ys + ny; j++) {
        jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
        jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
        for (k = zs; k < zs + nz; k++) {
          kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
          kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));

          slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);

          cnt = 0;
          for (ii = istart; ii < iend + 1; ii++) {
            for (jj = jstart; jj < jend + 1; jj++) {
              for (kk = kstart; kk < kend + 1; kk++) {
                if ((st == DMDA_STENCIL_BOX) || (!ii && !jj) || (!jj && !kk) || (!ii && !kk)) cols[cnt++] = slot + ii + gnx * jj + gnx * gny * kk;
              }
            }
          }
          PetscCall(L2GFilterUpperTriangular(ltog, &slot, &cnt, cols));
          PetscCall(MatSetValuesBlocked(J, 1, &slot, cnt, cols, values, INSERT_VALUES));
        }
      }
    }
    PetscCall(PetscFree(values));
    /* do not copy values to GPU since they are all zero and not yet needed there */
    PetscCall(MatBindToCPU(J, PETSC_TRUE));
    PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatBindToCPU(J, PETSC_FALSE));
    PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  }
  PetscCall(PetscFree(cols));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  Builds the nonzero structure of a 3d DMDA matrix in point (AIJ) format honoring the per-block
  fill patterns set with DMDASetBlockFills(): dd->dfill restricts coupling within a grid point's
  block and dd->ofill restricts coupling to neighboring grid points (both in the compressed
  row format built by DMDASetBlockFills_Private; a NULL dfill means a dense diagonal block).
*/
PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ_Fill(DM da, Mat J)
{
  PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
  PetscInt               m, n, dim, s, *cols, k, nc, row, col, cnt, maxcnt = 0, l, p, *dnz, *onz;
  PetscInt               istart, iend, jstart, jend, kstart, kend, zs, nz, gzs, gnz, ii, jj, kk, M, N, P;
  DM_DA                 *dd = (DM_DA *)da->data;
  PetscInt               ifill_col, *dfill = dd->dfill, *ofill = dd->ofill;
  MPI_Comm               comm;
  PetscScalar           *values;
  DMBoundaryType         bx, by, bz;
  ISLocalToGlobalMapping ltog;
  DMDAStencilType        st;
  PetscBool              removedups = PETSC_FALSE;

  PetscFunctionBegin;
  /*
         nc - number of components per grid point
         col - number of colors needed in one direction for single component problem
  */
  PetscCall(DMDAGetInfo(da, &dim, &m, &n, &p, &M, &N, &P, &nc, &s, &bx, &by, &bz, &st));
  col = 2 * s + 1;

  /*
     With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times
     because of "wrapping" around the end of the domain hitting an entry already counted in the other direction.
  */
  if (M == 1 && 2 * s >= m) removedups = PETSC_TRUE;
  if (N == 1 && 2 * s >= n) removedups = PETSC_TRUE;
  if (P == 1 && 2 * s >= p) removedups = PETSC_TRUE;

  PetscCall(DMDAGetCorners(da, &xs, &ys, &zs, &nx, &ny, &nz));
  PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, &gzs, &gnx, &gny, &gnz));
  PetscCall(PetscObjectGetComm((PetscObject)da, &comm));

  PetscCall(PetscMalloc1(col * col * col * nc, &cols));
  PetscCall(DMGetLocalToGlobalMapping(da, &ltog));

  /* determine the matrix preallocation information */
  MatPreallocateBegin(comm, nc * nx * ny * nz, nc * nx * ny * nz, dnz, onz);

  PetscCall(MatSetBlockSize(J, nc));
  for (i = xs; i < xs + nx; i++) {
    istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
    iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
    for (j = ys; j < ys + ny; j++) {
      jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
      jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
      for (k = zs; k < zs + nz; k++) {
        kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
        kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));

        slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs); /* local (ghosted) index */

        /* one dof row at a time within this grid point's block */
        for (l = 0; l < nc; l++) {
          cnt = 0;
          for (ii = istart; ii < iend + 1; ii++) {
            for (jj = jstart; jj < jend + 1; jj++) {
              for (kk = kstart; kk < kend + 1; kk++) {
                if (ii || jj || kk) {
                  /* off-diagonal grid point: couple only to the dofs listed in ofill row l */
                  if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/
                    for (ifill_col = ofill[l]; ifill_col < ofill[l + 1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc * (slot + ii + gnx * jj + gnx * gny * kk);
                  }
                } else {
                  /* the grid point itself: dfill row l if given, otherwise a dense block */
                  if (dfill) {
                    for (ifill_col = dfill[l]; ifill_col < dfill[l + 1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc * (slot + ii + gnx * jj + gnx * gny * kk);
                  } else {
                    for (ifill_col = 0; ifill_col < nc; ifill_col++) cols[cnt++] = ifill_col + nc * (slot + ii + gnx * jj + gnx * gny * kk);
                  }
                }
              }
            }
          }
          row    = l + nc * (slot);
          maxcnt = PetscMax(maxcnt, cnt); /* track widest row so the zero-values buffer below is big enough */
          if (removedups) PetscCall(MatPreallocateSetLocalRemoveDups(ltog, 1, &row, ltog, cnt, cols, dnz, onz));
          else PetscCall(MatPreallocateSetLocal(ltog, 1, &row, ltog, cnt, cols, dnz, onz));
        }
      }
    }
  }
  PetscCall(MatSeqAIJSetPreallocation(J, 0, dnz));
  PetscCall(MatMPIAIJSetPreallocation(J, 0, dnz, 0, onz));
  MatPreallocateEnd(dnz, onz);
  PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    PetscCall(PetscCalloc1(maxcnt, &values));
    for (i = xs; i < xs + nx; i++) {
      istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
      iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
      for (j = ys; j < ys + ny; j++) {
        jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
        jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
        for (k = zs; k < zs + nz; k++) {
          kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
          kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));

          slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);

          for (l = 0; l < nc; l++) {
            cnt = 0;
            for (ii = istart; ii < iend + 1; ii++) {
              for (jj = jstart; jj < jend + 1; jj++) {
                for (kk = kstart; kk < kend + 1; kk++) {
                  if (ii || jj || kk) {
                    if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/
                      for (ifill_col = ofill[l]; ifill_col < ofill[l + 1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc * (slot + ii + gnx * jj + gnx * gny * kk);
                    }
                  } else {
                    if (dfill) {
                      for (ifill_col = dfill[l]; ifill_col < dfill[l + 1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc * (slot + ii + gnx * jj + gnx * gny * kk);
                    } else {
                      for (ifill_col = 0; ifill_col < nc; ifill_col++) cols[cnt++] = ifill_col + nc * (slot + ii + gnx * jj + gnx * gny * kk);
                    }
                  }
                }
              }
            }
            row = l + nc * (slot);
            PetscCall(MatSetValuesLocal(J, 1, &row, cnt, cols, values, INSERT_VALUES));
          }
        }
      }
    }
    PetscCall(PetscFree(values));
    /* do not copy values to GPU since they are all zero and not yet needed there */
    PetscCall(MatBindToCPU(J, PETSC_TRUE));
    PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatBindToCPU(J, PETSC_FALSE));
    PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  }
  PetscCall(PetscFree(cols));
  PetscFunctionReturn(PETSC_SUCCESS);
}