1 2 #include <petsc/private/dmdaimpl.h> /*I "petscdmda.h" I*/ 3 #include <petscmat.h> 4 #include <petscbt.h> 5 6 extern PetscErrorCode DMCreateColoring_DA_1d_MPIAIJ(DM, ISColoringType, ISColoring *); 7 extern PetscErrorCode DMCreateColoring_DA_2d_MPIAIJ(DM, ISColoringType, ISColoring *); 8 extern PetscErrorCode DMCreateColoring_DA_2d_5pt_MPIAIJ(DM, ISColoringType, ISColoring *); 9 extern PetscErrorCode DMCreateColoring_DA_3d_MPIAIJ(DM, ISColoringType, ISColoring *); 10 11 /* 12 For ghost i that may be negative or greater than the upper bound this 13 maps it into the 0:m-1 range using periodicity 14 */ 15 #define SetInRange(i, m) ((i < 0) ? m + i : ((i >= m) ? i - m : i)) 16 17 static PetscErrorCode DMDASetBlockFills_Private(const PetscInt *dfill, PetscInt w, PetscInt **rfill) 18 { 19 PetscInt i, j, nz, *fill; 20 21 PetscFunctionBegin; 22 if (!dfill) PetscFunctionReturn(PETSC_SUCCESS); 23 24 /* count number nonzeros */ 25 nz = 0; 26 for (i = 0; i < w; i++) { 27 for (j = 0; j < w; j++) { 28 if (dfill[w * i + j]) nz++; 29 } 30 } 31 PetscCall(PetscMalloc1(nz + w + 1, &fill)); 32 /* construct modified CSR storage of nonzero structure */ 33 /* fill[0 -- w] marks starts of each row of column indices (and end of last row) 34 so fill[1] - fill[0] gives number of nonzeros in first row etc */ 35 nz = w + 1; 36 for (i = 0; i < w; i++) { 37 fill[i] = nz; 38 for (j = 0; j < w; j++) { 39 if (dfill[w * i + j]) { 40 fill[nz] = j; 41 nz++; 42 } 43 } 44 } 45 fill[w] = nz; 46 47 *rfill = fill; 48 PetscFunctionReturn(PETSC_SUCCESS); 49 } 50 51 static PetscErrorCode DMDASetBlockFillsSparse_Private(const PetscInt *dfillsparse, PetscInt w, PetscInt **rfill) 52 { 53 PetscInt nz; 54 55 PetscFunctionBegin; 56 if (!dfillsparse) PetscFunctionReturn(PETSC_SUCCESS); 57 58 /* Determine number of non-zeros */ 59 nz = (dfillsparse[w] - w - 1); 60 61 /* Allocate space for our copy of the given sparse matrix representation. 
   */
  PetscCall(PetscMalloc1(nz + w + 1, rfill));
  PetscCall(PetscArraycpy(*rfill, dfillsparse, nz + w + 1));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Builds dd->ofillcols from dd->ofill (the sparse off-diagonal fill pattern) */
static PetscErrorCode DMDASetBlockFills_Private2(DM_DA *dd)
{
  PetscInt i, k, cnt = 1;

  PetscFunctionBegin;

  /* ofillcols tracks the columns of ofill that have any nonzero in them; the value in each location is the number of
     columns to the left with any nonzeros in them plus 1 */
  PetscCall(PetscCalloc1(dd->w, &dd->ofillcols));
  for (i = 0; i < dd->w; i++) {
    /* mark each column index that appears in row i's sparse column list */
    for (k = dd->ofill[i]; k < dd->ofill[i + 1]; k++) dd->ofillcols[dd->ofill[k]] = 1;
  }
  for (i = 0; i < dd->w; i++) {
    if (dd->ofillcols[i]) dd->ofillcols[i] = cnt++;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  DMDASetBlockFills - Sets the fill pattern in each block for a multi-component problem
  of the matrix returned by `DMCreateMatrix()`.

  Logically Collective

  Input Parameters:
+ da - the distributed array
. dfill - the fill pattern in the diagonal block (may be NULL, means use dense block)
- ofill - the fill pattern in the off-diagonal blocks

  Level: developer

  Notes:
  This only makes sense when you are doing multicomponent problems but using the
  `MATMPIAIJ` matrix format

  The format for dfill and ofill is a 2 dimensional dof by dof matrix with 1 entries
  representing coupling and 0 entries for missing coupling. For example
.vb
  dfill[9] = {1, 0, 0,
              1, 1, 0,
              0, 1, 1}
.ve
  means that row 0 is coupled with only itself in the diagonal block, row 1 is coupled with
  itself and row 0 (in the diagonal block) and row 2 is coupled with itself and row 1 (in the
  diagonal block).

  `DMDASetGetMatrix()` allows you to provide general code for those more complicated nonzero patterns than
  can be represented in the dfill, ofill format

  Contributed by Glenn Hammond

.seealso: `DM`, `DMDA`, `DMCreateMatrix()`, `DMDASetGetMatrix()`, `DMSetMatrixPreallocateOnly()`
@*/
PetscErrorCode DMDASetBlockFills(DM da, const PetscInt *dfill, const PetscInt *ofill)
{
  DM_DA *dd = (DM_DA *)da->data;

  PetscFunctionBegin;
  /* save the given dfill and ofill information (converted to sparse CSR-like form) */
  PetscCall(DMDASetBlockFills_Private(dfill, dd->w, &dd->dfill));
  PetscCall(DMDASetBlockFills_Private(ofill, dd->w, &dd->ofill));

  /* count nonzeros in ofill columns */
  PetscCall(DMDASetBlockFills_Private2(dd));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  DMDASetBlockFillsSparse - Sets the fill pattern in each block for a multi-component problem
  of the matrix returned by `DMCreateMatrix()`, using sparse representations
  of fill patterns.

  Logically Collective

  Input Parameters:
+ da - the distributed array
. dfill - the sparse fill pattern in the diagonal block (may be `NULL`, means use dense block)
- ofill - the sparse fill pattern in the off-diagonal blocks

  Level: developer

  Notes:
  This only makes sense when you are doing multicomponent problems but using the
  `MATMPIAIJ` matrix format

  The format for `dfill` and `ofill` is a sparse representation of a
  dof-by-dof matrix with 1 entries representing coupling and 0 entries
  for missing coupling. The sparse representation is a 1 dimensional
  array of length nz + dof + 1, where nz is the number of non-zeros in
  the matrix. The first dof entries in the array give the
  starting array indices of each row's items in the rest of the array,
  the dof+1st item contains the value nz + dof + 1 (i.e.
  the entire length of the array)
  and the remaining nz items give the column indices of each of
  the 1s within the logical 2D matrix. Each row's items within
  the array are the column indices of the 1s within that row
  of the 2D matrix. PETSc developers may recognize that this is the
  same format as that computed by the `DMDASetBlockFills_Private()`
  function from a dense 2D matrix representation.

  `DMDASetGetMatrix()` allows you to provide general code for those more complicated nonzero patterns than
  can be represented in the `dfill`, `ofill` format

  Contributed by Philip C. Roth

.seealso: `DM`, `DMDA`, `DMDASetBlockFills()`, `DMCreateMatrix()`, `DMDASetGetMatrix()`, `DMSetMatrixPreallocateOnly()`
@*/
PetscErrorCode DMDASetBlockFillsSparse(DM da, const PetscInt *dfillsparse, const PetscInt *ofillsparse)
{
  DM_DA *dd = (DM_DA *)da->data;

  PetscFunctionBegin;
  /* save the given dfill and ofill information (already sparse, just copied) */
  PetscCall(DMDASetBlockFillsSparse_Private(dfillsparse, dd->w, &dd->dfill));
  PetscCall(DMDASetBlockFillsSparse_Private(ofillsparse, dd->w, &dd->ofill));

  /* count nonzeros in ofill columns */
  PetscCall(DMDASetBlockFills_Private2(dd));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Creates a coloring for the DMDA's matrix by dispatching to the dimension-specific
   routines below; for BAIJ matrix types the DMDA is temporarily rescaled to one
   degree of freedom per grid point so the coloring is per block, not per entry */
PetscErrorCode DMCreateColoring_DA(DM da, ISColoringType ctype, ISColoring *coloring)
{
  PetscInt       dim, m, n, p, nc;
  DMBoundaryType bx, by, bz;
  MPI_Comm       comm;
  PetscMPIInt    size;
  PetscBool      isBAIJ;
  DM_DA         *dd = (DM_DA *)da->data;

  PetscFunctionBegin;
  /*
                                                     m
          ------------------------------------------------------
         |                                                      |
         |                                                      |
         |               ----------------------                 |
         |               |                    |                 |
      n  |           yn  |                    |                 |
         |               |                    |                 |
         |               .---------------------                 |
         |             (xs,ys)     xn                           |
         |            .
         |          (gxs,gys)                                   |
         |                                                      |
          -----------------------------------------------------
  */

  /*
     nc - number of components per grid point
     col - number of colors needed in one direction for single component problem
  */
  PetscCall(DMDAGetInfo(da, &dim, NULL, NULL, NULL, &m, &n, &p, &nc, NULL, &bx, &by, &bz, NULL));

  PetscCall(PetscObjectGetComm((PetscObject)da, &comm));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  if (ctype == IS_COLORING_LOCAL) {
    if (size == 1) {
      /* on one process the ghosted and global colorings coincide */
      ctype = IS_COLORING_GLOBAL;
    } else {
      PetscCheck((dim == 1) || !((m == 1 && bx == DM_BOUNDARY_PERIODIC) || (n == 1 && by == DM_BOUNDARY_PERIODIC) || (p == 1 && bz == DM_BOUNDARY_PERIODIC)), PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "IS_COLORING_LOCAL cannot be used for periodic boundary condition having both ends of the domain on the same process");
    }
  }

  /* Tell the DMDA it has 1 degree of freedom per grid point so that the coloring for BAIJ
     matrices is for the blocks, not the individual matrix elements */
  PetscCall(PetscStrbeginswith(da->mattype, MATBAIJ, &isBAIJ));
  if (!isBAIJ) PetscCall(PetscStrbeginswith(da->mattype, MATMPIBAIJ, &isBAIJ));
  if (!isBAIJ) PetscCall(PetscStrbeginswith(da->mattype, MATSEQBAIJ, &isBAIJ));
  if (isBAIJ) {
    /* temporarily rescale the ownership ranges to block units; restored below */
    dd->w  = 1;
    dd->xs = dd->xs / nc;
    dd->xe = dd->xe / nc;
    dd->Xs = dd->Xs / nc;
    dd->Xe = dd->Xe / nc;
  }

  /*
     We do not provide a getcoloring function in the DMDA operations because
     the basic DMDA does not know about matrices. We think of DMDA as being
     more low-level than matrices.
  */
  if (dim == 1) PetscCall(DMCreateColoring_DA_1d_MPIAIJ(da, ctype, coloring));
  else if (dim == 2) PetscCall(DMCreateColoring_DA_2d_MPIAIJ(da, ctype, coloring));
  else if (dim == 3) PetscCall(DMCreateColoring_DA_3d_MPIAIJ(da, ctype, coloring));
  else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "Not done for %" PetscInt_FMT " dimension, send us mail petsc-maint@mcs.anl.gov for code", dim);
  if (isBAIJ) {
    /* restore the per-point degree of freedom and ownership ranges */
    dd->w  = nc;
    dd->xs = dd->xs * nc;
    dd->xe = dd->xe * nc;
    dd->Xs = dd->Xs * nc;
    dd->Xe = dd->Xe * nc;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Creates a coloring for a 2d DMDA: each color is determined by the point's
   position modulo the stencil width within a col-by-col tile, per component */
PetscErrorCode DMCreateColoring_DA_2d_MPIAIJ(DM da, ISColoringType ctype, ISColoring *coloring)
{
  PetscInt         xs, ys, nx, ny, i, j, ii, gxs, gys, gnx, gny, m, n, M, N, dim, s, k, nc, col;
  PetscInt         ncolors = 0;
  MPI_Comm         comm;
  DMBoundaryType   bx, by;
  DMDAStencilType  st;
  ISColoringValue *colors;
  DM_DA           *dd = (DM_DA *)da->data;

  PetscFunctionBegin;
  /*
     nc - number of components per grid point
     col - number of colors needed in one direction for single component problem
  */
  PetscCall(DMDAGetInfo(da, &dim, &m, &n, NULL, &M, &N, NULL, &nc, &s, &bx, &by, NULL, &st));
  col = 2 * s + 1;
  PetscCall(DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL));
  PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL));
  PetscCall(PetscObjectGetComm((PetscObject)da, &comm));

  /* special case as taught to us by Paul Hovland */
  if (st == DMDA_STENCIL_STAR && s == 1) {
    PetscCall(DMCreateColoring_DA_2d_5pt_MPIAIJ(da, ctype, coloring));
  } else {
    if (ctype == IS_COLORING_GLOBAL) {
      if (!dd->localcoloring) {
        PetscCall(PetscMalloc1(nc * nx * ny, &colors));
        ii = 0;
        for (j = ys; j < ys + ny; j++) {
          for (i = xs; i < xs + nx; i++) {
            for (k = 0; k < nc; k++) colors[ii++] = k + nc * ((i % col) + col * (j % col));
          }
        }
        ncolors = nc + nc * (col - 1 + col *
(col - 1));
        PetscCall(ISColoringCreate(comm, ncolors, nc * nx * ny, colors, PETSC_OWN_POINTER, &dd->localcoloring));
      }
      *coloring = dd->localcoloring;
    } else if (ctype == IS_COLORING_LOCAL) {
      if (!dd->ghostedcoloring) {
        PetscCall(PetscMalloc1(nc * gnx * gny, &colors));
        ii = 0;
        for (j = gys; j < gys + gny; j++) {
          for (i = gxs; i < gxs + gnx; i++) {
            for (k = 0; k < nc; k++) {
              /* the complicated stuff is to handle periodic boundaries */
              colors[ii++] = k + nc * ((SetInRange(i, m) % col) + col * (SetInRange(j, n) % col));
            }
          }
        }
        ncolors = nc + nc * (col - 1 + col * (col - 1));
        PetscCall(ISColoringCreate(comm, ncolors, nc * gnx * gny, colors, PETSC_OWN_POINTER, &dd->ghostedcoloring));
        /* PetscIntView(ncolors,(PetscInt*)colors,0); */

        PetscCall(ISColoringSetType(dd->ghostedcoloring, IS_COLORING_LOCAL));
      }
      *coloring = dd->ghostedcoloring;
    } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_ARG_WRONG, "Unknown ISColoringType %d", (int)ctype);
  }
  PetscCall(ISColoringReference(*coloring));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Creates a coloring for a 3d DMDA: color from the point's position modulo the
   stencil width within a col-by-col-by-col tile, per component */
PetscErrorCode DMCreateColoring_DA_3d_MPIAIJ(DM da, ISColoringType ctype, ISColoring *coloring)
{
  PetscInt         xs, ys, nx, ny, i, j, gxs, gys, gnx, gny, m, n, p, dim, s, k, nc, col, zs, gzs, ii, l, nz, gnz, M, N, P;
  PetscInt         ncolors;
  MPI_Comm         comm;
  DMBoundaryType   bx, by, bz;
  DMDAStencilType  st;
  ISColoringValue *colors;
  DM_DA           *dd = (DM_DA *)da->data;

  PetscFunctionBegin;
  /*
     nc - number of components per grid point
     col - number of colors needed in one direction for single component problem
  */
  PetscCall(DMDAGetInfo(da, &dim, &m, &n, &p, &M, &N, &P, &nc, &s, &bx, &by, &bz, &st));
  col = 2 * s + 1;
  /* periodic wrap-around only tiles evenly when the grid size is a multiple of col */
  PetscCheck(bx != DM_BOUNDARY_PERIODIC || (m % col) == 0, PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "For coloring efficiency ensure number of grid points in X is divisible\n\
                 by 2*stencil_width + 1\n");
  PetscCheck(by != DM_BOUNDARY_PERIODIC || (n % col) == 0, PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "For coloring efficiency ensure number of grid points in Y is divisible\n\
                 by 2*stencil_width + 1\n");
  PetscCheck(bz != DM_BOUNDARY_PERIODIC || (p % col) == 0, PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "For coloring efficiency ensure number of grid points in Z is divisible\n\
                 by 2*stencil_width + 1\n");

  PetscCall(DMDAGetCorners(da, &xs, &ys, &zs, &nx, &ny, &nz));
  PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, &gzs, &gnx, &gny, &gnz));
  PetscCall(PetscObjectGetComm((PetscObject)da, &comm));

  /* create the coloring */
  if (ctype == IS_COLORING_GLOBAL) {
    if (!dd->localcoloring) {
      PetscCall(PetscMalloc1(nc * nx * ny * nz, &colors));
      ii = 0;
      for (k = zs; k < zs + nz; k++) {
        for (j = ys; j < ys + ny; j++) {
          for (i = xs; i < xs + nx; i++) {
            for (l = 0; l < nc; l++) colors[ii++] = l + nc * ((i % col) + col * (j % col) + col * col * (k % col));
          }
        }
      }
      ncolors = nc + nc * (col - 1 + col * (col - 1) + col * col * (col - 1));
      PetscCall(ISColoringCreate(comm, ncolors, nc * nx * ny * nz, colors, PETSC_OWN_POINTER, &dd->localcoloring));
    }
    *coloring = dd->localcoloring;
  } else if (ctype == IS_COLORING_LOCAL) {
    if (!dd->ghostedcoloring) {
      PetscCall(PetscMalloc1(nc * gnx * gny * gnz, &colors));
      ii = 0;
      for (k = gzs; k < gzs + gnz; k++) {
        for (j = gys; j < gys + gny; j++) {
          for (i = gxs; i < gxs + gnx; i++) {
            for (l = 0; l < nc; l++) {
              /* the complicated stuff is to handle periodic boundaries */
              colors[ii++] = l + nc * ((SetInRange(i, m) % col) + col * (SetInRange(j, n) % col) + col * col * (SetInRange(k, p) % col));
            }
          }
        }
      }
      ncolors = nc + nc * (col - 1 + col * (col - 1) + col * col * (col - 1));
      PetscCall(ISColoringCreate(comm, ncolors, nc * gnx * gny * gnz, colors, PETSC_OWN_POINTER,
&dd->ghostedcoloring));
      PetscCall(ISColoringSetType(dd->ghostedcoloring, IS_COLORING_LOCAL));
    }
    *coloring = dd->ghostedcoloring;
  } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_ARG_WRONG, "Unknown ISColoringType %d", (int)ctype);
  PetscCall(ISColoringReference(*coloring));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Creates a coloring for a 1d DMDA; when an off-diagonal fill pattern (ofillcols)
   was provided, only components actually coupled off-process get distinct colors */
PetscErrorCode DMCreateColoring_DA_1d_MPIAIJ(DM da, ISColoringType ctype, ISColoring *coloring)
{
  PetscInt         xs, nx, i, i1, gxs, gnx, l, m, M, dim, s, nc, col;
  PetscInt         ncolors;
  MPI_Comm         comm;
  DMBoundaryType   bx;
  ISColoringValue *colors;
  DM_DA           *dd = (DM_DA *)da->data;

  PetscFunctionBegin;
  /*
     nc - number of components per grid point
     col - number of colors needed in one direction for single component problem
  */
  PetscCall(DMDAGetInfo(da, &dim, &m, NULL, NULL, &M, NULL, NULL, &nc, &s, &bx, NULL, NULL, NULL));
  col = 2 * s + 1;
  PetscCall(DMDAGetCorners(da, &xs, NULL, NULL, &nx, NULL, NULL));
  PetscCall(DMDAGetGhostCorners(da, &gxs, NULL, NULL, &gnx, NULL, NULL));
  PetscCall(PetscObjectGetComm((PetscObject)da, &comm));

  /* create the coloring */
  if (ctype == IS_COLORING_GLOBAL) {
    if (!dd->localcoloring) {
      PetscCall(PetscMalloc1(nc * nx, &colors));
      if (dd->ofillcols) {
        PetscInt tc = 0;
        /* tc = number of components with any off-diagonal coupling */
        for (i = 0; i < nc; i++) tc += (PetscInt)(dd->ofillcols[i] > 0);
        i1 = 0;
        for (i = xs; i < xs + nx; i++) {
          for (l = 0; l < nc; l++) {
            if (dd->ofillcols[l] && (i % col)) {
              colors[i1++] = nc - 1 + tc * ((i % col) - 1) + dd->ofillcols[l];
            } else {
              colors[i1++] = l;
            }
          }
        }
        ncolors = nc + 2 * s * tc;
      } else {
        i1 = 0;
        for (i = xs; i < xs + nx; i++) {
          for (l = 0; l < nc; l++) colors[i1++] = l + nc * (i % col);
        }
        ncolors = nc + nc * (col - 1);
      }
      PetscCall(ISColoringCreate(comm, ncolors, nc * nx, colors, PETSC_OWN_POINTER, &dd->localcoloring));
    }
    *coloring = dd->localcoloring;
  } else if (ctype == IS_COLORING_LOCAL) {
    if (!dd->ghostedcoloring) {
      PetscCall(PetscMalloc1(nc * gnx, &colors));
      i1 = 0;
      for (i = gxs; i < gxs + gnx; i++) {
        for (l = 0; l < nc; l++) {
          /* the complicated stuff is to handle periodic boundaries */
          colors[i1++] = l + nc * (SetInRange(i, m) % col);
        }
      }
      ncolors = nc + nc * (col - 1);
      PetscCall(ISColoringCreate(comm, ncolors, nc * gnx, colors, PETSC_OWN_POINTER, &dd->ghostedcoloring));
      PetscCall(ISColoringSetType(dd->ghostedcoloring, IS_COLORING_LOCAL));
    }
    *coloring = dd->ghostedcoloring;
  } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_ARG_WRONG, "Unknown ISColoringType %d", (int)ctype);
  PetscCall(ISColoringReference(*coloring));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Five-color scheme for a 2d star stencil of width 1: color = (3*j + i) mod 5 */
PetscErrorCode DMCreateColoring_DA_2d_5pt_MPIAIJ(DM da, ISColoringType ctype, ISColoring *coloring)
{
  PetscInt         xs, ys, nx, ny, i, j, ii, gxs, gys, gnx, gny, m, n, dim, s, k, nc;
  PetscInt         ncolors;
  MPI_Comm         comm;
  DMBoundaryType   bx, by;
  ISColoringValue *colors;
  DM_DA           *dd = (DM_DA *)da->data;

  PetscFunctionBegin;
  /*
     nc - number of components per grid point
     col - number of colors needed in one direction for single component problem
  */
  PetscCall(DMDAGetInfo(da, &dim, &m, &n, NULL, NULL, NULL, NULL, &nc, &s, &bx, &by, NULL, NULL));
  PetscCall(DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL));
  PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL));
  PetscCall(PetscObjectGetComm((PetscObject)da, &comm));
  /* create the coloring */
  if (ctype == IS_COLORING_GLOBAL) {
    if (!dd->localcoloring) {
      PetscCall(PetscMalloc1(nc * nx * ny, &colors));
      ii = 0;
      for (j = ys; j < ys + ny; j++) {
        for (i = xs; i < xs + nx; i++) {
          for (k = 0; k < nc; k++) colors[ii++] = k + nc * ((3 * j + i) % 5);
        }
      }
      ncolors = 5 * nc;
      PetscCall(ISColoringCreate(comm, ncolors, nc * nx * ny, colors, PETSC_OWN_POINTER, &dd->localcoloring));
    }
    *coloring = dd->localcoloring;
  } else if (ctype == IS_COLORING_LOCAL) {
    if (!dd->ghostedcoloring) {
      PetscCall(PetscMalloc1(nc * gnx * gny, &colors));
      ii = 0;
      for (j = gys; j < gys + gny; j++) {
        for (i = gxs; i < gxs + gnx; i++) {
          /* SetInRange() folds ghost indices back into the domain for periodic boundaries */
          for (k = 0; k < nc; k++) colors[ii++] = k + nc * ((3 * SetInRange(j, n) + SetInRange(i, m)) % 5);
        }
      }
      ncolors = 5 * nc;
      PetscCall(ISColoringCreate(comm, ncolors, nc * gnx * gny, colors, PETSC_OWN_POINTER, &dd->ghostedcoloring));
      PetscCall(ISColoringSetType(dd->ghostedcoloring, IS_COLORING_LOCAL));
    }
    *coloring = dd->ghostedcoloring;
  } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_ARG_WRONG, "Unknown ISColoringType %d", (int)ctype);
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* =========================================================================== */
extern PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ(DM, Mat);
extern PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ_Fill(DM, Mat);
extern PetscErrorCode DMCreateMatrix_DA_1d_SeqAIJ_NoPreallocation(DM, Mat);
extern PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ(DM, Mat);
extern PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ_Fill(DM, Mat);
extern PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ(DM, Mat);
extern PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ_Fill(DM, Mat);
extern PetscErrorCode DMCreateMatrix_DA_2d_MPIBAIJ(DM, Mat);
extern PetscErrorCode DMCreateMatrix_DA_3d_MPIBAIJ(DM, Mat);
extern PetscErrorCode DMCreateMatrix_DA_2d_MPISBAIJ(DM, Mat);
extern PetscErrorCode DMCreateMatrix_DA_3d_MPISBAIJ(DM, Mat);
extern PetscErrorCode DMCreateMatrix_DA_2d_MPISELL(DM, Mat);
extern PetscErrorCode DMCreateMatrix_DA_3d_MPISELL(DM, Mat);
extern PetscErrorCode DMCreateMatrix_DA_IS(DM, Mat);

/*@C
  MatSetupDM - Sets the `DMDA` that is to be used by the
HYPRE_StructMatrix PETSc matrix

  Logically Collective

  Input Parameters:
+ mat - the matrix
- da - the da

  Level: intermediate

.seealso: `DMDA`, `Mat`, `MatSetUp()`
@*/
PetscErrorCode MatSetupDM(Mat mat, DM da)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat, MAT_CLASSID, 1);
  PetscValidHeaderSpecificType(da, DM_CLASSID, 2, DMDA);
  /* dispatches to the matrix type's implementation, if it provides one */
  PetscTryMethod(mat, "MatSetupDM_C", (Mat, DM), (mat, da));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Views a DMDA-generated parallel matrix in the natural (application) ordering
   by permuting it with the DMDA's application ordering before viewing */
PetscErrorCode MatView_MPI_DA(Mat A, PetscViewer viewer)
{
  DM                da;
  const char       *prefix;
  Mat               Anatural;
  AO                ao;
  PetscInt          rstart, rend, *petsc, i;
  IS                is;
  MPI_Comm          comm;
  PetscViewerFormat format;

  PetscFunctionBegin;
  /* Check whether we are just printing info, in which case MatView() already viewed everything we wanted to view */
  PetscCall(PetscViewerGetFormat(viewer, &format));
  if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) PetscFunctionReturn(PETSC_SUCCESS);

  PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
  PetscCall(MatGetDM(A, &da));
  PetscCheck(da, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Matrix not generated from a DMDA");

  /* build the list of this process's rows, translated to the PETSc ordering */
  PetscCall(DMDAGetAO(da, &ao));
  PetscCall(MatGetOwnershipRange(A, &rstart, &rend));
  PetscCall(PetscMalloc1(rend - rstart, &petsc));
  for (i = rstart; i < rend; i++) petsc[i - rstart] = i;
  PetscCall(AOApplicationToPetsc(ao, rend - rstart, petsc));
  PetscCall(ISCreateGeneral(comm, rend - rstart, petsc, PETSC_OWN_POINTER, &is));

  /* call viewer on natural ordering */
  PetscCall(MatCreateSubMatrix(A, is, is, MAT_INITIAL_MATRIX, &Anatural));
  PetscCall(ISDestroy(&is));
  PetscCall(PetscObjectGetOptionsPrefix((PetscObject)A, &prefix));
  PetscCall(PetscObjectSetOptionsPrefix((PetscObject)Anatural, prefix));
  PetscCall(PetscObjectSetName((PetscObject)Anatural, ((PetscObject)A)->name));
  /* suppress the class/name header so the permuted copy prints like the original */
  ((PetscObject)Anatural)->donotPetscObjectPrintClassNamePrefixType = PETSC_TRUE;
  PetscCall(MatView(Anatural, viewer));
  ((PetscObject)Anatural)->donotPetscObjectPrintClassNamePrefixType = PETSC_FALSE;
  PetscCall(MatDestroy(&Anatural));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Loads a DMDA-generated matrix stored in the natural ordering and permutes it
   into the PETSc (parallel-distribution) ordering, replacing A's data in place */
PetscErrorCode MatLoad_MPI_DA(Mat A, PetscViewer viewer)
{
  DM       da;
  Mat      Anatural, Aapp;
  AO       ao;
  PetscInt rstart, rend, *app, i, m, n, M, N;
  IS       is;
  MPI_Comm comm;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
  PetscCall(MatGetDM(A, &da));
  PetscCheck(da, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Matrix not generated from a DMDA");

  /* Load the matrix in natural ordering */
  PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &Anatural));
  PetscCall(MatSetType(Anatural, ((PetscObject)A)->type_name));
  PetscCall(MatGetSize(A, &M, &N));
  PetscCall(MatGetLocalSize(A, &m, &n));
  PetscCall(MatSetSizes(Anatural, m, n, M, N));
  PetscCall(MatLoad(Anatural, viewer));

  /* Map natural ordering to application ordering and create IS */
  PetscCall(DMDAGetAO(da, &ao));
  PetscCall(MatGetOwnershipRange(Anatural, &rstart, &rend));
  PetscCall(PetscMalloc1(rend - rstart, &app));
  for (i = rstart; i < rend; i++) app[i - rstart] = i;
  PetscCall(AOPetscToApplication(ao, rend - rstart, app));
  PetscCall(ISCreateGeneral(comm, rend - rstart, app, PETSC_OWN_POINTER, &is));

  /* Do permutation and replace header */
  PetscCall(MatCreateSubMatrix(Anatural, is, is, MAT_INITIAL_MATRIX, &Aapp));
  PetscCall(MatHeaderReplace(A, &Aapp));
  PetscCall(ISDestroy(&is));
  PetscCall(MatDestroy(&Anatural));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Creates and preallocates the matrix of type da->mattype for this DMDA,
   dispatching on dimension and on which preallocation routine the type supports */
PetscErrorCode DMCreateMatrix_DA(DM da, Mat *J)
{
  PetscInt    dim, dof, nx, ny, nz, dims[3], starts[3], M, N, P;
  Mat         A;
  MPI_Comm    comm;
  MatType     Atype;
  MatType     mtype;
  PetscMPIInt
    size;
  DM_DA *dd = (DM_DA *)da->data;
  void (*aij)(void) = NULL, (*baij)(void) = NULL, (*sbaij)(void) = NULL, (*sell)(void) = NULL, (*is)(void) = NULL;

  PetscFunctionBegin;
  PetscCall(MatInitializePackage());
  mtype = da->mattype;

  /*
                                                     m
          ------------------------------------------------------
         |                                                      |
         |                                                      |
         |               ----------------------                 |
         |               |                    |                 |
      n  |           ny  |                    |                 |
         |               |                    |                 |
         |               .---------------------                 |
         |             (xs,ys)     nx                           |
         |            .                                         |
         |          (gxs,gys)                                   |
         |                                                      |
          -----------------------------------------------------
  */

  /*
     nc - number of components per grid point
     col - number of colors needed in one direction for single component problem
  */
  M   = dd->M;
  N   = dd->N;
  P   = dd->P;
  dim = da->dim;
  dof = dd->w;
  /* PetscCall(DMDAGetInfo(da,&dim,&M,&N,&P,NULL,NULL,NULL,&dof,NULL,NULL,NULL,NULL,NULL)); */
  PetscCall(DMDAGetCorners(da, NULL, NULL, NULL, &nx, &ny, &nz));
  PetscCall(PetscObjectGetComm((PetscObject)da, &comm));
  PetscCall(MatCreate(comm, &A));
  PetscCall(MatSetSizes(A, dof * nx * ny * nz, dof * nx * ny * nz, dof * M * N * P, dof * M * N * P));
  PetscCall(MatSetType(A, mtype));
  PetscCall(MatSetFromOptions(A));
  if (dof * nx * ny * nz < da->bind_below) {
    /* small local problems stay on the CPU */
    PetscCall(MatSetBindingPropagates(A, PETSC_TRUE));
    PetscCall(MatBindToCPU(A, PETSC_TRUE));
  }
  PetscCall(MatSetDM(A, da));
  if (da->structure_only) PetscCall(MatSetOption(A, MAT_STRUCTURE_ONLY, PETSC_TRUE));
  PetscCall(MatGetType(A, &Atype));
  /*
     We do not provide a getmatrix function in the DMDA operations because
     the basic DMDA does not know about matrices. We think of DMDA as being
     more low-level than matrices. This is kind of cheating but, cause sometimes
     we think of DMDA has higher level than matrices.

     We could switch based on Atype (or mtype), but we do not since the
     specialized setting routines depend only on the particular preallocation
     details of the matrix, not the type itself.
  */
  PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatMPIAIJSetPreallocation_C", &aij));
  if (!aij) PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatSeqAIJSetPreallocation_C", &aij));
  if (!aij) {
    PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatMPIBAIJSetPreallocation_C", &baij));
    if (!baij) PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatSeqBAIJSetPreallocation_C", &baij));
    if (!baij) {
      PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatMPISBAIJSetPreallocation_C", &sbaij));
      if (!sbaij) PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatSeqSBAIJSetPreallocation_C", &sbaij));
      if (!sbaij) {
        PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatMPISELLSetPreallocation_C", &sell));
        if (!sell) PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatSeqSELLSetPreallocation_C", &sell));
      }
      if (!sell) PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatISSetPreallocation_C", &is));
    }
  }
  if (aij) {
    if (dim == 1) {
      if (dd->ofill) {
        PetscCall(DMCreateMatrix_DA_1d_MPIAIJ_Fill(da, A));
      } else {
        DMBoundaryType bx;
        PetscMPIInt    size;
        PetscCall(DMDAGetInfo(da, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, &bx, NULL, NULL, NULL));
        PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)da), &size));
        if (size == 1 && bx == DM_BOUNDARY_NONE) {
          PetscCall(DMCreateMatrix_DA_1d_SeqAIJ_NoPreallocation(da, A));
        } else {
          PetscCall(DMCreateMatrix_DA_1d_MPIAIJ(da, A));
        }
      }
    } else if (dim == 2) {
      if (dd->ofill) {
        PetscCall(DMCreateMatrix_DA_2d_MPIAIJ_Fill(da, A));
      } else {
        PetscCall(DMCreateMatrix_DA_2d_MPIAIJ(da, A));
      }
    } else if (dim == 3) {
      if (dd->ofill) {
        PetscCall(DMCreateMatrix_DA_3d_MPIAIJ_Fill(da, A));
      } else {
        PetscCall(DMCreateMatrix_DA_3d_MPIAIJ(da, A));
      }
    }
  } else if (baij) {
    if (dim == 2) {
      PetscCall(DMCreateMatrix_DA_2d_MPIBAIJ(da, A));
    } else if (dim == 3) {
      PetscCall(DMCreateMatrix_DA_3d_MPIBAIJ(da, A));
    } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "Not implemented for %" PetscInt_FMT " dimension and Matrix Type: %s in %" PetscInt_FMT " dimension! Send mail to petsc-maint@mcs.anl.gov for code", dim, Atype, dim);
  } else if (sbaij) {
    if (dim == 2) {
      PetscCall(DMCreateMatrix_DA_2d_MPISBAIJ(da, A));
    } else if (dim == 3) {
      PetscCall(DMCreateMatrix_DA_3d_MPISBAIJ(da, A));
    } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "Not implemented for %" PetscInt_FMT " dimension and Matrix Type: %s in %" PetscInt_FMT " dimension! Send mail to petsc-maint@mcs.anl.gov for code", dim, Atype, dim);
  } else if (sell) {
    if (dim == 2) {
      PetscCall(DMCreateMatrix_DA_2d_MPISELL(da, A));
    } else if (dim == 3) {
      PetscCall(DMCreateMatrix_DA_3d_MPISELL(da, A));
    } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "Not implemented for %" PetscInt_FMT " dimension and Matrix Type: %s in %" PetscInt_FMT " dimension! Send mail to petsc-maint@mcs.anl.gov for code", dim, Atype, dim);
  } else if (is) {
    PetscCall(DMCreateMatrix_DA_IS(da, A));
  } else {
    /* no preallocation routine known for this type: plain setup with the DMDA's l2g map */
    ISLocalToGlobalMapping ltog;

    PetscCall(MatSetBlockSize(A, dof));
    PetscCall(MatSetUp(A));
    PetscCall(DMGetLocalToGlobalMapping(da, &ltog));
    PetscCall(MatSetLocalToGlobalMapping(A, ltog, ltog));
  }
  PetscCall(DMDAGetGhostCorners(da, &starts[0], &starts[1], &starts[2], &dims[0], &dims[1], &dims[2]));
  PetscCall(MatSetStencil(A, dim, dims, starts, dof));
  PetscCall(MatSetDM(A, da));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  if (size > 1) {
    /* change viewer to display matrix in natural ordering */
    PetscCall(MatSetOperation(A, MATOP_VIEW, (void (*)(void))MatView_MPI_DA));
    PetscCall(MatSetOperation(A, MATOP_LOAD, (void (*)(void))MatLoad_MPI_DA));
  }
  *J = A;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* ---------------------------------------------------------------------------------*/
PETSC_EXTERN PetscErrorCode MatISSetPreallocation_IS(Mat, PetscInt, const PetscInt[], PetscInt, const PetscInt[]);

/* Preallocates a MATIS matrix for the DMDA using the element decomposition */
PetscErrorCode DMCreateMatrix_DA_IS(DM dm, Mat J)
{
  DM_DA                 *da = (DM_DA *)dm->data;
  Mat                    lJ, P;
  ISLocalToGlobalMapping ltog;
  IS                     is;
  PetscBT                bt;
  const PetscInt        *e_loc, *idx;
  PetscInt               i, nel, nen, nv, dof, *gidx, n, N;

  /* The l2g map of DMDA has all ghosted nodes, and e_loc is a subset of all the local nodes (including the ghosted)
     We need to filter out the local indices that are not represented through the DMDAGetElements decomposition */
  PetscFunctionBegin;
  dof = da->w;
  PetscCall(MatSetBlockSize(J, dof));
  PetscCall(DMGetLocalToGlobalMapping(dm, &ltog));

  /* flag local elements indices in local DMDA numbering */
  PetscCall(ISLocalToGlobalMappingGetSize(ltog, &nv));
  PetscCall(PetscBTCreate(nv / dof, &bt));
  PetscCall(DMDAGetElements(dm, &nel, &nen, &e_loc)); /* this will throw an error
if the stencil type is not DMDA_STENCIL_BOX */ 802 for (i = 0; i < nel * nen; i++) PetscCall(PetscBTSet(bt, e_loc[i])); 803 804 /* filter out (set to -1) the global indices not used by the local elements */ 805 PetscCall(PetscMalloc1(nv / dof, &gidx)); 806 PetscCall(ISLocalToGlobalMappingGetBlockIndices(ltog, &idx)); 807 PetscCall(PetscArraycpy(gidx, idx, nv / dof)); 808 PetscCall(ISLocalToGlobalMappingRestoreBlockIndices(ltog, &idx)); 809 for (i = 0; i < nv / dof; i++) 810 if (!PetscBTLookup(bt, i)) gidx[i] = -1; 811 PetscCall(PetscBTDestroy(&bt)); 812 PetscCall(ISCreateBlock(PetscObjectComm((PetscObject)dm), dof, nv / dof, gidx, PETSC_OWN_POINTER, &is)); 813 PetscCall(ISLocalToGlobalMappingCreateIS(is, <og)); 814 PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog)); 815 PetscCall(ISLocalToGlobalMappingDestroy(<og)); 816 PetscCall(ISDestroy(&is)); 817 818 /* Preallocation */ 819 if (dm->prealloc_skip) { 820 PetscCall(MatSetUp(J)); 821 } else { 822 PetscCall(MatISGetLocalMat(J, &lJ)); 823 PetscCall(MatGetLocalToGlobalMapping(lJ, <og, NULL)); 824 PetscCall(MatCreate(PetscObjectComm((PetscObject)lJ), &P)); 825 PetscCall(MatSetType(P, MATPREALLOCATOR)); 826 PetscCall(MatSetLocalToGlobalMapping(P, ltog, ltog)); 827 PetscCall(MatGetSize(lJ, &N, NULL)); 828 PetscCall(MatGetLocalSize(lJ, &n, NULL)); 829 PetscCall(MatSetSizes(P, n, n, N, N)); 830 PetscCall(MatSetBlockSize(P, dof)); 831 PetscCall(MatSetUp(P)); 832 for (i = 0; i < nel; i++) PetscCall(MatSetValuesBlockedLocal(P, nen, e_loc + i * nen, nen, e_loc + i * nen, NULL, INSERT_VALUES)); 833 PetscCall(MatPreallocatorPreallocate(P, (PetscBool)!da->prealloc_only, lJ)); 834 PetscCall(MatISRestoreLocalMat(J, &lJ)); 835 PetscCall(DMDARestoreElements(dm, &nel, &nen, &e_loc)); 836 PetscCall(MatDestroy(&P)); 837 838 PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY)); 839 PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY)); 840 } 841 PetscFunctionReturn(PETSC_SUCCESS); 842 } 843 844 PetscErrorCode DMCreateMatrix_DA_2d_MPISELL(DM 
da, Mat J)
{
  PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny, m, n, dim, s, *cols = NULL, k, nc, *rows = NULL, col, cnt, l, p;
  PetscInt               lstart, lend, pstart, pend, *dnz, *onz;
  MPI_Comm               comm;
  PetscScalar           *values;
  DMBoundaryType         bx, by;
  ISLocalToGlobalMapping ltog;
  DMDAStencilType        st;

  PetscFunctionBegin;
  /*
         nc - number of components per grid point
         col - number of colors needed in one direction for single component problem
  */
  PetscCall(DMDAGetInfo(da, &dim, &m, &n, NULL, NULL, NULL, NULL, &nc, &s, &bx, &by, NULL, &st));
  col = 2 * s + 1;
  PetscCall(DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL));
  PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL));
  PetscCall(PetscObjectGetComm((PetscObject)da, &comm));

  /* rows holds the nc row indices of one grid point; cols holds the full dense stencil stamp */
  PetscCall(PetscMalloc2(nc, &rows, col * col * nc * nc, &cols));
  PetscCall(DMGetLocalToGlobalMapping(da, &ltog));

  PetscCall(MatSetBlockSize(J, nc));
  /* determine the matrix preallocation information */
  MatPreallocateBegin(comm, nc * nx * ny, nc * nx * ny, dnz, onz);
  for (i = xs; i < xs + nx; i++) {
    /* clip the stencil at physical boundaries unless periodic in x */
    pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
    pend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));

    for (j = ys; j < ys + ny; j++) {
      slot = i - gxs + gnx * (j - gys); /* local (ghosted) index of grid point (i,j) */

      lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
      lend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));

      cnt = 0;
      for (k = 0; k < nc; k++) {
        for (l = lstart; l < lend + 1; l++) {
          for (p = pstart; p < pend + 1; p++) {
            if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star have either l = 0 or p = 0 */
              cols[cnt++] = k + nc * (slot + gnx * l + p);
            }
          }
        }
        rows[k] = k + nc * (slot);
      }
      PetscCall(MatPreallocateSetLocal(ltog, nc, rows, ltog, cnt, cols, dnz, onz));
    }
  }
  PetscCall(MatSetBlockSize(J, nc));
  PetscCall(MatSeqSELLSetPreallocation(J, 0, dnz));
  PetscCall(MatMPISELLSetPreallocation(J, 0, dnz, 0, onz));
  MatPreallocateEnd(dnz, onz);

  PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    /* insert explicit zeros so the nonzero structure is locked in before assembly */
    PetscCall(PetscCalloc1(col * col * nc * nc, &values));
    for (i = xs; i < xs + nx; i++) {
      pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
      pend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));

      for (j = ys; j < ys + ny; j++) {
        slot = i - gxs + gnx * (j - gys);

        lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
        lend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));

        cnt = 0;
        for (k = 0; k < nc; k++) {
          for (l = lstart; l < lend + 1; l++) {
            for (p = pstart; p < pend + 1; p++) {
              if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star have either l = 0 or p = 0 */
                cols[cnt++] = k + nc * (slot + gnx * l + p);
              }
            }
          }
          rows[k] = k + nc * (slot);
        }
        PetscCall(MatSetValuesLocal(J, nc, rows, cnt, cols, values, INSERT_VALUES));
      }
    }
    PetscCall(PetscFree(values));
    /* do not copy values to GPU since they are all zero and not yet needed there */
    PetscCall(MatBindToCPU(J, PETSC_TRUE));
    PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatBindToCPU(J, PETSC_FALSE));
    PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  }
  PetscCall(PetscFree2(rows, cols));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Preallocates (and optionally zero-fills) a MATMPISELL matrix for a 3d DMDA */
PetscErrorCode DMCreateMatrix_DA_3d_MPISELL(DM da, Mat J)
{
  PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
  PetscInt               m, n, dim, s, *cols = NULL, k, nc, *rows = NULL, col, cnt, l, p, *dnz = NULL, *onz = NULL;
  PetscInt               istart, iend, jstart, jend, kstart, kend, zs, nz, gzs, gnz, ii, jj, kk, M, N, P;
  MPI_Comm               comm;
  PetscScalar           *values;
  DMBoundaryType         bx, by, bz;
  ISLocalToGlobalMapping ltog;
  DMDAStencilType        st;

  PetscFunctionBegin;
  /*
         nc - number of components per grid point
         col - number of colors needed in one direction for single component problem
  */
  PetscCall(DMDAGetInfo(da, &dim, &m, &n, &p, &M, &N, &P, &nc, &s, &bx, &by, &bz, &st));
  col = 2 * s + 1;
  PetscCall(DMDAGetCorners(da, &xs, &ys, &zs, &nx, &ny, &nz));
  PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, &gzs, &gnx, &gny, &gnz));
  PetscCall(PetscObjectGetComm((PetscObject)da, &comm));

  PetscCall(PetscMalloc2(nc, &rows, col * col * col * nc * nc, &cols));
PetscCall(DMGetLocalToGlobalMapping(da, &ltog));

  PetscCall(MatSetBlockSize(J, nc));
  /* determine the matrix preallocation information */
  MatPreallocateBegin(comm, nc * nx * ny * nz, nc * nx * ny * nz, dnz, onz);
  for (i = xs; i < xs + nx; i++) {
    /* clip the stencil at physical boundaries unless periodic in each direction */
    istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
    iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
    for (j = ys; j < ys + ny; j++) {
      jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
      jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
      for (k = zs; k < zs + nz; k++) {
        kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
        kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));

        slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs); /* local (ghosted) index of grid point (i,j,k) */

        cnt = 0;
        for (l = 0; l < nc; l++) {
          for (ii = istart; ii < iend + 1; ii++) {
            for (jj = jstart; jj < jend + 1; jj++) {
              for (kk = kstart; kk < kend + 1; kk++) {
                if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/
                  cols[cnt++] = l + nc * (slot + ii + gnx * jj + gnx * gny * kk);
                }
              }
            }
          }
          rows[l] = l + nc * (slot);
        }
        PetscCall(MatPreallocateSetLocal(ltog, nc, rows, ltog, cnt, cols, dnz, onz));
      }
    }
  }
  PetscCall(MatSetBlockSize(J, nc));
  PetscCall(MatSeqSELLSetPreallocation(J, 0, dnz));
  PetscCall(MatMPISELLSetPreallocation(J, 0, dnz, 0, onz));
  MatPreallocateEnd(dnz, onz);
  PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    /* NOTE(review): nc*nc would suffice for the nc x (col^3*nc) zero stamp; nc^3 over-allocates
       when nc > 1 (harmless, only wastes memory) — confirm against upstream before changing */
    PetscCall(PetscCalloc1(col * col * col * nc * nc * nc, &values));
    for (i = xs; i < xs + nx; i++) {
      istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
      iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
      for (j = ys; j < ys + ny; j++) {
        jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
        jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
        for (k = zs; k < zs + nz; k++) {
          kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
          kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));

          slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);

          cnt = 0;
          for (l = 0; l < nc; l++) {
            for (ii = istart; ii < iend + 1; ii++) {
              for (jj = jstart; jj < jend + 1; jj++) {
                for (kk = kstart; kk < kend + 1; kk++) {
                  if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/
                    cols[cnt++] = l + nc * (slot + ii + gnx * jj + gnx * gny * kk);
                  }
                }
              }
            }
            rows[l] = l + nc * (slot);
          }
          PetscCall(MatSetValuesLocal(J, nc, rows, cnt, cols, values, INSERT_VALUES));
        }
      }
    }
    PetscCall(PetscFree(values));
    /* do not copy values to GPU since they are all zero and not yet needed there */
    PetscCall(MatBindToCPU(J, PETSC_TRUE));
    PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatBindToCPU(J, PETSC_FALSE));
    PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  }
  PetscCall(PetscFree2(rows, cols));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Preallocates (and optionally zero-fills) a MATMPIAIJ matrix for a 2d DMDA */
PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ(DM da, Mat J)
{
  PetscInt xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny, m, n, dim, s, *cols = NULL, k, nc, *rows = NULL, col, cnt, l, p, M, N;
  PetscInt lstart, lend, pstart, pend,
*dnz, *onz;
  MPI_Comm               comm;
  DMBoundaryType         bx, by;
  ISLocalToGlobalMapping ltog, mltog;
  DMDAStencilType        st;
  PetscBool              removedups = PETSC_FALSE, alreadyboundtocpu = PETSC_TRUE;

  PetscFunctionBegin;
  /*
         nc - number of components per grid point
         col - number of colors needed in one direction for single component problem
  */
  PetscCall(DMDAGetInfo(da, &dim, &m, &n, NULL, &M, &N, NULL, &nc, &s, &bx, &by, NULL, &st));
  if (bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE) PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_TRUE));
  col = 2 * s + 1;
  /*
       With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times
       because of "wrapping" around the end of the domain hitting an entry already counted in the other direction.
  */
  if (M == 1 && 2 * s >= m) removedups = PETSC_TRUE;
  if (N == 1 && 2 * s >= n) removedups = PETSC_TRUE;
  PetscCall(DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL));
  PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL));
  PetscCall(PetscObjectGetComm((PetscObject)da, &comm));

  PetscCall(PetscMalloc2(nc, &rows, col * col * nc * nc, &cols));
  PetscCall(DMGetLocalToGlobalMapping(da, &ltog));

  PetscCall(MatSetBlockSize(J, nc));
  /* determine the matrix preallocation information */
  MatPreallocateBegin(comm, nc * nx * ny, nc * nx * ny, dnz, onz);
  for (i = xs; i < xs + nx; i++) {
    pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
    pend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));

    for (j = ys; j < ys + ny; j++) {
      slot = i - gxs + gnx * (j - gys); /* local (ghosted) index of grid point (i,j) */

      lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
      lend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));

      cnt = 0;
      for (k = 0; k < nc; k++) {
        for (l = lstart; l < lend + 1; l++) {
          for (p = pstart; p < pend + 1; p++) {
            if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star have either l = 0 or p = 0 */
              cols[cnt++] = k + nc * (slot + gnx * l + p);
            }
          }
        }
        rows[k] = k + nc * (slot);
      }
      if (removedups) PetscCall(MatPreallocateSetLocalRemoveDups(ltog, nc, rows, ltog, cnt, cols, dnz, onz));
      else PetscCall(MatPreallocateSetLocal(ltog, nc, rows, ltog, cnt, cols, dnz, onz));
    }
  }
  PetscCall(MatSetBlockSize(J, nc));
  PetscCall(MatSeqAIJSetPreallocation(J, 0, dnz));
  PetscCall(MatMPIAIJSetPreallocation(J, 0, dnz, 0, onz));
  MatPreallocateEnd(dnz, onz);
  PetscCall(MatGetLocalToGlobalMapping(J, &mltog, NULL));
  if (!mltog) PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    for (i = xs; i < xs + nx; i++) {
      pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
      pend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));

      for (j = ys; j < ys + ny; j++) {
        slot = i - gxs + gnx * (j - gys);

        lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
        lend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));

        cnt = 0;
        /* columns for the first component; the remaining nc-1 components are consecutive */
        for (l = lstart; l < lend + 1; l++) {
          for (p = pstart; p < pend + 1; p++) {
            if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star have either l = 0 or p = 0 */
              cols[cnt++] = nc * (slot + gnx * l + p);
              for (k = 1; k < nc; k++) {
                cols[cnt] = 1 + cols[cnt - 1];
                cnt++;
              }
            }
          }
        }
        for (k = 0; k < nc; k++) rows[k] = k + nc * (slot);
        PetscCall(MatSetValuesLocal(J, nc, rows, cnt, cols, NULL, INSERT_VALUES));
      }
    }
    /* do not copy values to GPU since they are all zero and not yet needed there */
    PetscCall(MatBoundToCPU(J, &alreadyboundtocpu));
    PetscCall(MatBindToCPU(J, PETSC_TRUE));
    PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
    if (!alreadyboundtocpu) PetscCall(MatBindToCPU(J, PETSC_FALSE));
    PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
    if (bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE) PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_FALSE));
  }
  PetscCall(PetscFree2(rows, cols));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* As DMCreateMatrix_DA_2d_MPIAIJ but honoring the per-block fill patterns set with DMDASetBlockFills() */
PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ_Fill(DM da, Mat J)
{
  PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
  PetscInt               m, n, dim, s, *cols, k, nc, row, col, cnt, maxcnt = 0, l, p, M, N;
  PetscInt               lstart, lend, pstart, pend, *dnz, *onz;
  DM_DA                 *dd = (DM_DA *)da->data;
  PetscInt               ifill_col, *ofill = dd->ofill, *dfill = dd->dfill;
  MPI_Comm               comm;
  DMBoundaryType         bx, by;
  ISLocalToGlobalMapping ltog;
  DMDAStencilType        st;
  PetscBool              removedups = PETSC_FALSE;

  PetscFunctionBegin;
  /*
         nc - number of components per grid point
         col - number of colors needed in one direction for single component problem
  */
  PetscCall(DMDAGetInfo(da, &dim, &m, &n, NULL, &M, &N, NULL, &nc, &s, &bx, &by, NULL, &st));
  col =
2 * s + 1;
  /*
       With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times
       because of "wrapping" around the end of the domain hitting an entry already counted in the other direction.
  */
  if (M == 1 && 2 * s >= m) removedups = PETSC_TRUE;
  if (N == 1 && 2 * s >= n) removedups = PETSC_TRUE;
  PetscCall(DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL));
  PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL));
  PetscCall(PetscObjectGetComm((PetscObject)da, &comm));

  PetscCall(PetscMalloc1(col * col * nc, &cols));
  PetscCall(DMGetLocalToGlobalMapping(da, &ltog));

  PetscCall(MatSetBlockSize(J, nc));
  /* determine the matrix preallocation information */
  MatPreallocateBegin(comm, nc * nx * ny, nc * nx * ny, dnz, onz);
  for (i = xs; i < xs + nx; i++) {
    pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
    pend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));

    for (j = ys; j < ys + ny; j++) {
      slot = i - gxs + gnx * (j - gys); /* local (ghosted) index of grid point (i,j) */

      lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
      lend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));

      /* one row at a time: each component k may couple to a different sparse column set */
      for (k = 0; k < nc; k++) {
        cnt = 0;
        for (l = lstart; l < lend + 1; l++) {
          for (p = pstart; p < pend + 1; p++) {
            if (l || p) {
              /* off-diagonal block: use the ofill sparse pattern (CSR-like, see DMDASetBlockFills_Private) */
              if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star */
                for (ifill_col = ofill[k]; ifill_col < ofill[k + 1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc * (slot + gnx * l + p);
              }
            } else {
              /* diagonal block: dfill pattern, or dense if dfill was not provided */
              if (dfill) {
                for (ifill_col = dfill[k]; ifill_col < dfill[k + 1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc * (slot + gnx * l + p);
              } else {
                for (ifill_col = 0; ifill_col < nc; ifill_col++) cols[cnt++] = ifill_col + nc * (slot + gnx * l + p);
              }
            }
          }
        }
        row    = k + nc * (slot);
        maxcnt = PetscMax(maxcnt, cnt);
        if (removedups) PetscCall(MatPreallocateSetLocalRemoveDups(ltog, 1, &row, ltog, cnt, cols, dnz, onz));
        else PetscCall(MatPreallocateSetLocal(ltog, 1, &row, ltog, cnt, cols, dnz, onz));
      }
    }
  }
  PetscCall(MatSeqAIJSetPreallocation(J, 0, dnz));
  PetscCall(MatMPIAIJSetPreallocation(J, 0, dnz, 0, onz));
  MatPreallocateEnd(dnz, onz);
  PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    for (i = xs; i < xs + nx; i++) {
      pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
      pend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));

      for (j = ys; j < ys + ny; j++) {
        slot = i - gxs + gnx * (j - gys);

        lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
        lend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));

        for (k = 0; k < nc; k++) {
          cnt = 0;
          for (l = lstart; l < lend + 1; l++) {
            for (p = pstart; p < pend + 1; p++) {
              if (l || p) {
                if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star */
                  for (ifill_col = ofill[k]; ifill_col < ofill[k + 1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc * (slot + gnx * l + p);
                }
              } else {
                if (dfill) {
                  for (ifill_col = dfill[k]; ifill_col < dfill[k + 1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc * (slot + gnx * l + p);
                } else {
                  for (ifill_col = 0; ifill_col < nc; ifill_col++) cols[cnt++] = ifill_col + nc * (slot + gnx * l + p);
                }
              }
            }
          }
          row = k + nc * (slot);
          PetscCall(MatSetValuesLocal(J, 1, &row, cnt, cols, NULL, INSERT_VALUES));
        }
      }
    }
    /* do not copy values to GPU since they are all zero and not yet needed there */
    PetscCall(MatBindToCPU(J, PETSC_TRUE));
    PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatBindToCPU(J, PETSC_FALSE));
    PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  }
  PetscCall(PetscFree(cols));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* ---------------------------------------------------------------------------------*/

/* Preallocates (and optionally zero-fills) a MATMPIAIJ matrix for a 3d DMDA */
PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ(DM da, Mat J)
{
  PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
  PetscInt               m, n, dim, s, *cols = NULL, k, nc, *rows = NULL, col, cnt, l, p, *dnz = NULL, *onz = NULL;
  PetscInt               istart, iend, jstart, jend, kstart, kend, zs, nz, gzs, gnz, ii, jj, kk, M, N, P;
  MPI_Comm               comm;
  DMBoundaryType         bx, by, bz;
  ISLocalToGlobalMapping ltog, mltog;
  DMDAStencilType        st;
  PetscBool              removedups = PETSC_FALSE;

  PetscFunctionBegin;
  /*
         nc - number of components per grid point
         col - number of colors needed
in one direction for single component problem
  */
  PetscCall(DMDAGetInfo(da, &dim, &m, &n, &p, &M, &N, &P, &nc, &s, &bx, &by, &bz, &st));
  if (bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE && bz == DM_BOUNDARY_NONE) PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_TRUE));
  col = 2 * s + 1;

  /*
       With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times
       because of "wrapping" around the end of the domain hitting an entry already counted in the other direction.
  */
  if (M == 1 && 2 * s >= m) removedups = PETSC_TRUE;
  if (N == 1 && 2 * s >= n) removedups = PETSC_TRUE;
  if (P == 1 && 2 * s >= p) removedups = PETSC_TRUE;

  PetscCall(DMDAGetCorners(da, &xs, &ys, &zs, &nx, &ny, &nz));
  PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, &gzs, &gnx, &gny, &gnz));
  PetscCall(PetscObjectGetComm((PetscObject)da, &comm));

  PetscCall(PetscMalloc2(nc, &rows, col * col * col * nc * nc, &cols));
  PetscCall(DMGetLocalToGlobalMapping(da, &ltog));

  PetscCall(MatSetBlockSize(J, nc));
  /* determine the matrix preallocation information */
  MatPreallocateBegin(comm, nc * nx * ny * nz, nc * nx * ny * nz, dnz, onz);
  for (i = xs; i < xs + nx; i++) {
    istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
    iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
    for (j = ys; j < ys + ny; j++) {
      jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
      jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
      for (k = zs; k < zs + nz; k++) {
        kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
        kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));

        slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs); /* local (ghosted) index of grid point (i,j,k) */

        cnt = 0;
        for (l = 0; l < nc; l++) {
          for (ii = istart; ii < iend + 1; ii++) {
            for (jj = jstart; jj < jend + 1; jj++) {
              for (kk = kstart; kk < kend + 1; kk++) {
                if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/
                  cols[cnt++] = l + nc * (slot + ii + gnx * jj + gnx * gny * kk);
                }
              }
            }
          }
          rows[l] = l + nc * (slot);
        }
        if (removedups) PetscCall(MatPreallocateSetLocalRemoveDups(ltog, nc, rows, ltog, cnt, cols, dnz, onz));
        else PetscCall(MatPreallocateSetLocal(ltog, nc, rows, ltog, cnt, cols, dnz, onz));
      }
    }
  }
  PetscCall(MatSetBlockSize(J, nc));
  PetscCall(MatSeqAIJSetPreallocation(J, 0, dnz));
  PetscCall(MatMPIAIJSetPreallocation(J, 0, dnz, 0, onz));
  MatPreallocateEnd(dnz, onz);
  PetscCall(MatGetLocalToGlobalMapping(J, &mltog, NULL));
  if (!mltog) PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    for (i = xs; i < xs + nx; i++) {
      istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
      iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
      for (j = ys; j < ys + ny; j++) {
        jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
        jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
        for (k = zs; k < zs + nz; k++) {
          kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
          kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));

          slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);

          cnt = 0;
          /* loop nest ordered kk/jj/ii so the generated column indices are increasing */
          for (kk = kstart; kk < kend + 1; kk++) {
            for (jj = jstart; jj < jend + 1; jj++) {
              for (ii = istart; ii < iend + 1; ii++) {
                if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/
                  cols[cnt++] = nc * (slot + ii + gnx * jj + gnx * gny * kk);
                  for (l = 1; l < nc; l++) {
                    cols[cnt] = 1 + cols[cnt - 1];
                    cnt++;
                  }
                }
              }
            }
          }
          rows[0] = nc * (slot);
          for (l = 1; l < nc; l++) rows[l] = 1 + rows[l - 1];
          PetscCall(MatSetValuesLocal(J, nc, rows, cnt, cols, NULL, INSERT_VALUES));
        }
      }
    }
    /* do not copy values to GPU since they are all zero and not yet needed there */
    PetscCall(MatBindToCPU(J, PETSC_TRUE));
    PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
    if (bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE && bz == DM_BOUNDARY_NONE) PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_FALSE));
    PetscCall(MatBindToCPU(J, PETSC_FALSE));
    PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  }
  PetscCall(PetscFree2(rows, cols));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* ---------------------------------------------------------------------------------*/

/* As the 1d MPIAIJ creation but honoring the per-block fill patterns set with DMDASetBlockFills() */
PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ_Fill(DM da, Mat J)
{
  DM_DA                 *dd = (DM_DA *)da->data;
  PetscInt               xs, nx, i, j, gxs, gnx, row, k, l;
  PetscInt               m, dim, s, *cols = NULL, nc, cnt, maxcnt = 0, *ocols;
  PetscInt              *ofill = dd->ofill, *dfill = dd->dfill;
  DMBoundaryType         bx;
  ISLocalToGlobalMapping ltog;
  PetscMPIInt            rank, size;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)da), &rank));
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)da), &size));

  /*
nc - number of components per grid point
  */
  PetscCall(DMDAGetInfo(da, &dim, &m, NULL, NULL, NULL, NULL, NULL, &nc, &s, &bx, NULL, NULL, NULL));
  PetscCheck(s <= 1, PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "Matrix creation for 1d not implemented correctly for stencil width larger than 1");
  PetscCall(DMDAGetCorners(da, &xs, NULL, NULL, &nx, NULL, NULL));
  PetscCall(DMDAGetGhostCorners(da, &gxs, NULL, NULL, &gnx, NULL, NULL));

  PetscCall(MatSetBlockSize(J, nc));
  /* cols counts diagonal-block nonzeros per row, ocols off-diagonal-block nonzeros */
  PetscCall(PetscCalloc2(nx * nc, &cols, nx * nc, &ocols));

  /*
        note should be smaller for first and last process with no periodic
        does not handle dfill
  */
  cnt = 0;
  /* coupling with process to the left */
  for (i = 0; i < s; i++) {
    for (j = 0; j < nc; j++) {
      /* NOTE(review): dfill is dereferenced unconditionally in this counting pass, while the
         fill pass below guards with "if (dfill)" — confirm callers guarantee dfill != NULL here */
      ocols[cnt] = ((rank == 0) ? 0 : (s - i) * (ofill[j + 1] - ofill[j]));
      cols[cnt]  = dfill[j + 1] - dfill[j] + (s + i) * (ofill[j + 1] - ofill[j]);
      if (rank == 0 && (dd->bx == DM_BOUNDARY_PERIODIC)) {
        if (size > 1) ocols[cnt] += (s - i) * (ofill[j + 1] - ofill[j]);
        else cols[cnt] += (s - i) * (ofill[j + 1] - ofill[j]);
      }
      maxcnt = PetscMax(maxcnt, ocols[cnt] + cols[cnt]);
      cnt++;
    }
  }
  /* interior rows: full stencil on both sides, all within the diagonal block */
  for (i = s; i < nx - s; i++) {
    for (j = 0; j < nc; j++) {
      cols[cnt] = dfill[j + 1] - dfill[j] + 2 * s * (ofill[j + 1] - ofill[j]);
      maxcnt    = PetscMax(maxcnt, ocols[cnt] + cols[cnt]);
      cnt++;
    }
  }
  /* coupling with process to the right */
  for (i = nx - s; i < nx; i++) {
    for (j = 0; j < nc; j++) {
      ocols[cnt] = ((rank == (size - 1)) ? 0 : (i - nx + s + 1) * (ofill[j + 1] - ofill[j]));
      cols[cnt]  = dfill[j + 1] - dfill[j] + (s + nx - i - 1) * (ofill[j + 1] - ofill[j]);
      if ((rank == size - 1) && (dd->bx == DM_BOUNDARY_PERIODIC)) {
        if (size > 1) ocols[cnt] += (i - nx + s + 1) * (ofill[j + 1] - ofill[j]);
        else cols[cnt] += (i - nx + s + 1) * (ofill[j + 1] - ofill[j]);
      }
      maxcnt = PetscMax(maxcnt, ocols[cnt] + cols[cnt]);
      cnt++;
    }
  }

  PetscCall(MatSeqAIJSetPreallocation(J, 0, cols));
  PetscCall(MatMPIAIJSetPreallocation(J, 0, cols, 0, ocols));
  PetscCall(PetscFree2(cols, ocols));

  PetscCall(DMGetLocalToGlobalMapping(da, &ltog));
  PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    PetscCall(PetscMalloc1(maxcnt, &cols));
    row = xs * nc;
    /* coupling with process to the left */
    for (i = xs; i < xs + s; i++) {
      for (j = 0; j < nc; j++) {
        cnt = 0;
        if (rank) {
          for (l = 0; l < s; l++) {
            for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i - s + l) * nc + ofill[k];
          }
        }
        /* periodic wrap: left neighbors come from the far end of the domain */
        if (rank == 0 && (dd->bx == DM_BOUNDARY_PERIODIC)) {
          for (l = 0; l < s; l++) {
            for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (m + i - s - l) * nc + ofill[k];
          }
        }
        if (dfill) {
          for (k = dfill[j]; k < dfill[j + 1]; k++) cols[cnt++] = i * nc + dfill[k];
        } else {
          for (k = 0; k < nc; k++) cols[cnt++] = i * nc + k;
        }
        for (l = 0; l < s; l++) {
          for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i + s - l) * nc + ofill[k];
        }
        PetscCall(MatSetValues(J, 1, &row, cnt, cols, NULL, INSERT_VALUES));
        row++;
      }
    }
    for (i = xs + s; i < xs + nx - s; i++) {
      for (j = 0; j < nc; j++) {
        cnt = 0;
        for (l = 0; l < s; l++) {
          for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i - s + l) * nc + ofill[k];
        }
        if (dfill) {
          for (k = dfill[j]; k < dfill[j + 1]; k++) cols[cnt++] = i * nc + dfill[k];
        } else {
          for (k = 0; k < nc; k++) cols[cnt++] = i * nc + k;
        }
        for (l = 0; l < s; l++) {
          for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i + s - l) * nc + ofill[k];
        }
        PetscCall(MatSetValues(J, 1, &row, cnt, cols, NULL, INSERT_VALUES));
        row++;
      }
    }
    /* coupling with process to the right */
    for (i = xs + nx - s; i < xs + nx; i++) {
      for (j = 0; j < nc; j++) {
        cnt = 0;
        for (l = 0; l < s; l++) {
          for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i - s + l) * nc + ofill[k];
        }
        if (dfill) {
          for (k = dfill[j]; k < dfill[j + 1]; k++) cols[cnt++] = i * nc + dfill[k];
        } else {
          for (k = 0; k < nc; k++) cols[cnt++] = i * nc + k;
        }
        if (rank < size - 1) {
          for (l = 0; l < s; l++) {
            for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i + s - l) * nc + ofill[k];
          }
        }
        /* periodic wrap on the right edge; expression coincides with (i + s - l - m) for the
           supported s <= 1 case (PetscCheck above) */
        if ((rank == size - 1) && (dd->bx == DM_BOUNDARY_PERIODIC)) {
          for (l = 0; l < s; l++) {
            for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i - s - l - m + 2) * nc + ofill[k];
          }
        }
        PetscCall(MatSetValues(J, 1, &row, cnt, cols, NULL, INSERT_VALUES));
        row++;
      }
    }
    PetscCall(PetscFree(cols));
    /* do not copy values to GPU since they are all zero and not yet needed there */
    PetscCall(MatBindToCPU(J, PETSC_TRUE));
    PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatBindToCPU(J, PETSC_FALSE));
    PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
---------------------------------------------------------------------------------*/ 1581 1582 PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ(DM da, Mat J) 1583 { 1584 PetscInt xs, nx, i, i1, slot, gxs, gnx; 1585 PetscInt m, dim, s, *cols = NULL, nc, *rows = NULL, col, cnt, l; 1586 PetscInt istart, iend; 1587 DMBoundaryType bx; 1588 ISLocalToGlobalMapping ltog, mltog; 1589 1590 PetscFunctionBegin; 1591 /* 1592 nc - number of components per grid point 1593 col - number of colors needed in one direction for single component problem 1594 */ 1595 PetscCall(DMDAGetInfo(da, &dim, &m, NULL, NULL, NULL, NULL, NULL, &nc, &s, &bx, NULL, NULL, NULL)); 1596 if (bx == DM_BOUNDARY_NONE) PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_TRUE)); 1597 col = 2 * s + 1; 1598 1599 PetscCall(DMDAGetCorners(da, &xs, NULL, NULL, &nx, NULL, NULL)); 1600 PetscCall(DMDAGetGhostCorners(da, &gxs, NULL, NULL, &gnx, NULL, NULL)); 1601 1602 PetscCall(MatSetBlockSize(J, nc)); 1603 PetscCall(MatSeqAIJSetPreallocation(J, col * nc, NULL)); 1604 PetscCall(MatMPIAIJSetPreallocation(J, col * nc, NULL, col * nc, NULL)); 1605 1606 PetscCall(DMGetLocalToGlobalMapping(da, <og)); 1607 PetscCall(MatGetLocalToGlobalMapping(J, &mltog, NULL)); 1608 if (!mltog) PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog)); 1609 1610 /* 1611 For each node in the grid: we get the neighbors in the local (on processor ordering 1612 that includes the ghost points) then MatSetValuesLocal() maps those indices to the global 1613 PETSc ordering. 
1614 */ 1615 if (!da->prealloc_only) { 1616 PetscCall(PetscMalloc2(nc, &rows, col * nc * nc, &cols)); 1617 for (i = xs; i < xs + nx; i++) { 1618 istart = PetscMax(-s, gxs - i); 1619 iend = PetscMin(s, gxs + gnx - i - 1); 1620 slot = i - gxs; 1621 1622 cnt = 0; 1623 for (i1 = istart; i1 < iend + 1; i1++) { 1624 cols[cnt++] = nc * (slot + i1); 1625 for (l = 1; l < nc; l++) { 1626 cols[cnt] = 1 + cols[cnt - 1]; 1627 cnt++; 1628 } 1629 } 1630 rows[0] = nc * (slot); 1631 for (l = 1; l < nc; l++) rows[l] = 1 + rows[l - 1]; 1632 PetscCall(MatSetValuesLocal(J, nc, rows, cnt, cols, NULL, INSERT_VALUES)); 1633 } 1634 /* do not copy values to GPU since they are all zero and not yet needed there */ 1635 PetscCall(MatBindToCPU(J, PETSC_TRUE)); 1636 PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY)); 1637 PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY)); 1638 if (bx == DM_BOUNDARY_NONE) PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_FALSE)); 1639 PetscCall(MatBindToCPU(J, PETSC_FALSE)); 1640 PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE)); 1641 PetscCall(PetscFree2(rows, cols)); 1642 } 1643 PetscFunctionReturn(PETSC_SUCCESS); 1644 } 1645 1646 /* ---------------------------------------------------------------------------------*/ 1647 1648 PetscErrorCode DMCreateMatrix_DA_1d_SeqAIJ_NoPreallocation(DM da, Mat J) 1649 { 1650 PetscInt xs, nx, i, i1, slot, gxs, gnx; 1651 PetscInt m, dim, s, *cols = NULL, nc, *rows = NULL, col, cnt, l; 1652 PetscInt istart, iend; 1653 DMBoundaryType bx; 1654 ISLocalToGlobalMapping ltog, mltog; 1655 1656 PetscFunctionBegin; 1657 /* 1658 nc - number of components per grid point 1659 col - number of colors needed in one direction for single component problem 1660 */ 1661 PetscCall(DMDAGetInfo(da, &dim, &m, NULL, NULL, NULL, NULL, NULL, &nc, &s, &bx, NULL, NULL, NULL)); 1662 col = 2 * s + 1; 1663 1664 PetscCall(DMDAGetCorners(da, &xs, NULL, NULL, &nx, NULL, NULL)); 1665 PetscCall(DMDAGetGhostCorners(da, &gxs, NULL, NULL, &gnx, NULL, 
NULL)); 1666 1667 PetscCall(MatSetBlockSize(J, nc)); 1668 PetscCall(MatSeqAIJSetTotalPreallocation(J, nx * nc * col * nc)); 1669 1670 PetscCall(DMGetLocalToGlobalMapping(da, <og)); 1671 PetscCall(MatGetLocalToGlobalMapping(J, &mltog, NULL)); 1672 if (!mltog) PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog)); 1673 1674 /* 1675 For each node in the grid: we get the neighbors in the local (on processor ordering 1676 that includes the ghost points) then MatSetValuesLocal() maps those indices to the global 1677 PETSc ordering. 1678 */ 1679 if (!da->prealloc_only) { 1680 PetscCall(PetscMalloc2(nc, &rows, col * nc * nc, &cols)); 1681 for (i = xs; i < xs + nx; i++) { 1682 istart = PetscMax(-s, gxs - i); 1683 iend = PetscMin(s, gxs + gnx - i - 1); 1684 slot = i - gxs; 1685 1686 cnt = 0; 1687 for (i1 = istart; i1 < iend + 1; i1++) { 1688 cols[cnt++] = nc * (slot + i1); 1689 for (l = 1; l < nc; l++) { 1690 cols[cnt] = 1 + cols[cnt - 1]; 1691 cnt++; 1692 } 1693 } 1694 rows[0] = nc * (slot); 1695 for (l = 1; l < nc; l++) rows[l] = 1 + rows[l - 1]; 1696 PetscCall(MatSetValuesLocal(J, nc, rows, cnt, cols, NULL, INSERT_VALUES)); 1697 } 1698 /* do not copy values to GPU since they are all zero and not yet needed there */ 1699 PetscCall(MatBindToCPU(J, PETSC_TRUE)); 1700 PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY)); 1701 PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY)); 1702 if (bx == DM_BOUNDARY_NONE) PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_FALSE)); 1703 PetscCall(MatBindToCPU(J, PETSC_FALSE)); 1704 PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE)); 1705 PetscCall(PetscFree2(rows, cols)); 1706 } 1707 PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_FALSE)); 1708 PetscFunctionReturn(PETSC_SUCCESS); 1709 } 1710 1711 PetscErrorCode DMCreateMatrix_DA_2d_MPIBAIJ(DM da, Mat J) 1712 { 1713 PetscInt xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny; 1714 PetscInt m, n, dim, s, *cols, nc, col, cnt, *dnz, *onz; 1715 PetscInt istart, iend, jstart, jend, ii, 
jj; 1716 MPI_Comm comm; 1717 PetscScalar *values; 1718 DMBoundaryType bx, by; 1719 DMDAStencilType st; 1720 ISLocalToGlobalMapping ltog; 1721 1722 PetscFunctionBegin; 1723 /* 1724 nc - number of components per grid point 1725 col - number of colors needed in one direction for single component problem 1726 */ 1727 PetscCall(DMDAGetInfo(da, &dim, &m, &n, NULL, NULL, NULL, NULL, &nc, &s, &bx, &by, NULL, &st)); 1728 col = 2 * s + 1; 1729 1730 PetscCall(DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL)); 1731 PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL)); 1732 PetscCall(PetscObjectGetComm((PetscObject)da, &comm)); 1733 1734 PetscCall(PetscMalloc1(col * col * nc * nc, &cols)); 1735 1736 PetscCall(DMGetLocalToGlobalMapping(da, <og)); 1737 1738 /* determine the matrix preallocation information */ 1739 MatPreallocateBegin(comm, nx * ny, nx * ny, dnz, onz); 1740 for (i = xs; i < xs + nx; i++) { 1741 istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i)); 1742 iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1)); 1743 for (j = ys; j < ys + ny; j++) { 1744 jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j)); 1745 jend = (by == DM_BOUNDARY_PERIODIC) ? 
s : (PetscMin(s, n - j - 1)); 1746 slot = i - gxs + gnx * (j - gys); 1747 1748 /* Find block columns in block row */ 1749 cnt = 0; 1750 for (ii = istart; ii < iend + 1; ii++) { 1751 for (jj = jstart; jj < jend + 1; jj++) { 1752 if (st == DMDA_STENCIL_BOX || !ii || !jj) { /* BOX or on the STAR */ 1753 cols[cnt++] = slot + ii + gnx * jj; 1754 } 1755 } 1756 } 1757 PetscCall(MatPreallocateSetLocalBlock(ltog, 1, &slot, ltog, cnt, cols, dnz, onz)); 1758 } 1759 } 1760 PetscCall(MatSeqBAIJSetPreallocation(J, nc, 0, dnz)); 1761 PetscCall(MatMPIBAIJSetPreallocation(J, nc, 0, dnz, 0, onz)); 1762 MatPreallocateEnd(dnz, onz); 1763 1764 PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog)); 1765 1766 /* 1767 For each node in the grid: we get the neighbors in the local (on processor ordering 1768 that includes the ghost points) then MatSetValuesLocal() maps those indices to the global 1769 PETSc ordering. 1770 */ 1771 if (!da->prealloc_only) { 1772 PetscCall(PetscCalloc1(col * col * nc * nc, &values)); 1773 for (i = xs; i < xs + nx; i++) { 1774 istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i)); 1775 iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1)); 1776 for (j = ys; j < ys + ny; j++) { 1777 jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j)); 1778 jend = (by == DM_BOUNDARY_PERIODIC) ? 
s : (PetscMin(s, n - j - 1)); 1779 slot = i - gxs + gnx * (j - gys); 1780 cnt = 0; 1781 for (ii = istart; ii < iend + 1; ii++) { 1782 for (jj = jstart; jj < jend + 1; jj++) { 1783 if (st == DMDA_STENCIL_BOX || !ii || !jj) { /* BOX or on the STAR */ 1784 cols[cnt++] = slot + ii + gnx * jj; 1785 } 1786 } 1787 } 1788 PetscCall(MatSetValuesBlockedLocal(J, 1, &slot, cnt, cols, values, INSERT_VALUES)); 1789 } 1790 } 1791 PetscCall(PetscFree(values)); 1792 /* do not copy values to GPU since they are all zero and not yet needed there */ 1793 PetscCall(MatBindToCPU(J, PETSC_TRUE)); 1794 PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY)); 1795 PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY)); 1796 PetscCall(MatBindToCPU(J, PETSC_FALSE)); 1797 PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE)); 1798 } 1799 PetscCall(PetscFree(cols)); 1800 PetscFunctionReturn(PETSC_SUCCESS); 1801 } 1802 1803 PetscErrorCode DMCreateMatrix_DA_3d_MPIBAIJ(DM da, Mat J) 1804 { 1805 PetscInt xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny; 1806 PetscInt m, n, dim, s, *cols, k, nc, col, cnt, p, *dnz, *onz; 1807 PetscInt istart, iend, jstart, jend, kstart, kend, zs, nz, gzs, gnz, ii, jj, kk; 1808 MPI_Comm comm; 1809 PetscScalar *values; 1810 DMBoundaryType bx, by, bz; 1811 DMDAStencilType st; 1812 ISLocalToGlobalMapping ltog; 1813 1814 PetscFunctionBegin; 1815 /* 1816 nc - number of components per grid point 1817 col - number of colors needed in one direction for single component problem 1818 */ 1819 PetscCall(DMDAGetInfo(da, &dim, &m, &n, &p, NULL, NULL, NULL, &nc, &s, &bx, &by, &bz, &st)); 1820 col = 2 * s + 1; 1821 1822 PetscCall(DMDAGetCorners(da, &xs, &ys, &zs, &nx, &ny, &nz)); 1823 PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, &gzs, &gnx, &gny, &gnz)); 1824 PetscCall(PetscObjectGetComm((PetscObject)da, &comm)); 1825 1826 PetscCall(PetscMalloc1(col * col * col, &cols)); 1827 1828 PetscCall(DMGetLocalToGlobalMapping(da, <og)); 1829 1830 /* determine the matrix preallocation 
information */ 1831 MatPreallocateBegin(comm, nx * ny * nz, nx * ny * nz, dnz, onz); 1832 for (i = xs; i < xs + nx; i++) { 1833 istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i)); 1834 iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1)); 1835 for (j = ys; j < ys + ny; j++) { 1836 jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j)); 1837 jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1)); 1838 for (k = zs; k < zs + nz; k++) { 1839 kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k)); 1840 kend = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1)); 1841 1842 slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs); 1843 1844 /* Find block columns in block row */ 1845 cnt = 0; 1846 for (ii = istart; ii < iend + 1; ii++) { 1847 for (jj = jstart; jj < jend + 1; jj++) { 1848 for (kk = kstart; kk < kend + 1; kk++) { 1849 if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/ 1850 cols[cnt++] = slot + ii + gnx * jj + gnx * gny * kk; 1851 } 1852 } 1853 } 1854 } 1855 PetscCall(MatPreallocateSetLocalBlock(ltog, 1, &slot, ltog, cnt, cols, dnz, onz)); 1856 } 1857 } 1858 } 1859 PetscCall(MatSeqBAIJSetPreallocation(J, nc, 0, dnz)); 1860 PetscCall(MatMPIBAIJSetPreallocation(J, nc, 0, dnz, 0, onz)); 1861 MatPreallocateEnd(dnz, onz); 1862 1863 PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog)); 1864 1865 /* 1866 For each node in the grid: we get the neighbors in the local (on processor ordering 1867 that includes the ghost points) then MatSetValuesLocal() maps those indices to the global 1868 PETSc ordering. 1869 */ 1870 if (!da->prealloc_only) { 1871 PetscCall(PetscCalloc1(col * col * col * nc * nc, &values)); 1872 for (i = xs; i < xs + nx; i++) { 1873 istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i)); 1874 iend = (bx == DM_BOUNDARY_PERIODIC) ? 
s : (PetscMin(s, m - i - 1)); 1875 for (j = ys; j < ys + ny; j++) { 1876 jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j)); 1877 jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1)); 1878 for (k = zs; k < zs + nz; k++) { 1879 kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k)); 1880 kend = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1)); 1881 1882 slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs); 1883 1884 cnt = 0; 1885 for (ii = istart; ii < iend + 1; ii++) { 1886 for (jj = jstart; jj < jend + 1; jj++) { 1887 for (kk = kstart; kk < kend + 1; kk++) { 1888 if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/ 1889 cols[cnt++] = slot + ii + gnx * jj + gnx * gny * kk; 1890 } 1891 } 1892 } 1893 } 1894 PetscCall(MatSetValuesBlockedLocal(J, 1, &slot, cnt, cols, values, INSERT_VALUES)); 1895 } 1896 } 1897 } 1898 PetscCall(PetscFree(values)); 1899 /* do not copy values to GPU since they are all zero and not yet needed there */ 1900 PetscCall(MatBindToCPU(J, PETSC_TRUE)); 1901 PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY)); 1902 PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY)); 1903 PetscCall(MatBindToCPU(J, PETSC_FALSE)); 1904 PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE)); 1905 } 1906 PetscCall(PetscFree(cols)); 1907 PetscFunctionReturn(PETSC_SUCCESS); 1908 } 1909 1910 /* 1911 This helper is for of SBAIJ preallocation, to discard the lower-triangular values which are difficult to 1912 identify in the local ordering with periodic domain. 
1913 */ 1914 static PetscErrorCode L2GFilterUpperTriangular(ISLocalToGlobalMapping ltog, PetscInt *row, PetscInt *cnt, PetscInt col[]) 1915 { 1916 PetscInt i, n; 1917 1918 PetscFunctionBegin; 1919 PetscCall(ISLocalToGlobalMappingApplyBlock(ltog, 1, row, row)); 1920 PetscCall(ISLocalToGlobalMappingApplyBlock(ltog, *cnt, col, col)); 1921 for (i = 0, n = 0; i < *cnt; i++) { 1922 if (col[i] >= *row) col[n++] = col[i]; 1923 } 1924 *cnt = n; 1925 PetscFunctionReturn(PETSC_SUCCESS); 1926 } 1927 1928 PetscErrorCode DMCreateMatrix_DA_2d_MPISBAIJ(DM da, Mat J) 1929 { 1930 PetscInt xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny; 1931 PetscInt m, n, dim, s, *cols, nc, col, cnt, *dnz, *onz; 1932 PetscInt istart, iend, jstart, jend, ii, jj; 1933 MPI_Comm comm; 1934 PetscScalar *values; 1935 DMBoundaryType bx, by; 1936 DMDAStencilType st; 1937 ISLocalToGlobalMapping ltog; 1938 1939 PetscFunctionBegin; 1940 /* 1941 nc - number of components per grid point 1942 col - number of colors needed in one direction for single component problem 1943 */ 1944 PetscCall(DMDAGetInfo(da, &dim, &m, &n, NULL, NULL, NULL, NULL, &nc, &s, &bx, &by, NULL, &st)); 1945 col = 2 * s + 1; 1946 1947 PetscCall(DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL)); 1948 PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL)); 1949 PetscCall(PetscObjectGetComm((PetscObject)da, &comm)); 1950 1951 PetscCall(PetscMalloc1(col * col * nc * nc, &cols)); 1952 1953 PetscCall(DMGetLocalToGlobalMapping(da, <og)); 1954 1955 /* determine the matrix preallocation information */ 1956 MatPreallocateBegin(comm, nx * ny, nx * ny, dnz, onz); 1957 for (i = xs; i < xs + nx; i++) { 1958 istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i)); 1959 iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1)); 1960 for (j = ys; j < ys + ny; j++) { 1961 jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j)); 1962 jend = (by == DM_BOUNDARY_PERIODIC) ? 
s : (PetscMin(s, n - j - 1)); 1963 slot = i - gxs + gnx * (j - gys); 1964 1965 /* Find block columns in block row */ 1966 cnt = 0; 1967 for (ii = istart; ii < iend + 1; ii++) { 1968 for (jj = jstart; jj < jend + 1; jj++) { 1969 if (st == DMDA_STENCIL_BOX || !ii || !jj) cols[cnt++] = slot + ii + gnx * jj; 1970 } 1971 } 1972 PetscCall(L2GFilterUpperTriangular(ltog, &slot, &cnt, cols)); 1973 PetscCall(MatPreallocateSymmetricSetBlock(slot, cnt, cols, dnz, onz)); 1974 } 1975 } 1976 PetscCall(MatSeqSBAIJSetPreallocation(J, nc, 0, dnz)); 1977 PetscCall(MatMPISBAIJSetPreallocation(J, nc, 0, dnz, 0, onz)); 1978 MatPreallocateEnd(dnz, onz); 1979 1980 PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog)); 1981 1982 /* 1983 For each node in the grid: we get the neighbors in the local (on processor ordering 1984 that includes the ghost points) then MatSetValuesLocal() maps those indices to the global 1985 PETSc ordering. 1986 */ 1987 if (!da->prealloc_only) { 1988 PetscCall(PetscCalloc1(col * col * nc * nc, &values)); 1989 for (i = xs; i < xs + nx; i++) { 1990 istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i)); 1991 iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1)); 1992 for (j = ys; j < ys + ny; j++) { 1993 jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j)); 1994 jend = (by == DM_BOUNDARY_PERIODIC) ? 
s : (PetscMin(s, n - j - 1)); 1995 slot = i - gxs + gnx * (j - gys); 1996 1997 /* Find block columns in block row */ 1998 cnt = 0; 1999 for (ii = istart; ii < iend + 1; ii++) { 2000 for (jj = jstart; jj < jend + 1; jj++) { 2001 if (st == DMDA_STENCIL_BOX || !ii || !jj) cols[cnt++] = slot + ii + gnx * jj; 2002 } 2003 } 2004 PetscCall(L2GFilterUpperTriangular(ltog, &slot, &cnt, cols)); 2005 PetscCall(MatSetValuesBlocked(J, 1, &slot, cnt, cols, values, INSERT_VALUES)); 2006 } 2007 } 2008 PetscCall(PetscFree(values)); 2009 /* do not copy values to GPU since they are all zero and not yet needed there */ 2010 PetscCall(MatBindToCPU(J, PETSC_TRUE)); 2011 PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY)); 2012 PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY)); 2013 PetscCall(MatBindToCPU(J, PETSC_FALSE)); 2014 PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE)); 2015 } 2016 PetscCall(PetscFree(cols)); 2017 PetscFunctionReturn(PETSC_SUCCESS); 2018 } 2019 2020 PetscErrorCode DMCreateMatrix_DA_3d_MPISBAIJ(DM da, Mat J) 2021 { 2022 PetscInt xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny; 2023 PetscInt m, n, dim, s, *cols, k, nc, col, cnt, p, *dnz, *onz; 2024 PetscInt istart, iend, jstart, jend, kstart, kend, zs, nz, gzs, gnz, ii, jj, kk; 2025 MPI_Comm comm; 2026 PetscScalar *values; 2027 DMBoundaryType bx, by, bz; 2028 DMDAStencilType st; 2029 ISLocalToGlobalMapping ltog; 2030 2031 PetscFunctionBegin; 2032 /* 2033 nc - number of components per grid point 2034 col - number of colors needed in one direction for single component problem 2035 */ 2036 PetscCall(DMDAGetInfo(da, &dim, &m, &n, &p, NULL, NULL, NULL, &nc, &s, &bx, &by, &bz, &st)); 2037 col = 2 * s + 1; 2038 2039 PetscCall(DMDAGetCorners(da, &xs, &ys, &zs, &nx, &ny, &nz)); 2040 PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, &gzs, &gnx, &gny, &gnz)); 2041 PetscCall(PetscObjectGetComm((PetscObject)da, &comm)); 2042 2043 /* create the matrix */ 2044 PetscCall(PetscMalloc1(col * col * col, &cols)); 2045 2046 
PetscCall(DMGetLocalToGlobalMapping(da, <og)); 2047 2048 /* determine the matrix preallocation information */ 2049 MatPreallocateBegin(comm, nx * ny * nz, nx * ny * nz, dnz, onz); 2050 for (i = xs; i < xs + nx; i++) { 2051 istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i)); 2052 iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1)); 2053 for (j = ys; j < ys + ny; j++) { 2054 jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j)); 2055 jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1)); 2056 for (k = zs; k < zs + nz; k++) { 2057 kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k)); 2058 kend = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1)); 2059 2060 slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs); 2061 2062 /* Find block columns in block row */ 2063 cnt = 0; 2064 for (ii = istart; ii < iend + 1; ii++) { 2065 for (jj = jstart; jj < jend + 1; jj++) { 2066 for (kk = kstart; kk < kend + 1; kk++) { 2067 if ((st == DMDA_STENCIL_BOX) || (!ii && !jj) || (!jj && !kk) || (!ii && !kk)) cols[cnt++] = slot + ii + gnx * jj + gnx * gny * kk; 2068 } 2069 } 2070 } 2071 PetscCall(L2GFilterUpperTriangular(ltog, &slot, &cnt, cols)); 2072 PetscCall(MatPreallocateSymmetricSetBlock(slot, cnt, cols, dnz, onz)); 2073 } 2074 } 2075 } 2076 PetscCall(MatSeqSBAIJSetPreallocation(J, nc, 0, dnz)); 2077 PetscCall(MatMPISBAIJSetPreallocation(J, nc, 0, dnz, 0, onz)); 2078 MatPreallocateEnd(dnz, onz); 2079 2080 PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog)); 2081 2082 /* 2083 For each node in the grid: we get the neighbors in the local (on processor ordering 2084 that includes the ghost points) then MatSetValuesLocal() maps those indices to the global 2085 PETSc ordering. 2086 */ 2087 if (!da->prealloc_only) { 2088 PetscCall(PetscCalloc1(col * col * col * nc * nc, &values)); 2089 for (i = xs; i < xs + nx; i++) { 2090 istart = (bx == DM_BOUNDARY_PERIODIC) ? 
-s : (PetscMax(-s, -i)); 2091 iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1)); 2092 for (j = ys; j < ys + ny; j++) { 2093 jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j)); 2094 jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1)); 2095 for (k = zs; k < zs + nz; k++) { 2096 kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k)); 2097 kend = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1)); 2098 2099 slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs); 2100 2101 cnt = 0; 2102 for (ii = istart; ii < iend + 1; ii++) { 2103 for (jj = jstart; jj < jend + 1; jj++) { 2104 for (kk = kstart; kk < kend + 1; kk++) { 2105 if ((st == DMDA_STENCIL_BOX) || (!ii && !jj) || (!jj && !kk) || (!ii && !kk)) cols[cnt++] = slot + ii + gnx * jj + gnx * gny * kk; 2106 } 2107 } 2108 } 2109 PetscCall(L2GFilterUpperTriangular(ltog, &slot, &cnt, cols)); 2110 PetscCall(MatSetValuesBlocked(J, 1, &slot, cnt, cols, values, INSERT_VALUES)); 2111 } 2112 } 2113 } 2114 PetscCall(PetscFree(values)); 2115 /* do not copy values to GPU since they are all zero and not yet needed there */ 2116 PetscCall(MatBindToCPU(J, PETSC_TRUE)); 2117 PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY)); 2118 PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY)); 2119 PetscCall(MatBindToCPU(J, PETSC_FALSE)); 2120 PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE)); 2121 } 2122 PetscCall(PetscFree(cols)); 2123 PetscFunctionReturn(PETSC_SUCCESS); 2124 } 2125 2126 /* ---------------------------------------------------------------------------------*/ 2127 2128 PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ_Fill(DM da, Mat J) 2129 { 2130 PetscInt xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny; 2131 PetscInt m, n, dim, s, *cols, k, nc, row, col, cnt, maxcnt = 0, l, p, *dnz, *onz; 2132 PetscInt istart, iend, jstart, jend, kstart, kend, zs, nz, gzs, gnz, ii, jj, kk, M, N, P; 2133 DM_DA *dd = (DM_DA *)da->data; 2134 PetscInt ifill_col, *dfill = 
dd->dfill, *ofill = dd->ofill; 2135 MPI_Comm comm; 2136 PetscScalar *values; 2137 DMBoundaryType bx, by, bz; 2138 ISLocalToGlobalMapping ltog; 2139 DMDAStencilType st; 2140 PetscBool removedups = PETSC_FALSE; 2141 2142 PetscFunctionBegin; 2143 /* 2144 nc - number of components per grid point 2145 col - number of colors needed in one direction for single component problem 2146 */ 2147 PetscCall(DMDAGetInfo(da, &dim, &m, &n, &p, &M, &N, &P, &nc, &s, &bx, &by, &bz, &st)); 2148 col = 2 * s + 1; 2149 2150 /* 2151 With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times 2152 because of "wrapping" around the end of the domain hitting an entry already counted in the other direction. 2153 */ 2154 if (M == 1 && 2 * s >= m) removedups = PETSC_TRUE; 2155 if (N == 1 && 2 * s >= n) removedups = PETSC_TRUE; 2156 if (P == 1 && 2 * s >= p) removedups = PETSC_TRUE; 2157 2158 PetscCall(DMDAGetCorners(da, &xs, &ys, &zs, &nx, &ny, &nz)); 2159 PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, &gzs, &gnx, &gny, &gnz)); 2160 PetscCall(PetscObjectGetComm((PetscObject)da, &comm)); 2161 2162 PetscCall(PetscMalloc1(col * col * col * nc, &cols)); 2163 PetscCall(DMGetLocalToGlobalMapping(da, <og)); 2164 2165 /* determine the matrix preallocation information */ 2166 MatPreallocateBegin(comm, nc * nx * ny * nz, nc * nx * ny * nz, dnz, onz); 2167 2168 PetscCall(MatSetBlockSize(J, nc)); 2169 for (i = xs; i < xs + nx; i++) { 2170 istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i)); 2171 iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1)); 2172 for (j = ys; j < ys + ny; j++) { 2173 jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j)); 2174 jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1)); 2175 for (k = zs; k < zs + nz; k++) { 2176 kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k)); 2177 kend = (bz == DM_BOUNDARY_PERIODIC) ? 
s : (PetscMin(s, p - k - 1)); 2178 2179 slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs); 2180 2181 for (l = 0; l < nc; l++) { 2182 cnt = 0; 2183 for (ii = istart; ii < iend + 1; ii++) { 2184 for (jj = jstart; jj < jend + 1; jj++) { 2185 for (kk = kstart; kk < kend + 1; kk++) { 2186 if (ii || jj || kk) { 2187 if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/ 2188 for (ifill_col = ofill[l]; ifill_col < ofill[l + 1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc * (slot + ii + gnx * jj + gnx * gny * kk); 2189 } 2190 } else { 2191 if (dfill) { 2192 for (ifill_col = dfill[l]; ifill_col < dfill[l + 1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc * (slot + ii + gnx * jj + gnx * gny * kk); 2193 } else { 2194 for (ifill_col = 0; ifill_col < nc; ifill_col++) cols[cnt++] = ifill_col + nc * (slot + ii + gnx * jj + gnx * gny * kk); 2195 } 2196 } 2197 } 2198 } 2199 } 2200 row = l + nc * (slot); 2201 maxcnt = PetscMax(maxcnt, cnt); 2202 if (removedups) PetscCall(MatPreallocateSetLocalRemoveDups(ltog, 1, &row, ltog, cnt, cols, dnz, onz)); 2203 else PetscCall(MatPreallocateSetLocal(ltog, 1, &row, ltog, cnt, cols, dnz, onz)); 2204 } 2205 } 2206 } 2207 } 2208 PetscCall(MatSeqAIJSetPreallocation(J, 0, dnz)); 2209 PetscCall(MatMPIAIJSetPreallocation(J, 0, dnz, 0, onz)); 2210 MatPreallocateEnd(dnz, onz); 2211 PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog)); 2212 2213 /* 2214 For each node in the grid: we get the neighbors in the local (on processor ordering 2215 that includes the ghost points) then MatSetValuesLocal() maps those indices to the global 2216 PETSc ordering. 2217 */ 2218 if (!da->prealloc_only) { 2219 PetscCall(PetscCalloc1(maxcnt, &values)); 2220 for (i = xs; i < xs + nx; i++) { 2221 istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i)); 2222 iend = (bx == DM_BOUNDARY_PERIODIC) ? 
s : (PetscMin(s, m - i - 1)); 2223 for (j = ys; j < ys + ny; j++) { 2224 jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j)); 2225 jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1)); 2226 for (k = zs; k < zs + nz; k++) { 2227 kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k)); 2228 kend = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1)); 2229 2230 slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs); 2231 2232 for (l = 0; l < nc; l++) { 2233 cnt = 0; 2234 for (ii = istart; ii < iend + 1; ii++) { 2235 for (jj = jstart; jj < jend + 1; jj++) { 2236 for (kk = kstart; kk < kend + 1; kk++) { 2237 if (ii || jj || kk) { 2238 if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/ 2239 for (ifill_col = ofill[l]; ifill_col < ofill[l + 1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc * (slot + ii + gnx * jj + gnx * gny * kk); 2240 } 2241 } else { 2242 if (dfill) { 2243 for (ifill_col = dfill[l]; ifill_col < dfill[l + 1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc * (slot + ii + gnx * jj + gnx * gny * kk); 2244 } else { 2245 for (ifill_col = 0; ifill_col < nc; ifill_col++) cols[cnt++] = ifill_col + nc * (slot + ii + gnx * jj + gnx * gny * kk); 2246 } 2247 } 2248 } 2249 } 2250 } 2251 row = l + nc * (slot); 2252 PetscCall(MatSetValuesLocal(J, 1, &row, cnt, cols, values, INSERT_VALUES)); 2253 } 2254 } 2255 } 2256 } 2257 PetscCall(PetscFree(values)); 2258 /* do not copy values to GPU since they are all zero and not yet needed there */ 2259 PetscCall(MatBindToCPU(J, PETSC_TRUE)); 2260 PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY)); 2261 PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY)); 2262 PetscCall(MatBindToCPU(J, PETSC_FALSE)); 2263 PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE)); 2264 } 2265 PetscCall(PetscFree(cols)); 2266 PetscFunctionReturn(PETSC_SUCCESS); 2267 } 2268