1 #include <petsc/private/dmdaimpl.h> /*I "petscdmda.h" I*/ 2 #include <petscmat.h> 3 #include <petscbt.h> 4 5 extern PetscErrorCode DMCreateColoring_DA_1d_MPIAIJ(DM, ISColoringType, ISColoring *); 6 extern PetscErrorCode DMCreateColoring_DA_2d_MPIAIJ(DM, ISColoringType, ISColoring *); 7 extern PetscErrorCode DMCreateColoring_DA_2d_5pt_MPIAIJ(DM, ISColoringType, ISColoring *); 8 extern PetscErrorCode DMCreateColoring_DA_3d_MPIAIJ(DM, ISColoringType, ISColoring *); 9 10 /* 11 For ghost i that may be negative or greater than the upper bound this 12 maps it into the 0:m-1 range using periodicity 13 */ 14 #define SetInRange(i, m) ((i < 0) ? m + i : ((i >= m) ? i - m : i)) 15 16 static PetscErrorCode DMDASetBlockFills_Private(const PetscInt *dfill, PetscInt w, PetscInt **rfill) 17 { 18 PetscInt i, j, nz, *fill; 19 20 PetscFunctionBegin; 21 if (!dfill) PetscFunctionReturn(PETSC_SUCCESS); 22 23 /* count number nonzeros */ 24 nz = 0; 25 for (i = 0; i < w; i++) { 26 for (j = 0; j < w; j++) { 27 if (dfill[w * i + j]) nz++; 28 } 29 } 30 PetscCall(PetscMalloc1(nz + w + 1, &fill)); 31 /* construct modified CSR storage of nonzero structure */ 32 /* fill[0 -- w] marks starts of each row of column indices (and end of last row) 33 so fill[1] - fill[0] gives number of nonzeros in first row etc */ 34 nz = w + 1; 35 for (i = 0; i < w; i++) { 36 fill[i] = nz; 37 for (j = 0; j < w; j++) { 38 if (dfill[w * i + j]) { 39 fill[nz] = j; 40 nz++; 41 } 42 } 43 } 44 fill[w] = nz; 45 46 *rfill = fill; 47 PetscFunctionReturn(PETSC_SUCCESS); 48 } 49 50 static PetscErrorCode DMDASetBlockFillsSparse_Private(const PetscInt *dfillsparse, PetscInt w, PetscInt **rfill) 51 { 52 PetscInt nz; 53 54 PetscFunctionBegin; 55 if (!dfillsparse) PetscFunctionReturn(PETSC_SUCCESS); 56 57 /* Determine number of non-zeros */ 58 nz = (dfillsparse[w] - w - 1); 59 60 /* Allocate space for our copy of the given sparse matrix representation. 
*/
  PetscCall(PetscMalloc1(nz + w + 1, rfill));
  PetscCall(PetscArraycpy(*rfill, dfillsparse, nz + w + 1));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
   DMDASetBlockFills_Private2 - Builds dd->ofillcols from the already-stored
   sparse off-diagonal fill pattern dd->ofill.
*/
static PetscErrorCode DMDASetBlockFills_Private2(DM_DA *dd)
{
  PetscInt i, k, cnt = 1;

  PetscFunctionBegin;

  /* ofillcols tracks the columns of ofill that have any nonzero in them; the value in each location is the number of
     columns to the left with any nonzeros in them plus 1 */
  PetscCall(PetscCalloc1(dd->w, &dd->ofillcols));
  for (i = 0; i < dd->w; i++) {
    for (k = dd->ofill[i]; k < dd->ofill[i + 1]; k++) dd->ofillcols[dd->ofill[k]] = 1;
  }
  for (i = 0; i < dd->w; i++) {
    if (dd->ofillcols[i]) dd->ofillcols[i] = cnt++;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  DMDASetBlockFills - Sets the fill pattern in each block for a multi-component problem
  of the matrix returned by `DMCreateMatrix()`.

  Logically Collective

  Input Parameters:
+ da    - the distributed array
. dfill - the fill pattern in the diagonal block (may be NULL, means use dense block)
- ofill - the fill pattern in the off-diagonal blocks

  Level: developer

  Notes:
  This only makes sense when you are doing multicomponent problems but using the
  `MATMPIAIJ` matrix format

  The format for dfill and ofill is a 2 dimensional dof by dof matrix with 1 entries
  representing coupling and 0 entries for missing coupling. For example
.vb
  dfill[9] = {1, 0, 0,
              1, 1, 0,
              0, 1, 1}
.ve
  means that row 0 is coupled with only itself in the diagonal block, row 1 is coupled with
  itself and row 0 (in the diagonal block) and row 2 is coupled with itself and row 1 (in the
  diagonal block).

  `DMDASetGetMatrix()` allows you to provide general code for those more complicated nonzero patterns than
  can be represented in the dfill, ofill format

  Contributed by\:
  Glenn Hammond

.seealso: `DM`, `DMDA`, `DMCreateMatrix()`, `DMDASetGetMatrix()`, `DMSetMatrixPreallocateOnly()`
@*/
PetscErrorCode DMDASetBlockFills(DM da, const PetscInt *dfill, const PetscInt *ofill)
{
  DM_DA *dd = (DM_DA *)da->data;

  PetscFunctionBegin;
  /* save the given dfill and ofill information */
  PetscCall(DMDASetBlockFills_Private(dfill, dd->w, &dd->dfill));
  PetscCall(DMDASetBlockFills_Private(ofill, dd->w, &dd->ofill));

  /* count nonzeros in ofill columns */
  PetscCall(DMDASetBlockFills_Private2(dd));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  DMDASetBlockFillsSparse - Sets the fill pattern in each block for a multi-component problem
  of the matrix returned by `DMCreateMatrix()`, using sparse representations
  of fill patterns.

  Logically Collective

  Input Parameters:
+ da          - the distributed array
. dfillsparse - the sparse fill pattern in the diagonal block (may be `NULL`, means use dense block)
- ofillsparse - the sparse fill pattern in the off-diagonal blocks

  Level: developer

  Notes:
  This only makes sense when you are doing multicomponent problems but using the
  `MATMPIAIJ` matrix format

  The format for `dfill` and `ofill` is a sparse representation of a
  dof-by-dof matrix with 1 entries representing coupling and 0 entries
  for missing coupling. The sparse representation is a 1 dimensional
  array of length nz + dof + 1, where nz is the number of non-zeros in
  the matrix. The first dof entries in the array give the
  starting array indices of each row's items in the rest of the array,
  the dof+1st item contains the value nz + dof + 1 (i.e.
the entire length of the array)
  and the remaining nz items give the column indices of each of
  the 1s within the logical 2D matrix. Each row's items within
  the array are the column indices of the 1s within that row
  of the 2D matrix. PETSc developers may recognize that this is the
  same format as that computed by the `DMDASetBlockFills_Private()`
  function from a dense 2D matrix representation.

  `DMDASetGetMatrix()` allows you to provide general code for those more complicated nonzero patterns than
  can be represented in the `dfill`, `ofill` format

  Contributed by\:
  Philip C. Roth

.seealso: `DM`, `DMDA`, `DMDASetBlockFills()`, `DMCreateMatrix()`, `DMDASetGetMatrix()`, `DMSetMatrixPreallocateOnly()`
@*/
PetscErrorCode DMDASetBlockFillsSparse(DM da, const PetscInt *dfillsparse, const PetscInt *ofillsparse)
{
  DM_DA *dd = (DM_DA *)da->data;

  PetscFunctionBegin;
  /* save the given dfill and ofill information */
  PetscCall(DMDASetBlockFillsSparse_Private(dfillsparse, dd->w, &dd->dfill));
  PetscCall(DMDASetBlockFillsSparse_Private(ofillsparse, dd->w, &dd->ofill));

  /* count nonzeros in ofill columns */
  PetscCall(DMDASetBlockFills_Private2(dd));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
   DMCreateColoring_DA - Entry point for DMCreateColoring() on a DMDA; dispatches
   to the dimension-specific routines below.
*/
PetscErrorCode DMCreateColoring_DA(DM da, ISColoringType ctype, ISColoring *coloring)
{
  PetscInt       dim, m, n, p, nc;
  DMBoundaryType bx, by, bz;
  MPI_Comm       comm;
  PetscMPIInt    size;
  PetscBool      isBAIJ;
  DM_DA         *dd = (DM_DA *)da->data;

  PetscFunctionBegin;
  /*
                                                  m
          ------------------------------------------------------
         |                                                     |
         |                                                     |
         |               ----------------------                |
         |               |                    |                |
      n  |           yn  |                    |                |
         |               |                    |                |
         |               .---------------------                |
         |             (xs,ys)     xn                          |
         |            .
         |         (gxs,gys)
         |
          -----------------------------------------------------
  */

  /*
     nc - number of components per grid point
     col - number of colors needed in one direction for single component problem
  */
  PetscCall(DMDAGetInfo(da, &dim, NULL, NULL, NULL, &m, &n, &p, &nc, NULL, &bx, &by, &bz, NULL));

  PetscCall(PetscObjectGetComm((PetscObject)da, &comm));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  if (ctype == IS_COLORING_LOCAL) {
    if (size == 1) {
      /* sequential: ghosted and global colorings coincide */
      ctype = IS_COLORING_GLOBAL;
    } else {
      PetscCheck((dim == 1) || !((m == 1 && bx == DM_BOUNDARY_PERIODIC) || (n == 1 && by == DM_BOUNDARY_PERIODIC) || (p == 1 && bz == DM_BOUNDARY_PERIODIC)), PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "IS_COLORING_LOCAL cannot be used for periodic boundary condition having both ends of the domain on the same process");
    }
  }

  /* Tell the DMDA it has 1 degree of freedom per grid point so that the coloring for BAIJ
     matrices is for the blocks, not the individual matrix elements; restored after dispatch */
  PetscCall(PetscStrbeginswith(da->mattype, MATBAIJ, &isBAIJ));
  if (!isBAIJ) PetscCall(PetscStrbeginswith(da->mattype, MATMPIBAIJ, &isBAIJ));
  if (!isBAIJ) PetscCall(PetscStrbeginswith(da->mattype, MATSEQBAIJ, &isBAIJ));
  if (isBAIJ) {
    dd->w  = 1;
    dd->xs = dd->xs / nc;
    dd->xe = dd->xe / nc;
    dd->Xs = dd->Xs / nc;
    dd->Xe = dd->Xe / nc;
  }

  /*
     We do not provide a getcoloring function in the DMDA operations because
     the basic DMDA does not know about matrices. We think of DMDA as being
     more low-level than matrices.
  */
  if (dim == 1) PetscCall(DMCreateColoring_DA_1d_MPIAIJ(da, ctype, coloring));
  else if (dim == 2) PetscCall(DMCreateColoring_DA_2d_MPIAIJ(da, ctype, coloring));
  else if (dim == 3) PetscCall(DMCreateColoring_DA_3d_MPIAIJ(da, ctype, coloring));
  else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "Not done for %" PetscInt_FMT " dimension, send us mail petsc-maint@mcs.anl.gov for code", dim);
  if (isBAIJ) {
    /* restore the per-point dof and index scaling that was modified above for BAIJ */
    dd->w  = nc;
    dd->xs = dd->xs * nc;
    dd->xe = dd->xe * nc;
    dd->Xs = dd->Xs * nc;
    dd->Xe = dd->Xe * nc;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
   DMCreateColoring_DA_2d_MPIAIJ - Coloring for a 2d DMDA; colors repeat with period
   col = 2*s + 1 in each direction so that points sharing a color never couple.
   Colorings are cached on the DM_DA (localcoloring / ghostedcoloring).
*/
PetscErrorCode DMCreateColoring_DA_2d_MPIAIJ(DM da, ISColoringType ctype, ISColoring *coloring)
{
  PetscInt         xs, ys, nx, ny, i, j, ii, gxs, gys, gnx, gny, m, n, M, N, dim, s, k, nc, col;
  PetscInt         ncolors = 0;
  MPI_Comm         comm;
  DMBoundaryType   bx, by;
  DMDAStencilType  st;
  ISColoringValue *colors;
  DM_DA           *dd = (DM_DA *)da->data;

  PetscFunctionBegin;
  /*
     nc - number of components per grid point
     col - number of colors needed in one direction for single component problem
  */
  PetscCall(DMDAGetInfo(da, &dim, &m, &n, NULL, &M, &N, NULL, &nc, &s, &bx, &by, NULL, &st));
  col = 2 * s + 1;
  PetscCall(DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL));
  PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL));
  PetscCall(PetscObjectGetComm((PetscObject)da, &comm));

  /* special case as taught to us by Paul Hovland */
  if (st == DMDA_STENCIL_STAR && s == 1) {
    PetscCall(DMCreateColoring_DA_2d_5pt_MPIAIJ(da, ctype, coloring));
  } else {
    if (ctype == IS_COLORING_GLOBAL) {
      if (!dd->localcoloring) {
        PetscCall(PetscMalloc1(nc * nx * ny, &colors));
        ii = 0;
        for (j = ys; j < ys + ny; j++) {
          for (i = xs; i < xs + nx; i++) {
            for (k = 0; k < nc; k++) colors[ii++] = k + nc * ((i % col) + col * (j % col));
          }
        }
        ncolors = nc + nc * (col - 1 + col *
(col - 1));
        PetscCall(ISColoringCreate(comm, ncolors, nc * nx * ny, colors, PETSC_OWN_POINTER, &dd->localcoloring));
      }
      *coloring = dd->localcoloring;
    } else if (ctype == IS_COLORING_LOCAL) {
      if (!dd->ghostedcoloring) {
        PetscCall(PetscMalloc1(nc * gnx * gny, &colors));
        ii = 0;
        for (j = gys; j < gys + gny; j++) {
          for (i = gxs; i < gxs + gnx; i++) {
            for (k = 0; k < nc; k++) {
              /* the complicated stuff is to handle periodic boundaries */
              colors[ii++] = k + nc * ((SetInRange(i, m) % col) + col * (SetInRange(j, n) % col));
            }
          }
        }
        ncolors = nc + nc * (col - 1 + col * (col - 1));
        PetscCall(ISColoringCreate(comm, ncolors, nc * gnx * gny, colors, PETSC_OWN_POINTER, &dd->ghostedcoloring));
        /* PetscIntView(ncolors,(PetscInt*)colors,0); */

        PetscCall(ISColoringSetType(dd->ghostedcoloring, IS_COLORING_LOCAL));
      }
      *coloring = dd->ghostedcoloring;
    } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_ARG_WRONG, "Unknown ISColoringType %d", (int)ctype);
  }
  /* take an extra reference since the cached coloring is returned to the caller */
  PetscCall(ISColoringReference(*coloring));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
   DMCreateColoring_DA_3d_MPIAIJ - Coloring for a 3d DMDA; same period-(2*s+1)
   scheme as the 2d version, extended to the third direction.
*/
PetscErrorCode DMCreateColoring_DA_3d_MPIAIJ(DM da, ISColoringType ctype, ISColoring *coloring)
{
  PetscInt         xs, ys, nx, ny, i, j, gxs, gys, gnx, gny, m, n, p, dim, s, k, nc, col, zs, gzs, ii, l, nz, gnz, M, N, P;
  PetscInt         ncolors;
  MPI_Comm         comm;
  DMBoundaryType   bx, by, bz;
  DMDAStencilType  st;
  ISColoringValue *colors;
  DM_DA           *dd = (DM_DA *)da->data;

  PetscFunctionBegin;
  /*
     nc - number of components per grid point
     col - number of colors needed in one direction for single component problem
  */
  PetscCall(DMDAGetInfo(da, &dim, &m, &n, &p, &M, &N, &P, &nc, &s, &bx, &by, &bz, &st));
  col = 2 * s + 1;
  /* with periodicity the modular color pattern only tiles correctly if the grid size is a multiple of col */
  PetscCheck(bx != DM_BOUNDARY_PERIODIC || (m % col) == 0, PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "For coloring efficiency ensure number of grid points in X is divisible\n\
by 2*stencil_width + 1\n");
  PetscCheck(by != DM_BOUNDARY_PERIODIC || (n % col) == 0, PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "For coloring efficiency ensure number of grid points in Y is divisible\n\
by 2*stencil_width + 1\n");
  PetscCheck(bz != DM_BOUNDARY_PERIODIC || (p % col) == 0, PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "For coloring efficiency ensure number of grid points in Z is divisible\n\
by 2*stencil_width + 1\n");

  PetscCall(DMDAGetCorners(da, &xs, &ys, &zs, &nx, &ny, &nz));
  PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, &gzs, &gnx, &gny, &gnz));
  PetscCall(PetscObjectGetComm((PetscObject)da, &comm));

  /* create the coloring */
  if (ctype == IS_COLORING_GLOBAL) {
    if (!dd->localcoloring) {
      PetscCall(PetscMalloc1(nc * nx * ny * nz, &colors));
      ii = 0;
      for (k = zs; k < zs + nz; k++) {
        for (j = ys; j < ys + ny; j++) {
          for (i = xs; i < xs + nx; i++) {
            for (l = 0; l < nc; l++) colors[ii++] = l + nc * ((i % col) + col * (j % col) + col * col * (k % col));
          }
        }
      }
      ncolors = nc + nc * (col - 1 + col * (col - 1) + col * col * (col - 1));
      PetscCall(ISColoringCreate(comm, ncolors, nc * nx * ny * nz, colors, PETSC_OWN_POINTER, &dd->localcoloring));
    }
    *coloring = dd->localcoloring;
  } else if (ctype == IS_COLORING_LOCAL) {
    if (!dd->ghostedcoloring) {
      PetscCall(PetscMalloc1(nc * gnx * gny * gnz, &colors));
      ii = 0;
      for (k = gzs; k < gzs + gnz; k++) {
        for (j = gys; j < gys + gny; j++) {
          for (i = gxs; i < gxs + gnx; i++) {
            for (l = 0; l < nc; l++) {
              /* the complicated stuff is to handle periodic boundaries */
              colors[ii++] = l + nc * ((SetInRange(i, m) % col) + col * (SetInRange(j, n) % col) + col * col * (SetInRange(k, p) % col));
            }
          }
        }
      }
      ncolors = nc + nc * (col - 1 + col * (col - 1) + col * col * (col - 1));
      PetscCall(ISColoringCreate(comm, ncolors, nc * gnx * gny * gnz, colors, PETSC_OWN_POINTER,
&dd->ghostedcoloring));
      PetscCall(ISColoringSetType(dd->ghostedcoloring, IS_COLORING_LOCAL));
    }
    *coloring = dd->ghostedcoloring;
  } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_ARG_WRONG, "Unknown ISColoringType %d", (int)ctype);
  /* take an extra reference since the cached coloring is returned to the caller */
  PetscCall(ISColoringReference(*coloring));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
   DMCreateColoring_DA_1d_MPIAIJ - Coloring for a 1d DMDA. When a sparse
   off-diagonal fill pattern was provided (dd->ofillcols), fewer colors are used:
   only components that actually couple off-process need distinct off-block colors.
*/
PetscErrorCode DMCreateColoring_DA_1d_MPIAIJ(DM da, ISColoringType ctype, ISColoring *coloring)
{
  PetscInt         xs, nx, i, i1, gxs, gnx, l, m, M, dim, s, nc, col;
  PetscInt         ncolors;
  MPI_Comm         comm;
  DMBoundaryType   bx;
  ISColoringValue *colors;
  DM_DA           *dd = (DM_DA *)da->data;

  PetscFunctionBegin;
  /*
     nc - number of components per grid point
     col - number of colors needed in one direction for single component problem
  */
  PetscCall(DMDAGetInfo(da, &dim, &m, NULL, NULL, &M, NULL, NULL, &nc, &s, &bx, NULL, NULL, NULL));
  col = 2 * s + 1;
  PetscCall(DMDAGetCorners(da, &xs, NULL, NULL, &nx, NULL, NULL));
  PetscCall(DMDAGetGhostCorners(da, &gxs, NULL, NULL, &gnx, NULL, NULL));
  PetscCall(PetscObjectGetComm((PetscObject)da, &comm));

  /* create the coloring */
  if (ctype == IS_COLORING_GLOBAL) {
    if (!dd->localcoloring) {
      PetscCall(PetscMalloc1(nc * nx, &colors));
      if (dd->ofillcols) {
        PetscInt tc = 0;
        /* tc = number of components with any off-diagonal coupling */
        for (i = 0; i < nc; i++) tc += (PetscInt)(dd->ofillcols[i] > 0);
        i1 = 0;
        for (i = xs; i < xs + nx; i++) {
          for (l = 0; l < nc; l++) {
            if (dd->ofillcols[l] && (i % col)) {
              colors[i1++] = nc - 1 + tc * ((i % col) - 1) + dd->ofillcols[l];
            } else {
              colors[i1++] = l;
            }
          }
        }
        ncolors = nc + 2 * s * tc;
      } else {
        i1 = 0;
        for (i = xs; i < xs + nx; i++) {
          for (l = 0; l < nc; l++) colors[i1++] = l + nc * (i % col);
        }
        ncolors = nc + nc * (col - 1);
      }
      PetscCall(ISColoringCreate(comm, ncolors, nc * nx, colors, PETSC_OWN_POINTER, &dd->localcoloring));
    }
    *coloring =
dd->localcoloring;
  } else if (ctype == IS_COLORING_LOCAL) {
    if (!dd->ghostedcoloring) {
      PetscCall(PetscMalloc1(nc * gnx, &colors));
      i1 = 0;
      for (i = gxs; i < gxs + gnx; i++) {
        for (l = 0; l < nc; l++) {
          /* the complicated stuff is to handle periodic boundaries */
          colors[i1++] = l + nc * (SetInRange(i, m) % col);
        }
      }
      ncolors = nc + nc * (col - 1);
      PetscCall(ISColoringCreate(comm, ncolors, nc * gnx, colors, PETSC_OWN_POINTER, &dd->ghostedcoloring));
      PetscCall(ISColoringSetType(dd->ghostedcoloring, IS_COLORING_LOCAL));
    }
    *coloring = dd->ghostedcoloring;
  } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_ARG_WRONG, "Unknown ISColoringType %d", (int)ctype);
  /* take an extra reference since the cached coloring is returned to the caller */
  PetscCall(ISColoringReference(*coloring));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
   DMCreateColoring_DA_2d_5pt_MPIAIJ - 5-color scheme for the 2d star stencil with
   stencil width 1 (the classic 5-point stencil). Called from
   DMCreateColoring_DA_2d_MPIAIJ(), which takes the reference on the result.
*/
PetscErrorCode DMCreateColoring_DA_2d_5pt_MPIAIJ(DM da, ISColoringType ctype, ISColoring *coloring)
{
  PetscInt         xs, ys, nx, ny, i, j, ii, gxs, gys, gnx, gny, m, n, dim, s, k, nc;
  PetscInt         ncolors;
  MPI_Comm         comm;
  DMBoundaryType   bx, by;
  ISColoringValue *colors;
  DM_DA           *dd = (DM_DA *)da->data;

  PetscFunctionBegin;
  /*
     nc - number of components per grid point
     col - number of colors needed in one direction for single component problem
  */
  PetscCall(DMDAGetInfo(da, &dim, &m, &n, NULL, NULL, NULL, NULL, &nc, &s, &bx, &by, NULL, NULL));
  PetscCall(DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL));
  PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL));
  PetscCall(PetscObjectGetComm((PetscObject)da, &comm));
  /* create the coloring */
  if (ctype == IS_COLORING_GLOBAL) {
    if (!dd->localcoloring) {
      PetscCall(PetscMalloc1(nc * nx * ny, &colors));
      ii = 0;
      for (j = ys; j < ys + ny; j++) {
        for (i = xs; i < xs + nx; i++) {
          /* (3*j + i) mod 5 gives a valid 5-coloring of the 5-point stencil */
          for (k = 0; k < nc; k++) colors[ii++] = k + nc * ((3 * j + i) % 5);
        }
      }
      ncolors = 5 * nc;
      PetscCall(ISColoringCreate(comm, ncolors, nc * nx * ny, colors, PETSC_OWN_POINTER, &dd->localcoloring));
    }
    *coloring = dd->localcoloring;
  } else if (ctype == IS_COLORING_LOCAL) {
    if (!dd->ghostedcoloring) {
      PetscCall(PetscMalloc1(nc * gnx * gny, &colors));
      ii = 0;
      for (j = gys; j < gys + gny; j++) {
        for (i = gxs; i < gxs + gnx; i++) {
          for (k = 0; k < nc; k++) colors[ii++] = k + nc * ((3 * SetInRange(j, n) + SetInRange(i, m)) % 5);
        }
      }
      ncolors = 5 * nc;
      PetscCall(ISColoringCreate(comm, ncolors, nc * gnx * gny, colors, PETSC_OWN_POINTER, &dd->ghostedcoloring));
      PetscCall(ISColoringSetType(dd->ghostedcoloring, IS_COLORING_LOCAL));
    }
    *coloring = dd->ghostedcoloring;
  } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_ARG_WRONG, "Unknown ISColoringType %d", (int)ctype);
  /* note: no ISColoringReference() here; the caller DMCreateColoring_DA_2d_MPIAIJ() takes the reference */
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* =========================================================================== */
/* Dimension/type-specific matrix preallocation routines, defined elsewhere */
extern PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ(DM, Mat);
extern PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ_Fill(DM, Mat);
extern PetscErrorCode DMCreateMatrix_DA_1d_SeqAIJ_NoPreallocation(DM, Mat);
extern PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ(DM, Mat);
extern PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ_Fill(DM, Mat);
extern PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ(DM, Mat);
extern PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ_Fill(DM, Mat);
extern PetscErrorCode DMCreateMatrix_DA_2d_MPIBAIJ(DM, Mat);
extern PetscErrorCode DMCreateMatrix_DA_3d_MPIBAIJ(DM, Mat);
extern PetscErrorCode DMCreateMatrix_DA_2d_MPISBAIJ(DM, Mat);
extern PetscErrorCode DMCreateMatrix_DA_3d_MPISBAIJ(DM, Mat);
extern PetscErrorCode DMCreateMatrix_DA_2d_MPISELL(DM, Mat);
extern PetscErrorCode DMCreateMatrix_DA_3d_MPISELL(DM, Mat);
extern PetscErrorCode DMCreateMatrix_DA_IS(DM, Mat);

/*
   MatView_MPI_DA - Views a DMDA-generated parallel matrix in the natural (grid)
   ordering by permuting it with the DMDA's application ordering first.
*/
static PetscErrorCode MatView_MPI_DA(Mat A, PetscViewer viewer)
{
  DM
da;
  const char       *prefix;
  Mat               AA, Anatural;
  AO                ao;
  PetscInt          rstart, rend, *petsc, i;
  IS                is;
  MPI_Comm          comm;
  PetscViewerFormat format;
  PetscBool         flag;

  PetscFunctionBegin;
  /* Check whether we are just printing info, in which case MatView() already viewed everything we wanted to view */
  PetscCall(PetscViewerGetFormat(viewer, &format));
  if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) PetscFunctionReturn(PETSC_SUCCESS);

  PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
  PetscCall(MatGetDM(A, &da));
  PetscCheck(da, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Matrix not generated from a DMDA");

  /* build the IS mapping this process's rows to the natural ordering */
  PetscCall(DMDAGetAO(da, &ao));
  PetscCall(MatGetOwnershipRange(A, &rstart, &rend));
  PetscCall(PetscMalloc1(rend - rstart, &petsc));
  for (i = rstart; i < rend; i++) petsc[i - rstart] = i;
  PetscCall(AOApplicationToPetsc(ao, rend - rstart, petsc));
  PetscCall(ISCreateGeneral(comm, rend - rstart, petsc, PETSC_OWN_POINTER, &is));

  /* call viewer on natural ordering */
  PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATMPISELL, &flag));
  if (flag) {
    /* SELL cannot be submatrixed directly; view via an AIJ copy */
    PetscCall(MatConvert(A, MATAIJ, MAT_INITIAL_MATRIX, &AA));
    A = AA;
  }
  PetscCall(MatCreateSubMatrix(A, is, is, MAT_INITIAL_MATRIX, &Anatural));
  PetscCall(ISDestroy(&is));
  PetscCall(PetscObjectGetOptionsPrefix((PetscObject)A, &prefix));
  PetscCall(PetscObjectSetOptionsPrefix((PetscObject)Anatural, prefix));
  PetscCall(PetscObjectSetName((PetscObject)Anatural, ((PetscObject)A)->name));
  /* suppress header printing for the temporary permuted matrix */
  ((PetscObject)Anatural)->donotPetscObjectPrintClassNamePrefixType = PETSC_TRUE;
  PetscCall(MatView(Anatural, viewer));
  ((PetscObject)Anatural)->donotPetscObjectPrintClassNamePrefixType = PETSC_FALSE;
  PetscCall(MatDestroy(&Anatural));
  if (flag) PetscCall(MatDestroy(&AA));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
   MatLoad_MPI_DA - Loads a matrix stored in natural (grid) ordering and permutes
   it into the PETSc parallel ordering of the DMDA attached to A.
*/
static PetscErrorCode
MatLoad_MPI_DA(Mat A, PetscViewer viewer)
{
  DM       da;
  Mat      Anatural, Aapp;
  AO       ao;
  PetscInt rstart, rend, *app, i, m, n, M, N;
  IS       is;
  MPI_Comm comm;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
  PetscCall(MatGetDM(A, &da));
  PetscCheck(da, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Matrix not generated from a DMDA");

  /* Load the matrix in natural ordering */
  PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &Anatural));
  PetscCall(MatSetType(Anatural, ((PetscObject)A)->type_name));
  PetscCall(MatGetSize(A, &M, &N));
  PetscCall(MatGetLocalSize(A, &m, &n));
  PetscCall(MatSetSizes(Anatural, m, n, M, N));
  PetscCall(MatLoad(Anatural, viewer));

  /* Map natural ordering to application ordering and create IS */
  PetscCall(DMDAGetAO(da, &ao));
  PetscCall(MatGetOwnershipRange(Anatural, &rstart, &rend));
  PetscCall(PetscMalloc1(rend - rstart, &app));
  for (i = rstart; i < rend; i++) app[i - rstart] = i;
  PetscCall(AOPetscToApplication(ao, rend - rstart, app));
  PetscCall(ISCreateGeneral(comm, rend - rstart, app, PETSC_OWN_POINTER, &is));

  /* Do permutation and replace header */
  PetscCall(MatCreateSubMatrix(Anatural, is, is, MAT_INITIAL_MATRIX, &Aapp));
  PetscCall(MatHeaderReplace(A, &Aapp));
  PetscCall(ISDestroy(&is));
  PetscCall(MatDestroy(&Anatural));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
   DMCreateMatrix_DA - Entry point for DMCreateMatrix() on a DMDA: creates the
   matrix of the DM's mattype and dispatches to the appropriate dimension- and
   format-specific preallocation routine.
*/
PetscErrorCode DMCreateMatrix_DA(DM da, Mat *J)
{
  PetscInt    dim, dof, nx, ny, nz, dims[3], starts[3], M, N, P;
  Mat         A;
  MPI_Comm    comm;
  MatType     Atype;
  MatType     mtype;
  PetscMPIInt size;
  DM_DA      *dd = (DM_DA *)da->data;
  void (*aij)(void) = NULL, (*baij)(void) = NULL, (*sbaij)(void) = NULL, (*sell)(void) = NULL, (*is)(void) = NULL;

  PetscFunctionBegin;
  PetscCall(MatInitializePackage());
  mtype = da->mattype;

  /*
                                                  m
          ------------------------------------------------------
         |                                                     |
         |                                                     |
         |               ----------------------                |
         |               |                    |                |
      n  |           ny  |                    |                |
         |               |                    |                |
         |               .---------------------                |
         |             (xs,ys)     nx                          |
         |            .                                        |
         |         (gxs,gys)                                   |
         |                                                     |
          -----------------------------------------------------
  */

  /*
     nc - number of components per grid point
     col - number of colors needed in one direction for single component problem
  */
  M   = dd->M;
  N   = dd->N;
  P   = dd->P;
  dim = da->dim;
  dof = dd->w;
  /* PetscCall(DMDAGetInfo(da,&dim,&M,&N,&P,NULL,NULL,NULL,&dof,NULL,NULL,NULL,NULL,NULL)); */
  PetscCall(DMDAGetCorners(da, NULL, NULL, NULL, &nx, &ny, &nz));
  PetscCall(PetscObjectGetComm((PetscObject)da, &comm));
  PetscCall(MatCreate(comm, &A));
  PetscCall(MatSetSizes(A, dof * nx * ny * nz, dof * nx * ny * nz, dof * M * N * P, dof * M * N * P));
  PetscCall(MatSetType(A, mtype));
  PetscCall(MatSetFromOptions(A));
  if (dof * nx * ny * nz < da->bind_below) {
    /* small local problems stay on the CPU */
    PetscCall(MatSetBindingPropagates(A, PETSC_TRUE));
    PetscCall(MatBindToCPU(A, PETSC_TRUE));
  }
  PetscCall(MatSetDM(A, da));
  if (da->structure_only) PetscCall(MatSetOption(A, MAT_STRUCTURE_ONLY, PETSC_TRUE));
  PetscCall(MatGetType(A, &Atype));
  /*
     We do not provide a getmatrix function in the DMDA operations because
     the basic DMDA does not know about matrices. We think of DMDA as being more
     low-level than matrices. This is kind of cheating but, cause sometimes
     we think of DMDA as higher level than matrices.

     We could switch based on Atype (or mtype), but we do not since the
     specialized setting routines depend only on the particular preallocation
     details of the matrix, not the type itself.
  */
  PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatMPIAIJSetPreallocation_C", &aij));
  if (!aij) PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatSeqAIJSetPreallocation_C", &aij));
  if (!aij) {
    PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatMPIBAIJSetPreallocation_C", &baij));
    if (!baij) PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatSeqBAIJSetPreallocation_C", &baij));
    if (!baij) {
      PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatMPISBAIJSetPreallocation_C", &sbaij));
      if (!sbaij) PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatSeqSBAIJSetPreallocation_C", &sbaij));
      if (!sbaij) {
        PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatMPISELLSetPreallocation_C", &sell));
        if (!sell) PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatSeqSELLSetPreallocation_C", &sell));
      }
      if (!sell) PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatISSetPreallocation_C", &is));
    }
  }
  if (aij) {
    if (dim == 1) {
      if (dd->ofill) {
        PetscCall(DMCreateMatrix_DA_1d_MPIAIJ_Fill(da, A));
      } else {
        DMBoundaryType bx;
        PetscMPIInt    size;
        PetscCall(DMDAGetInfo(da, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, &bx, NULL, NULL, NULL));
        PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)da), &size));
        if (size == 1 && bx == DM_BOUNDARY_NONE) {
          PetscCall(DMCreateMatrix_DA_1d_SeqAIJ_NoPreallocation(da, A));
        } else {
          PetscCall(DMCreateMatrix_DA_1d_MPIAIJ(da, A));
        }
      }
    } else if (dim == 2) {
      if (dd->ofill) {
        PetscCall(DMCreateMatrix_DA_2d_MPIAIJ_Fill(da, A));
      } else {
        PetscCall(DMCreateMatrix_DA_2d_MPIAIJ(da, A));
      }
    } else if (dim == 3) {
      if (dd->ofill) {
        PetscCall(DMCreateMatrix_DA_3d_MPIAIJ_Fill(da, A));
      } else {
        PetscCall(DMCreateMatrix_DA_3d_MPIAIJ(da, A));
      }
    }
  } else if (baij) {
    if (dim == 2) {
      PetscCall(DMCreateMatrix_DA_2d_MPIBAIJ(da, A));
    } else if (dim == 3) {
      PetscCall(DMCreateMatrix_DA_3d_MPIBAIJ(da, A));
    } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "Not implemented for %" PetscInt_FMT " dimension and Matrix Type: %s in %" PetscInt_FMT " dimension! Send mail to petsc-maint@mcs.anl.gov for code", dim, Atype, dim);
  } else if (sbaij) {
    if (dim == 2) {
      PetscCall(DMCreateMatrix_DA_2d_MPISBAIJ(da, A));
    } else if (dim == 3) {
      PetscCall(DMCreateMatrix_DA_3d_MPISBAIJ(da, A));
    } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "Not implemented for %" PetscInt_FMT " dimension and Matrix Type: %s in %" PetscInt_FMT " dimension! Send mail to petsc-maint@mcs.anl.gov for code", dim, Atype, dim);
  } else if (sell) {
    if (dim == 2) {
      PetscCall(DMCreateMatrix_DA_2d_MPISELL(da, A));
    } else if (dim == 3) {
      PetscCall(DMCreateMatrix_DA_3d_MPISELL(da, A));
    } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "Not implemented for %" PetscInt_FMT " dimension and Matrix Type: %s in %" PetscInt_FMT " dimension! Send mail to petsc-maint@mcs.anl.gov for code", dim, Atype, dim);
  } else if (is) {
    PetscCall(DMCreateMatrix_DA_IS(da, A));
  } else {
    /* no preallocation routine available: just set up with the l2g mapping */
    ISLocalToGlobalMapping ltog;

    PetscCall(MatSetBlockSize(A, dof));
    PetscCall(MatSetUp(A));
    PetscCall(DMGetLocalToGlobalMapping(da, &ltog));
    PetscCall(MatSetLocalToGlobalMapping(A, ltog, ltog));
  }
  PetscCall(DMDAGetGhostCorners(da, &starts[0], &starts[1], &starts[2], &dims[0], &dims[1], &dims[2]));
  PetscCall(MatSetStencil(A, dim, dims, starts, dof));
  PetscCall(MatSetDM(A, da));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  if (size > 1) {
    /* change viewer to display matrix in natural ordering */
    PetscCall(MatSetOperation(A, MATOP_VIEW, (void (*)(void))MatView_MPI_DA));
    PetscCall(MatSetOperation(A, MATOP_LOAD, (void (*)(void))MatLoad_MPI_DA));
  }
  *J = A;
  PetscFunctionReturn(PETSC_SUCCESS);
}

PETSC_EXTERN PetscErrorCode MatISSetPreallocation_IS(Mat, PetscInt, const PetscInt[], PetscInt, const PetscInt[]);

/*
   DMCreateMatrix_DA_IS - Builds the local-to-global mapping and preallocation
   for a MATIS matrix from the DMDA's element decomposition.
*/
PetscErrorCode DMCreateMatrix_DA_IS(DM dm, Mat J)
{
  DM_DA                 *da = (DM_DA *)dm->data;
  Mat                    lJ, P;
  ISLocalToGlobalMapping ltog;
  IS                     is;
  PetscBT                bt;
  const PetscInt        *e_loc, *idx;
  PetscInt               i, nel, nen, nv, dof, *gidx, n, N;

  /* The l2g map of DMDA has all ghosted nodes, and e_loc is a subset of all the local nodes (including the ghosted)
     We need to filter out the local indices that are not represented through the DMDAGetElements decomposition */
  PetscFunctionBegin;
  dof = da->w;
  PetscCall(MatSetBlockSize(J, dof));
  PetscCall(DMGetLocalToGlobalMapping(dm, &ltog));

  /* flag local elements indices in local DMDA numbering */
  PetscCall(ISLocalToGlobalMappingGetSize(ltog, &nv));
  PetscCall(PetscBTCreate(nv / dof, &bt));
  PetscCall(DMDAGetElements(dm, &nel, &nen, &e_loc)); /* this will throw an error if the stencil type is not DMDA_STENCIL_BOX */
  for (i = 0; i < nel * nen; i++)
    PetscCall(PetscBTSet(bt, e_loc[i]));

  /* filter out (set to -1) the global indices not used by the local elements */
  PetscCall(PetscMalloc1(nv / dof, &gidx));
  PetscCall(ISLocalToGlobalMappingGetBlockIndices(ltog, &idx));
  PetscCall(PetscArraycpy(gidx, idx, nv / dof));
  PetscCall(ISLocalToGlobalMappingRestoreBlockIndices(ltog, &idx));
  for (i = 0; i < nv / dof; i++)
    if (!PetscBTLookup(bt, i)) gidx[i] = -1;
  PetscCall(PetscBTDestroy(&bt));
  PetscCall(ISCreateBlock(PetscObjectComm((PetscObject)dm), dof, nv / dof, gidx, PETSC_OWN_POINTER, &is));
  PetscCall(ISLocalToGlobalMappingCreateIS(is, &ltog));
  PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));
  PetscCall(ISLocalToGlobalMappingDestroy(&ltog));
  PetscCall(ISDestroy(&is));

  /* Preallocation */
  if (dm->prealloc_skip) {
    PetscCall(MatSetUp(J));
  } else {
    /* use a MATPREALLOCATOR stand-in to compute the local nonzero pattern from the elements */
    PetscCall(MatISGetLocalMat(J, &lJ));
    PetscCall(MatGetLocalToGlobalMapping(lJ, &ltog, NULL));
    PetscCall(MatCreate(PetscObjectComm((PetscObject)lJ), &P));
    PetscCall(MatSetType(P, MATPREALLOCATOR));
    PetscCall(MatSetLocalToGlobalMapping(P, ltog, ltog));
    PetscCall(MatGetSize(lJ, &N, NULL));
    PetscCall(MatGetLocalSize(lJ, &n, NULL));
    PetscCall(MatSetSizes(P, n, n, N, N));
    PetscCall(MatSetBlockSize(P, dof));
    PetscCall(MatSetUp(P));
    for (i = 0; i < nel; i++) PetscCall(MatSetValuesBlockedLocal(P, nen, e_loc + i * nen, nen, e_loc + i * nen, NULL, INSERT_VALUES));
    PetscCall(MatPreallocatorPreallocate(P, (PetscBool)!da->prealloc_only, lJ));
    PetscCall(MatISRestoreLocalMat(J, &lJ));
    PetscCall(DMDARestoreElements(dm, &nel, &nen, &e_loc));
    PetscCall(MatDestroy(&P));

    PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
   DMCreateMatrix_DA_2d_MPISELL - Preallocates (and optionally zeros) a 2d SELL
   matrix for this DMDA. (Body continues beyond this chunk.)
*/
PetscErrorCode DMCreateMatrix_DA_2d_MPISELL(DM da, Mat J)
{
  PetscInt xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny, m, n,
dim, s, *cols = NULL, k, nc, *rows = NULL, col, cnt, l, p; 832 PetscInt lstart, lend, pstart, pend, *dnz, *onz; 833 MPI_Comm comm; 834 PetscScalar *values; 835 DMBoundaryType bx, by; 836 ISLocalToGlobalMapping ltog; 837 DMDAStencilType st; 838 839 PetscFunctionBegin; 840 /* 841 nc - number of components per grid point 842 col - number of colors needed in one direction for single component problem 843 */ 844 PetscCall(DMDAGetInfo(da, &dim, &m, &n, NULL, NULL, NULL, NULL, &nc, &s, &bx, &by, NULL, &st)); 845 col = 2 * s + 1; 846 PetscCall(DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL)); 847 PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL)); 848 PetscCall(PetscObjectGetComm((PetscObject)da, &comm)); 849 850 PetscCall(PetscMalloc2(nc, &rows, col * col * nc * nc, &cols)); 851 PetscCall(DMGetLocalToGlobalMapping(da, <og)); 852 853 PetscCall(MatSetBlockSize(J, nc)); 854 /* determine the matrix preallocation information */ 855 MatPreallocateBegin(comm, nc * nx * ny, nc * nx * ny, dnz, onz); 856 for (i = xs; i < xs + nx; i++) { 857 pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i)); 858 pend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1)); 859 860 for (j = ys; j < ys + ny; j++) { 861 slot = i - gxs + gnx * (j - gys); 862 863 lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j)); 864 lend = (by == DM_BOUNDARY_PERIODIC) ? 
s : (PetscMin(s, n - j - 1)); 865 866 cnt = 0; 867 for (k = 0; k < nc; k++) { 868 for (l = lstart; l < lend + 1; l++) { 869 for (p = pstart; p < pend + 1; p++) { 870 if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star have either l = 0 or p = 0 */ 871 cols[cnt++] = k + nc * (slot + gnx * l + p); 872 } 873 } 874 } 875 rows[k] = k + nc * (slot); 876 } 877 PetscCall(MatPreallocateSetLocal(ltog, nc, rows, ltog, cnt, cols, dnz, onz)); 878 } 879 } 880 PetscCall(MatSetBlockSize(J, nc)); 881 PetscCall(MatSeqSELLSetPreallocation(J, 0, dnz)); 882 PetscCall(MatMPISELLSetPreallocation(J, 0, dnz, 0, onz)); 883 MatPreallocateEnd(dnz, onz); 884 885 PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog)); 886 887 /* 888 For each node in the grid: we get the neighbors in the local (on processor ordering 889 that includes the ghost points) then MatSetValuesLocal() maps those indices to the global 890 PETSc ordering. 891 */ 892 if (!da->prealloc_only) { 893 PetscCall(PetscCalloc1(col * col * nc * nc, &values)); 894 for (i = xs; i < xs + nx; i++) { 895 pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i)); 896 pend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1)); 897 898 for (j = ys; j < ys + ny; j++) { 899 slot = i - gxs + gnx * (j - gys); 900 901 lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j)); 902 lend = (by == DM_BOUNDARY_PERIODIC) ? 
s : (PetscMin(s, n - j - 1)); 903 904 cnt = 0; 905 for (k = 0; k < nc; k++) { 906 for (l = lstart; l < lend + 1; l++) { 907 for (p = pstart; p < pend + 1; p++) { 908 if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star have either l = 0 or p = 0 */ 909 cols[cnt++] = k + nc * (slot + gnx * l + p); 910 } 911 } 912 } 913 rows[k] = k + nc * (slot); 914 } 915 PetscCall(MatSetValuesLocal(J, nc, rows, cnt, cols, values, INSERT_VALUES)); 916 } 917 } 918 PetscCall(PetscFree(values)); 919 /* do not copy values to GPU since they are all zero and not yet needed there */ 920 PetscCall(MatBindToCPU(J, PETSC_TRUE)); 921 PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY)); 922 PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY)); 923 PetscCall(MatBindToCPU(J, PETSC_FALSE)); 924 PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE)); 925 } 926 PetscCall(PetscFree2(rows, cols)); 927 PetscFunctionReturn(PETSC_SUCCESS); 928 } 929 930 PetscErrorCode DMCreateMatrix_DA_3d_MPISELL(DM da, Mat J) 931 { 932 PetscInt xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny; 933 PetscInt m, n, dim, s, *cols = NULL, k, nc, *rows = NULL, col, cnt, l, p, *dnz = NULL, *onz = NULL; 934 PetscInt istart, iend, jstart, jend, kstart, kend, zs, nz, gzs, gnz, ii, jj, kk, M, N, P; 935 MPI_Comm comm; 936 PetscScalar *values; 937 DMBoundaryType bx, by, bz; 938 ISLocalToGlobalMapping ltog; 939 DMDAStencilType st; 940 941 PetscFunctionBegin; 942 /* 943 nc - number of components per grid point 944 col - number of colors needed in one direction for single component problem 945 */ 946 PetscCall(DMDAGetInfo(da, &dim, &m, &n, &p, &M, &N, &P, &nc, &s, &bx, &by, &bz, &st)); 947 col = 2 * s + 1; 948 PetscCall(DMDAGetCorners(da, &xs, &ys, &zs, &nx, &ny, &nz)); 949 PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, &gzs, &gnx, &gny, &gnz)); 950 PetscCall(PetscObjectGetComm((PetscObject)da, &comm)); 951 952 PetscCall(PetscMalloc2(nc, &rows, col * col * col * nc * nc, &cols)); 953 
PetscCall(DMGetLocalToGlobalMapping(da, &ltog));

  PetscCall(MatSetBlockSize(J, nc));
  /* determine the matrix preallocation information */
  MatPreallocateBegin(comm, nc * nx * ny * nz, nc * nx * ny * nz, dnz, onz);
  for (i = xs; i < xs + nx; i++) {
    /* per-direction stencil extents, clipped at physical boundaries unless periodic */
    istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
    iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
    for (j = ys; j < ys + ny; j++) {
      jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
      jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
      for (k = zs; k < zs + nz; k++) {
        kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
        kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));

        slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);

        cnt = 0;
        for (l = 0; l < nc; l++) {
          for (ii = istart; ii < iend + 1; ii++) {
            for (jj = jstart; jj < jend + 1; jj++) {
              for (kk = kstart; kk < kend + 1; kk++) {
                if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/
                  cols[cnt++] = l + nc * (slot + ii + gnx * jj + gnx * gny * kk);
                }
              }
            }
          }
          rows[l] = l + nc * (slot);
        }
        PetscCall(MatPreallocateSetLocal(ltog, nc, rows, ltog, cnt, cols, dnz, onz));
      }
    }
  }
  PetscCall(MatSetBlockSize(J, nc));
  PetscCall(MatSeqSELLSetPreallocation(J, 0, dnz));
  PetscCall(MatMPISELLSetPreallocation(J, 0, dnz, 0, onz));
  MatPreallocateEnd(dnz, onz);
  PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    /* zero block used to instantiate the nonzero pattern */
    PetscCall(PetscCalloc1(col * col * col * nc * nc * nc, &values));
    for (i = xs; i < xs + nx; i++) {
      istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
      iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
      for (j = ys; j < ys + ny; j++) {
        jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
        jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
        for (k = zs; k < zs + nz; k++) {
          kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
          kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));

          slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);

          cnt = 0;
          for (l = 0; l < nc; l++) {
            for (ii = istart; ii < iend + 1; ii++) {
              for (jj = jstart; jj < jend + 1; jj++) {
                for (kk = kstart; kk < kend + 1; kk++) {
                  if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/
                    cols[cnt++] = l + nc * (slot + ii + gnx * jj + gnx * gny * kk);
                  }
                }
              }
            }
            rows[l] = l + nc * (slot);
          }
          PetscCall(MatSetValuesLocal(J, nc, rows, cnt, cols, values, INSERT_VALUES));
        }
      }
    }
    PetscCall(PetscFree(values));
    /* do not copy values to GPU since they are all zero and not yet needed there */
    PetscCall(MatBindToCPU(J, PETSC_TRUE));
    PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatBindToCPU(J, PETSC_FALSE));
    PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  }
  PetscCall(PetscFree2(rows, cols));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Exact stencil-based preallocation of the MPIAIJ matrix J for a 2d DMDA;
   optionally (when !da->prealloc_only) also inserts the pattern and locks it
   (continues in the next chunk). */
PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ(DM da, Mat J)
{
  PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny, m, n, dim, s, *cols = NULL, k, nc, *rows = NULL, col, cnt, l, p, M, N;
  PetscInt               lstart, lend, pstart, pend, *dnz, *onz;
  MPI_Comm               comm;
  DMBoundaryType         bx, by;
  ISLocalToGlobalMapping ltog, mltog;
  DMDAStencilType        st;
  PetscBool              removedups = PETSC_FALSE, alreadyboundtocpu =
PETSC_TRUE;

  PetscFunctionBegin;
  /*
         nc - number of components per grid point
         col - number of colors needed in one direction for single component problem
  */
  PetscCall(DMDAGetInfo(da, &dim, &m, &n, NULL, &M, &N, NULL, &nc, &s, &bx, &by, NULL, &st));
  if (bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE) PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_TRUE));
  col = 2 * s + 1;
  /*
       With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times
       because of "wrapping" around the end of the domain hitting an entry already counted in the other direction.
  */
  if (M == 1 && 2 * s >= m) removedups = PETSC_TRUE;
  if (N == 1 && 2 * s >= n) removedups = PETSC_TRUE;
  PetscCall(DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL));
  PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL));
  PetscCall(PetscObjectGetComm((PetscObject)da, &comm));

  PetscCall(PetscMalloc2(nc, &rows, col * col * nc * nc, &cols));
  PetscCall(DMGetLocalToGlobalMapping(da, &ltog));

  PetscCall(MatSetBlockSize(J, nc));
  /* determine the matrix preallocation information */
  MatPreallocateBegin(comm, nc * nx * ny, nc * nx * ny, dnz, onz);
  for (i = xs; i < xs + nx; i++) {
    /* stencil extent in x, clipped at the physical boundary unless periodic */
    pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
    pend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));

    for (j = ys; j < ys + ny; j++) {
      slot = i - gxs + gnx * (j - gys);

      lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
      lend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));

      cnt = 0;
      for (k = 0; k < nc; k++) {
        for (l = lstart; l < lend + 1; l++) {
          for (p = pstart; p < pend + 1; p++) {
            if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star have either l = 0 or p = 0 */
              cols[cnt++] = k + nc * (slot + gnx * l + p);
            }
          }
        }
        rows[k] = k + nc * (slot);
      }
      if (removedups) PetscCall(MatPreallocateSetLocalRemoveDups(ltog, nc, rows, ltog, cnt, cols, dnz, onz));
      else PetscCall(MatPreallocateSetLocal(ltog, nc, rows, ltog, cnt, cols, dnz, onz));
    }
  }
  PetscCall(MatSetBlockSize(J, nc));
  PetscCall(MatSeqAIJSetPreallocation(J, 0, dnz));
  PetscCall(MatMPIAIJSetPreallocation(J, 0, dnz, 0, onz));
  MatPreallocateEnd(dnz, onz);
  /* only install the mapping if the matrix does not already carry one */
  PetscCall(MatGetLocalToGlobalMapping(J, &mltog, NULL));
  if (!mltog) PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    for (i = xs; i < xs + nx; i++) {
      pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
      pend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));

      for (j = ys; j < ys + ny; j++) {
        slot = i - gxs + gnx * (j - gys);

        lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
        lend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));

        cnt = 0;
        /* column indices for all nc components of each stencil point are consecutive,
           so only the first component index is computed and the rest incremented */
        for (l = lstart; l < lend + 1; l++) {
          for (p = pstart; p < pend + 1; p++) {
            if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star have either l = 0 or p = 0 */
              cols[cnt++] = nc * (slot + gnx * l + p);
              for (k = 1; k < nc; k++) {
                cols[cnt] = 1 + cols[cnt - 1];
                cnt++;
              }
            }
          }
        }
        for (k = 0; k < nc; k++) rows[k] = k + nc * (slot);
        PetscCall(MatSetValuesLocal(J, nc, rows, cnt, cols, NULL, INSERT_VALUES));
      }
    }
    /* do not copy values to GPU since they are all zero and not yet needed there */
    PetscCall(MatBoundToCPU(J, &alreadyboundtocpu));
    PetscCall(MatBindToCPU(J, PETSC_TRUE));
    PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
    if (!alreadyboundtocpu) PetscCall(MatBindToCPU(J, PETSC_FALSE));
    PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
    if (bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE) PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_FALSE));
  }
  PetscCall(PetscFree2(rows, cols));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Like DMCreateMatrix_DA_2d_MPIAIJ but honors the per-block fill patterns set by
   DMDASetBlockFills(): dfill for the diagonal block, ofill for off-diagonal blocks
   (continues in the next chunk). */
PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ_Fill(DM da, Mat J)
{
  PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
  PetscInt               m, n, dim, s, *cols, k, nc, row, col, cnt, maxcnt = 0, l, p, M, N;
  PetscInt               lstart, lend, pstart, pend, *dnz, *onz;
  DM_DA                 *dd = (DM_DA *)da->data;
  PetscInt               ifill_col, *ofill = dd->ofill, *dfill = dd->dfill;
  MPI_Comm               comm;
  DMBoundaryType         bx, by;
  ISLocalToGlobalMapping ltog;
  DMDAStencilType        st;
  PetscBool              removedups = PETSC_FALSE;

  PetscFunctionBegin;
  /*
         nc - number of components per grid point
         col - number of colors needed in one direction for single component problem
  */
  PetscCall(DMDAGetInfo(da, &dim, &m, &n, NULL, &M, &N, NULL, &nc, &s, &bx, &by, NULL, &st));
  col =
2 * s + 1;
  /*
       With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times
       because of "wrapping" around the end of the domain hitting an entry already counted in the other direction.
  */
  if (M == 1 && 2 * s >= m) removedups = PETSC_TRUE;
  if (N == 1 && 2 * s >= n) removedups = PETSC_TRUE;
  PetscCall(DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL));
  PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL));
  PetscCall(PetscObjectGetComm((PetscObject)da, &comm));

  PetscCall(PetscMalloc1(col * col * nc, &cols));
  PetscCall(DMGetLocalToGlobalMapping(da, &ltog));

  PetscCall(MatSetBlockSize(J, nc));
  /* determine the matrix preallocation information */
  MatPreallocateBegin(comm, nc * nx * ny, nc * nx * ny, dnz, onz);
  for (i = xs; i < xs + nx; i++) {
    pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
    pend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));

    for (j = ys; j < ys + ny; j++) {
      slot = i - gxs + gnx * (j - gys);

      lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
      lend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));

      /* one row at a time: component k of grid point (i,j) */
      for (k = 0; k < nc; k++) {
        cnt = 0;
        for (l = lstart; l < lend + 1; l++) {
          for (p = pstart; p < pend + 1; p++) {
            if (l || p) {
              /* off-diagonal grid point: couple only per the ofill sparse pattern */
              if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star */
                for (ifill_col = ofill[k]; ifill_col < ofill[k + 1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc * (slot + gnx * l + p);
              }
            } else {
              /* diagonal grid point: dfill pattern, or dense block when dfill is NULL */
              if (dfill) {
                for (ifill_col = dfill[k]; ifill_col < dfill[k + 1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc * (slot + gnx * l + p);
              } else {
                for (ifill_col = 0; ifill_col < nc; ifill_col++) cols[cnt++] = ifill_col + nc * (slot + gnx * l + p);
              }
            }
          }
        }
        row    = k + nc * (slot);
        maxcnt = PetscMax(maxcnt, cnt);
        if (removedups) PetscCall(MatPreallocateSetLocalRemoveDups(ltog, 1, &row, ltog, cnt, cols, dnz, onz));
        else PetscCall(MatPreallocateSetLocal(ltog, 1, &row, ltog, cnt, cols, dnz, onz));
      }
    }
  }
  PetscCall(MatSeqAIJSetPreallocation(J, 0, dnz));
  PetscCall(MatMPIAIJSetPreallocation(J, 0, dnz, 0, onz));
  MatPreallocateEnd(dnz, onz);
  PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    for (i = xs; i < xs + nx; i++) {
      pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
      pend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));

      for (j = ys; j < ys + ny; j++) {
        slot = i - gxs + gnx * (j - gys);

        lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
        lend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));

        /* same column generation as the preallocation pass above */
        for (k = 0; k < nc; k++) {
          cnt = 0;
          for (l = lstart; l < lend + 1; l++) {
            for (p = pstart; p < pend + 1; p++) {
              if (l || p) {
                if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star */
                  for (ifill_col = ofill[k]; ifill_col < ofill[k + 1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc * (slot + gnx * l + p);
                }
              } else {
                if (dfill) {
                  for (ifill_col = dfill[k]; ifill_col < dfill[k + 1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc * (slot + gnx * l + p);
                } else {
                  for (ifill_col = 0; ifill_col < nc; ifill_col++) cols[cnt++] = ifill_col + nc * (slot + gnx * l + p);
                }
              }
            }
          }
          row = k + nc * (slot);
          PetscCall(MatSetValuesLocal(J, 1, &row, cnt, cols, NULL, INSERT_VALUES));
        }
      }
    }
    /* do not copy values to GPU since they are all zero and not yet needed there */
    PetscCall(MatBindToCPU(J, PETSC_TRUE));
    PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatBindToCPU(J, PETSC_FALSE));
    PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  }
  PetscCall(PetscFree(cols));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Exact stencil-based preallocation of the MPIAIJ matrix J for a 3d DMDA
   (continues in the next chunk). */
PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ(DM da, Mat J)
{
  PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
  PetscInt               m, n, dim, s, *cols = NULL, k, nc, *rows = NULL, col, cnt, l, p, *dnz = NULL, *onz = NULL;
  PetscInt               istart, iend, jstart, jend, kstart, kend, zs, nz, gzs, gnz, ii, jj, kk, M, N, P;
  MPI_Comm               comm;
  DMBoundaryType         bx, by, bz;
  ISLocalToGlobalMapping ltog, mltog;
  DMDAStencilType        st;
  PetscBool              removedups = PETSC_FALSE;

  PetscFunctionBegin;
  /*
         nc - number of components per grid point
         col - number of colors needed in one direction for single component problem
  */
  PetscCall(DMDAGetInfo(da, &dim, &m,
&n, &p, &M, &N, &P, &nc, &s, &bx, &by, &bz, &st));
  if (bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE && bz == DM_BOUNDARY_NONE) PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_TRUE));
  col = 2 * s + 1;

  /*
       With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times
       because of "wrapping" around the end of the domain hitting an entry already counted in the other direction.
  */
  if (M == 1 && 2 * s >= m) removedups = PETSC_TRUE;
  if (N == 1 && 2 * s >= n) removedups = PETSC_TRUE;
  if (P == 1 && 2 * s >= p) removedups = PETSC_TRUE;

  PetscCall(DMDAGetCorners(da, &xs, &ys, &zs, &nx, &ny, &nz));
  PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, &gzs, &gnx, &gny, &gnz));
  PetscCall(PetscObjectGetComm((PetscObject)da, &comm));

  PetscCall(PetscMalloc2(nc, &rows, col * col * col * nc * nc, &cols));
  PetscCall(DMGetLocalToGlobalMapping(da, &ltog));

  PetscCall(MatSetBlockSize(J, nc));
  /* determine the matrix preallocation information */
  MatPreallocateBegin(comm, nc * nx * ny * nz, nc * nx * ny * nz, dnz, onz);
  for (i = xs; i < xs + nx; i++) {
    /* per-direction stencil extents, clipped at physical boundaries unless periodic */
    istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
    iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
    for (j = ys; j < ys + ny; j++) {
      jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
      jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
      for (k = zs; k < zs + nz; k++) {
        kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
        kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));

        slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);

        cnt = 0;
        for (l = 0; l < nc; l++) {
          for (ii = istart; ii < iend + 1; ii++) {
            for (jj = jstart; jj < jend + 1; jj++) {
              for (kk = kstart; kk < kend + 1; kk++) {
                if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/
                  cols[cnt++] = l + nc * (slot + ii + gnx * jj + gnx * gny * kk);
                }
              }
            }
          }
          rows[l] = l + nc * (slot);
        }
        if (removedups) PetscCall(MatPreallocateSetLocalRemoveDups(ltog, nc, rows, ltog, cnt, cols, dnz, onz));
        else PetscCall(MatPreallocateSetLocal(ltog, nc, rows, ltog, cnt, cols, dnz, onz));
      }
    }
  }
  PetscCall(MatSetBlockSize(J, nc));
  PetscCall(MatSeqAIJSetPreallocation(J, 0, dnz));
  PetscCall(MatMPIAIJSetPreallocation(J, 0, dnz, 0, onz));
  MatPreallocateEnd(dnz, onz);
  /* only install the mapping if the matrix does not already carry one */
  PetscCall(MatGetLocalToGlobalMapping(J, &mltog, NULL));
  if (!mltog) PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    for (i = xs; i < xs + nx; i++) {
      istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
      iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
      for (j = ys; j < ys + ny; j++) {
        jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
        jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
        for (k = zs; k < zs + nz; k++) {
          kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
          kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));

          slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);

          cnt = 0;
          /* NOTE(review): this pass iterates kk/jj/ii (outermost z), unlike the
             preallocation pass above (l/ii/jj/kk); component indices of each
             stencil point are consecutive so only the first is computed */
          for (kk = kstart; kk < kend + 1; kk++) {
            for (jj = jstart; jj < jend + 1; jj++) {
              for (ii = istart; ii < iend + 1; ii++) {
                if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/
                  cols[cnt++] = nc * (slot + ii + gnx * jj + gnx * gny * kk);
                  for (l = 1; l < nc; l++) {
                    cols[cnt] = 1 + cols[cnt - 1];
                    cnt++;
                  }
                }
              }
            }
          }
          rows[0] = nc * (slot);
          for (l = 1; l < nc; l++) rows[l] = 1 + rows[l - 1];
          PetscCall(MatSetValuesLocal(J, nc, rows, cnt, cols, NULL, INSERT_VALUES));
        }
      }
    }
    /* do not copy values to GPU since they are all zero and not yet needed there */
    PetscCall(MatBindToCPU(J, PETSC_TRUE));
    PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
    if (bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE && bz == DM_BOUNDARY_NONE) PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_FALSE));
    PetscCall(MatBindToCPU(J, PETSC_FALSE));
    PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  }
  PetscCall(PetscFree2(rows, cols));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* 1d MPIAIJ preallocation honoring the DMDASetBlockFills() dfill/ofill patterns;
   only supports stencil width s <= 1 (checked below; continues in the next chunk). */
PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ_Fill(DM da, Mat J)
{
  DM_DA                 *dd = (DM_DA *)da->data;
  PetscInt               xs, nx, i, j, gxs, gnx, row, k, l;
  PetscInt               m, dim, s, *cols = NULL, nc, cnt, maxcnt = 0, *ocols;
  PetscInt              *ofill = dd->ofill, *dfill = dd->dfill;
  DMBoundaryType         bx;
  ISLocalToGlobalMapping ltog;
  PetscMPIInt            rank, size;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)da), &rank));
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)da), &size));

  /*
         nc - number of components per grid point
  */
  PetscCall(DMDAGetInfo(da, &dim, &m, NULL,
NULL, NULL, NULL, NULL, &nc, &s, &bx, NULL, NULL, NULL));
  PetscCheck(s <= 1, PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "Matrix creation for 1d not implemented correctly for stencil width larger than 1");
  PetscCall(DMDAGetCorners(da, &xs, NULL, NULL, &nx, NULL, NULL));
  PetscCall(DMDAGetGhostCorners(da, &gxs, NULL, NULL, &gnx, NULL, NULL));

  PetscCall(MatSetBlockSize(J, nc));
  /* cols/ocols hold per-row diagonal/off-diagonal nonzero counts for preallocation */
  PetscCall(PetscCalloc2(nx * nc, &cols, nx * nc, &ocols));

  /*
        note should be smaller for first and last process with no periodic
        does not handle dfill
  */
  cnt = 0;
  /* coupling with process to the left */
  for (i = 0; i < s; i++) {
    for (j = 0; j < nc; j++) {
      ocols[cnt] = ((rank == 0) ? 0 : (s - i) * (ofill[j + 1] - ofill[j]));
      cols[cnt]  = dfill[j + 1] - dfill[j] + (s + i) * (ofill[j + 1] - ofill[j]);
      if (rank == 0 && (dd->bx == DM_BOUNDARY_PERIODIC)) {
        /* periodic wrap: the neighbor is off-process unless we are the only rank */
        if (size > 1) ocols[cnt] += (s - i) * (ofill[j + 1] - ofill[j]);
        else cols[cnt] += (s - i) * (ofill[j + 1] - ofill[j]);
      }
      maxcnt = PetscMax(maxcnt, ocols[cnt] + cols[cnt]);
      cnt++;
    }
  }
  /* interior rows: full two-sided stencil, all on-process */
  for (i = s; i < nx - s; i++) {
    for (j = 0; j < nc; j++) {
      cols[cnt] = dfill[j + 1] - dfill[j] + 2 * s * (ofill[j + 1] - ofill[j]);
      maxcnt    = PetscMax(maxcnt, ocols[cnt] + cols[cnt]);
      cnt++;
    }
  }
  /* coupling with process to the right */
  for (i = nx - s; i < nx; i++) {
    for (j = 0; j < nc; j++) {
      ocols[cnt] = ((rank == (size - 1)) ? 0 : (i - nx + s + 1) * (ofill[j + 1] - ofill[j]));
      cols[cnt]  = dfill[j + 1] - dfill[j] + (s + nx - i - 1) * (ofill[j + 1] - ofill[j]);
      if ((rank == size - 1) && (dd->bx == DM_BOUNDARY_PERIODIC)) {
        if (size > 1) ocols[cnt] += (i - nx + s + 1) * (ofill[j + 1] - ofill[j]);
        else cols[cnt] += (i - nx + s + 1) * (ofill[j + 1] - ofill[j]);
      }
      maxcnt = PetscMax(maxcnt, ocols[cnt] + cols[cnt]);
      cnt++;
    }
  }

  PetscCall(MatSeqAIJSetPreallocation(J, 0, cols));
  PetscCall(MatMPIAIJSetPreallocation(J, 0, cols, 0, ocols));
  PetscCall(PetscFree2(cols, ocols));

  PetscCall(DMGetLocalToGlobalMapping(da, &ltog));
  PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    /* cols is reused here as scratch for GLOBAL column indices (MatSetValues below) */
    PetscCall(PetscMalloc1(maxcnt, &cols));
    row = xs * nc;
    /* coupling with process to the left */
    for (i = xs; i < xs + s; i++) {
      for (j = 0; j < nc; j++) {
        cnt = 0;
        if (rank) {
          for (l = 0; l < s; l++) {
            for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i - s + l) * nc + ofill[k];
          }
        }
        if (rank == 0 && (dd->bx == DM_BOUNDARY_PERIODIC)) {
          /* wrapped neighbor at the far (right) end of the global grid */
          for (l = 0; l < s; l++) {
            for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (m + i - s - l) * nc + ofill[k];
          }
        }
        if (dfill) {
          for (k = dfill[j]; k < dfill[j + 1]; k++) cols[cnt++] = i * nc + dfill[k];
        } else {
          for (k = 0; k < nc; k++) cols[cnt++] = i * nc + k;
        }
        for (l = 0; l < s; l++) {
          for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i + s - l) * nc + ofill[k];
        }
        PetscCall(MatSetValues(J, 1, &row, cnt, cols, NULL, INSERT_VALUES));
        row++;
      }
    }
    /* interior rows */
    for (i = xs + s; i < xs + nx - s; i++) {
      for (j = 0; j < nc; j++) {
        cnt = 0;
        for (l = 0; l < s; l++) {
          for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i - s + l) * nc + ofill[k];
        }
        if (dfill) {
          for (k = dfill[j]; k < dfill[j + 1]; k++) cols[cnt++] = i * nc + dfill[k];
        } else {
          for (k = 0; k < nc; k++) cols[cnt++] = i * nc + k;
        }
        for (l = 0; l < s; l++) {
          for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i + s - l) * nc + ofill[k];
        }
        PetscCall(MatSetValues(J, 1, &row, cnt, cols, NULL, INSERT_VALUES));
        row++;
      }
    }
    /* coupling with process to the right */
    for (i = xs + nx - s; i < xs + nx; i++) {
      for (j = 0; j < nc; j++) {
        cnt = 0;
        for (l = 0; l < s; l++) {
          for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i - s + l) * nc + ofill[k];
        }
        if (dfill) {
          for (k = dfill[j]; k < dfill[j + 1]; k++) cols[cnt++] = i * nc + dfill[k];
        } else {
          for (k = 0; k < nc; k++) cols[cnt++] = i * nc + k;
        }
        if (rank < size - 1) {
          for (l = 0; l < s; l++) {
            for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i + s - l) * nc + ofill[k];
          }
        }
        if ((rank == size - 1) && (dd->bx == DM_BOUNDARY_PERIODIC)) {
          /* NOTE(review): wrapped right-boundary index (i - s - l - m + 2); hard to
             verify from this chunk alone for s > 1, but s <= 1 is enforced above */
          for (l = 0; l < s; l++) {
            for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i - s - l - m + 2) * nc + ofill[k];
          }
        }
        PetscCall(MatSetValues(J, 1, &row, cnt, cols, NULL, INSERT_VALUES));
        row++;
      }
    }
    PetscCall(PetscFree(cols));
    /* do not copy values to GPU since they are all zero and not yet needed there */
    PetscCall(MatBindToCPU(J, PETSC_TRUE));
    PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatBindToCPU(J, PETSC_FALSE));
    PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Simple 1d MPIAIJ preallocation (uniform upper-bound per row, no fill patterns;
   body continues in the next chunk). */
PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ(DM da, Mat J)
{
PetscInt               xs, nx, i, i1, slot, gxs, gnx;
  PetscInt               m, dim, s, *cols = NULL, nc, *rows = NULL, col, cnt, l;
  PetscInt               istart, iend;
  DMBoundaryType         bx;
  ISLocalToGlobalMapping ltog, mltog;

  PetscFunctionBegin;
  /*
         nc - number of components per grid point
         col - number of colors needed in one direction for single component problem
  */
  PetscCall(DMDAGetInfo(da, &dim, &m, NULL, NULL, NULL, NULL, NULL, &nc, &s, &bx, NULL, NULL, NULL));
  if (bx == DM_BOUNDARY_NONE) PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_TRUE));
  col = 2 * s + 1;

  PetscCall(DMDAGetCorners(da, &xs, NULL, NULL, &nx, NULL, NULL));
  PetscCall(DMDAGetGhostCorners(da, &gxs, NULL, NULL, &gnx, NULL, NULL));

  PetscCall(MatSetBlockSize(J, nc));
  /* uniform upper bound col*nc nonzeros per row (no per-row dnz/onz arrays here) */
  PetscCall(MatSeqAIJSetPreallocation(J, col * nc, NULL));
  PetscCall(MatMPIAIJSetPreallocation(J, col * nc, NULL, col * nc, NULL));

  PetscCall(DMGetLocalToGlobalMapping(da, &ltog));
  /* only install the mapping if the matrix does not already carry one */
  PetscCall(MatGetLocalToGlobalMapping(J, &mltog, NULL));
  if (!mltog) PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    PetscCall(PetscMalloc2(nc, &rows, col * nc * nc, &cols));
    for (i = xs; i < xs + nx; i++) {
      /* neighbor offsets restricted to the ghosted local range [gxs, gxs+gnx) */
      istart = PetscMax(-s, gxs - i);
      iend   = PetscMin(s, gxs + gnx - i - 1);
      slot   = i - gxs;

      cnt = 0;
      for (i1 = istart; i1 < iend + 1; i1++) {
        cols[cnt++] = nc * (slot + i1);
        for (l = 1; l < nc; l++) {
          cols[cnt] = 1 + cols[cnt - 1];
          cnt++;
        }
      }
      rows[0] = nc * (slot);
      for (l = 1; l < nc; l++) rows[l] = 1 + rows[l - 1];
      PetscCall(MatSetValuesLocal(J, nc, rows, cnt, cols, NULL, INSERT_VALUES));
    }
    /* do not copy values to GPU since they are all zero and not yet needed there */
    PetscCall(MatBindToCPU(J, PETSC_TRUE));
    PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
    if (bx == DM_BOUNDARY_NONE) PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_FALSE));
    PetscCall(MatBindToCPU(J, PETSC_FALSE));
    PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
    PetscCall(PetscFree2(rows, cols));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* 1d sequential AIJ variant that uses MatSeqAIJSetTotalPreallocation() (a single
   total nonzero count) instead of per-row preallocation (continues in next chunk). */
PetscErrorCode DMCreateMatrix_DA_1d_SeqAIJ_NoPreallocation(DM da, Mat J)
{
  PetscInt               xs, nx, i, i1, slot, gxs, gnx;
  PetscInt               m, dim, s, *cols = NULL, nc, *rows = NULL, col, cnt, l;
  PetscInt               istart, iend;
  DMBoundaryType         bx;
  ISLocalToGlobalMapping ltog, mltog;

  PetscFunctionBegin;
  /*
         nc - number of components per grid point
         col - number of colors needed in one direction for single component problem
  */
  PetscCall(DMDAGetInfo(da, &dim, &m, NULL, NULL, NULL, NULL, NULL, &nc, &s, &bx, NULL, NULL, NULL));
  col = 2 * s + 1;

  PetscCall(DMDAGetCorners(da, &xs, NULL, NULL, &nx, NULL, NULL));
  PetscCall(DMDAGetGhostCorners(da, &gxs, NULL, NULL, &gnx, NULL, NULL));

  PetscCall(MatSetBlockSize(J, nc));
PetscCall(MatSeqAIJSetTotalPreallocation(J, nx * nc * col * nc)); 1646 1647 PetscCall(DMGetLocalToGlobalMapping(da, <og)); 1648 PetscCall(MatGetLocalToGlobalMapping(J, &mltog, NULL)); 1649 if (!mltog) PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog)); 1650 1651 /* 1652 For each node in the grid: we get the neighbors in the local (on processor ordering 1653 that includes the ghost points) then MatSetValuesLocal() maps those indices to the global 1654 PETSc ordering. 1655 */ 1656 if (!da->prealloc_only) { 1657 PetscCall(PetscMalloc2(nc, &rows, col * nc * nc, &cols)); 1658 for (i = xs; i < xs + nx; i++) { 1659 istart = PetscMax(-s, gxs - i); 1660 iend = PetscMin(s, gxs + gnx - i - 1); 1661 slot = i - gxs; 1662 1663 cnt = 0; 1664 for (i1 = istart; i1 < iend + 1; i1++) { 1665 cols[cnt++] = nc * (slot + i1); 1666 for (l = 1; l < nc; l++) { 1667 cols[cnt] = 1 + cols[cnt - 1]; 1668 cnt++; 1669 } 1670 } 1671 rows[0] = nc * (slot); 1672 for (l = 1; l < nc; l++) rows[l] = 1 + rows[l - 1]; 1673 PetscCall(MatSetValuesLocal(J, nc, rows, cnt, cols, NULL, INSERT_VALUES)); 1674 } 1675 /* do not copy values to GPU since they are all zero and not yet needed there */ 1676 PetscCall(MatBindToCPU(J, PETSC_TRUE)); 1677 PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY)); 1678 PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY)); 1679 if (bx == DM_BOUNDARY_NONE) PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_FALSE)); 1680 PetscCall(MatBindToCPU(J, PETSC_FALSE)); 1681 PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE)); 1682 PetscCall(PetscFree2(rows, cols)); 1683 } 1684 PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_FALSE)); 1685 PetscFunctionReturn(PETSC_SUCCESS); 1686 } 1687 1688 PetscErrorCode DMCreateMatrix_DA_2d_MPIBAIJ(DM da, Mat J) 1689 { 1690 PetscInt xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny; 1691 PetscInt m, n, dim, s, *cols, nc, col, cnt, *dnz, *onz; 1692 PetscInt istart, iend, jstart, jend, ii, jj; 1693 MPI_Comm comm; 1694 PetscScalar *values; 1695 
  DMBoundaryType         bx, by;
  DMDAStencilType        st;
  ISLocalToGlobalMapping ltog;

  PetscFunctionBegin;
  /*
         nc - number of components per grid point
         col - number of colors needed in one direction for single component problem
  */
  PetscCall(DMDAGetInfo(da, &dim, &m, &n, NULL, NULL, NULL, NULL, &nc, &s, &bx, &by, NULL, &st));
  col = 2 * s + 1;

  PetscCall(DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL));
  PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL));
  PetscCall(PetscObjectGetComm((PetscObject)da, &comm));

  /* scratch for one block row's column indices; col*col would suffice, the nc*nc factor is a safe overestimate */
  PetscCall(PetscMalloc1(col * col * nc * nc, &cols));

  PetscCall(DMGetLocalToGlobalMapping(da, &ltog));

  /* determine the matrix preallocation information */
  MatPreallocateBegin(comm, nx * ny, nx * ny, dnz, onz); /* macro: allocates and zeros dnz/onz, paired with MatPreallocateEnd() */
  for (i = xs; i < xs + nx; i++) {
    /* periodic boundaries keep the full stencil; otherwise clamp at the physical domain edges */
    istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
    iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
    for (j = ys; j < ys + ny; j++) {
      jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
      jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
      slot   = i - gxs + gnx * (j - gys); /* local (ghosted) index of grid point (i,j) */

      /* Find block columns in block row */
      cnt = 0;
      for (ii = istart; ii < iend + 1; ii++) {
        for (jj = jstart; jj < jend + 1; jj++) {
          if (st == DMDA_STENCIL_BOX || !ii || !jj) { /* BOX or on the STAR */
            cols[cnt++] = slot + ii + gnx * jj;
          }
        }
      }
      PetscCall(MatPreallocateSetLocalBlock(ltog, 1, &slot, ltog, cnt, cols, dnz, onz));
    }
  }
  PetscCall(MatSeqBAIJSetPreallocation(J, nc, 0, dnz));
  PetscCall(MatMPIBAIJSetPreallocation(J, nc, 0, dnz, 0, onz));
  MatPreallocateEnd(dnz, onz);

  PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    /* insert explicit zero blocks everywhere so the nonzero pattern is fully instantiated */
    PetscCall(PetscCalloc1(col * col * nc * nc, &values));
    for (i = xs; i < xs + nx; i++) {
      istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
      iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
      for (j = ys; j < ys + ny; j++) {
        jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
        jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
        slot   = i - gxs + gnx * (j - gys);
        cnt    = 0;
        for (ii = istart; ii < iend + 1; ii++) {
          for (jj = jstart; jj < jend + 1; jj++) {
            if (st == DMDA_STENCIL_BOX || !ii || !jj) { /* BOX or on the STAR */
              cols[cnt++] = slot + ii + gnx * jj;
            }
          }
        }
        PetscCall(MatSetValuesBlockedLocal(J, 1, &slot, cnt, cols, values, INSERT_VALUES));
      }
    }
    PetscCall(PetscFree(values));
    /* do not copy values to GPU since they are all zero and not yet needed there */
    PetscCall(MatBindToCPU(J, PETSC_TRUE));
    PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatBindToCPU(J, PETSC_FALSE));
    PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  }
  PetscCall(PetscFree(cols));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  3d analogue of DMCreateMatrix_DA_2d_MPIBAIJ: preallocates and optionally zero-fills
  the blocked BAIJ matrix for a 3d DMDA with box or star stencil.
*/
PetscErrorCode DMCreateMatrix_DA_3d_MPIBAIJ(DM da, Mat J)
{
  PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
  PetscInt               m, n, dim, s, *cols, k, nc, col, cnt, p, *dnz, *onz;
  PetscInt               istart, iend, jstart, jend, kstart, kend, zs, nz, gzs, gnz, ii, jj, kk;
  MPI_Comm               comm;
  PetscScalar           *values;
  DMBoundaryType         bx, by, bz;
  DMDAStencilType        st;
  ISLocalToGlobalMapping ltog;

  PetscFunctionBegin;
  /*
         nc - number of components per grid point
         col - number of colors needed in one direction for single component problem
  */
  PetscCall(DMDAGetInfo(da, &dim, &m, &n, &p, NULL, NULL, NULL, &nc, &s, &bx, &by, &bz, &st));
  col = 2 * s + 1;

  PetscCall(DMDAGetCorners(da, &xs, &ys, &zs, &nx, &ny, &nz));
  PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, &gzs, &gnx, &gny, &gnz));
  PetscCall(PetscObjectGetComm((PetscObject)da, &comm));

  /* scratch for one block row's column indices; col^3 block columns at most */
  PetscCall(PetscMalloc1(col * col * col, &cols));

  PetscCall(DMGetLocalToGlobalMapping(da, &ltog));

  /* determine the matrix preallocation information */
  MatPreallocateBegin(comm, nx * ny * nz, nx * ny * nz, dnz, onz);
  for (i = xs; i < xs + nx; i++) {
    /* periodic boundaries keep the full stencil; otherwise clamp at the physical domain edges */
    istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
    iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
    for (j = ys; j < ys + ny; j++) {
      jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
      jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
      for (k = zs; k < zs + nz; k++) {
        kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
        kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));

        slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs); /* local (ghosted) index of (i,j,k) */

        /* Find block columns in block row */
        cnt = 0;
        for (ii = istart; ii < iend + 1; ii++) {
          for (jj = jstart; jj < jend + 1; jj++) {
            for (kk = kstart; kk < kend + 1; kk++) {
              if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/
                cols[cnt++] = slot + ii + gnx * jj + gnx * gny * kk;
              }
            }
          }
        }
        PetscCall(MatPreallocateSetLocalBlock(ltog, 1, &slot, ltog, cnt, cols, dnz, onz));
      }
    }
  }
  PetscCall(MatSeqBAIJSetPreallocation(J, nc, 0, dnz));
  PetscCall(MatMPIBAIJSetPreallocation(J, nc, 0, dnz, 0, onz));
  MatPreallocateEnd(dnz, onz);

  PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    /* insert explicit zero blocks everywhere so the nonzero pattern is fully instantiated */
    PetscCall(PetscCalloc1(col * col * col * nc * nc, &values));
    for (i = xs; i < xs + nx; i++) {
      istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
      iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
      for (j = ys; j < ys + ny; j++) {
        jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
        jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
        for (k = zs; k < zs + nz; k++) {
          kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
          kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));

          slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);

          cnt = 0;
          for (ii = istart; ii < iend + 1; ii++) {
            for (jj = jstart; jj < jend + 1; jj++) {
              for (kk = kstart; kk < kend + 1; kk++) {
                if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/
                  cols[cnt++] = slot + ii + gnx * jj + gnx * gny * kk;
                }
              }
            }
          }
          PetscCall(MatSetValuesBlockedLocal(J, 1, &slot, cnt, cols, values, INSERT_VALUES));
        }
      }
    }
    PetscCall(PetscFree(values));
    /* do not copy values to GPU since they are all zero and not yet needed there */
    PetscCall(MatBindToCPU(J, PETSC_TRUE));
    PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatBindToCPU(J, PETSC_FALSE));
    PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  }
  PetscCall(PetscFree(cols));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  This helper is for SBAIJ preallocation, to discard the lower-triangular values which are difficult to
  identify in the local ordering with periodic domain.
*/
static PetscErrorCode L2GFilterUpperTriangular(ISLocalToGlobalMapping ltog, PetscInt *row, PetscInt *cnt, PetscInt col[])
{
  PetscInt i, n;

  PetscFunctionBegin;
  /* convert the block row and its block columns to GLOBAL block indices, in place */
  PetscCall(ISLocalToGlobalMappingApplyBlock(ltog, 1, row, row));
  PetscCall(ISLocalToGlobalMappingApplyBlock(ltog, *cnt, col, col));
  /* compact col[] down to the entries on or above the diagonal */
  for (i = 0, n = 0; i < *cnt; i++) {
    if (col[i] >= *row) col[n++] = col[i];
  }
  *cnt = n;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  Preallocates and optionally zero-fills the symmetric-blocked SBAIJ matrix for a
  2d DMDA.  Since SBAIJ stores only the upper triangle, the stencil columns are
  filtered through L2GFilterUpperTriangular(), which also converts them to global
  block indices -- hence the non-Local MatPreallocate/MatSetValues variants below.
*/
PetscErrorCode DMCreateMatrix_DA_2d_MPISBAIJ(DM da, Mat J)
{
  PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
  PetscInt               m, n, dim, s, *cols, nc, col, cnt, *dnz, *onz;
  PetscInt               istart, iend, jstart, jend, ii, jj;
  MPI_Comm               comm;
  PetscScalar           *values;
  DMBoundaryType         bx, by;
  DMDAStencilType        st;
  ISLocalToGlobalMapping ltog;

  PetscFunctionBegin;
  /*
         nc - number of components per grid point
         col - number of colors needed in one direction for single component problem
  */
  PetscCall(DMDAGetInfo(da, &dim, &m, &n, NULL, NULL, NULL, NULL, &nc, &s, &bx, &by, NULL, &st));
  col = 2 * s + 1;

  PetscCall(DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL));
  PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL));
  PetscCall(PetscObjectGetComm((PetscObject)da, &comm));

  PetscCall(PetscMalloc1(col * col * nc * nc, &cols));

  PetscCall(DMGetLocalToGlobalMapping(da, &ltog));

  /* determine the matrix preallocation information */
  MatPreallocateBegin(comm, nx * ny, nx * ny, dnz, onz);
  for (i = xs; i < xs + nx; i++) {
    istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
    iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
    for (j = ys; j < ys + ny; j++) {
      jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
      jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
      slot   = i - gxs + gnx * (j - gys);

      /* Find block columns in block row */
      cnt = 0;
      for (ii = istart; ii < iend + 1; ii++) {
        for (jj = jstart; jj < jend + 1; jj++) {
          if (st == DMDA_STENCIL_BOX || !ii || !jj) cols[cnt++] = slot + ii + gnx * jj;
        }
      }
      /* drop lower-triangular columns; slot and cols[] come back as global block indices */
      PetscCall(L2GFilterUpperTriangular(ltog, &slot, &cnt, cols));
      PetscCall(MatPreallocateSymmetricSetBlock(slot, cnt, cols, dnz, onz));
    }
  }
  PetscCall(MatSeqSBAIJSetPreallocation(J, nc, 0, dnz));
  PetscCall(MatMPISBAIJSetPreallocation(J, nc, 0, dnz, 0, onz));
  MatPreallocateEnd(dnz, onz);

  PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    PetscCall(PetscCalloc1(col * col * nc * nc, &values));
    for (i = xs; i < xs + nx; i++) {
      istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
      iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
      for (j = ys; j < ys + ny; j++) {
        jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
        jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
        slot   = i - gxs + gnx * (j - gys);

        /* Find block columns in block row */
        cnt = 0;
        for (ii = istart; ii < iend + 1; ii++) {
          for (jj = jstart; jj < jend + 1; jj++) {
            if (st == DMDA_STENCIL_BOX || !ii || !jj) cols[cnt++] = slot + ii + gnx * jj;
          }
        }
        PetscCall(L2GFilterUpperTriangular(ltog, &slot, &cnt, cols));
        /* global indices after the filter, so use the non-Local blocked insertion */
        PetscCall(MatSetValuesBlocked(J, 1, &slot, cnt, cols, values, INSERT_VALUES));
      }
    }
    PetscCall(PetscFree(values));
    /* do not copy values to GPU since they are all zero and not yet needed there */
    PetscCall(MatBindToCPU(J, PETSC_TRUE));
    PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatBindToCPU(J, PETSC_FALSE));
    PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  }
  PetscCall(PetscFree(cols));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  3d analogue of DMCreateMatrix_DA_2d_MPISBAIJ: symmetric-blocked SBAIJ matrix for
  a 3d DMDA, keeping only the upper-triangular block columns.
*/
PetscErrorCode DMCreateMatrix_DA_3d_MPISBAIJ(DM da, Mat J)
{
  PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
  PetscInt               m, n, dim, s, *cols, k, nc, col, cnt, p, *dnz, *onz;
  PetscInt               istart, iend, jstart, jend, kstart, kend, zs, nz, gzs, gnz, ii, jj, kk;
  MPI_Comm               comm;
  PetscScalar           *values;
  DMBoundaryType         bx, by, bz;
  DMDAStencilType        st;
  ISLocalToGlobalMapping ltog;

  PetscFunctionBegin;
  /*
         nc - number of components per grid point
         col - number of colors needed in one direction for single component problem
  */
  PetscCall(DMDAGetInfo(da, &dim, &m, &n, &p, NULL, NULL, NULL, &nc, &s, &bx, &by, &bz, &st));
  col = 2 * s + 1;

  PetscCall(DMDAGetCorners(da, &xs, &ys, &zs, &nx, &ny, &nz));
  PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, &gzs, &gnx, &gny, &gnz));
  PetscCall(PetscObjectGetComm((PetscObject)da, &comm));

  /* create the matrix */
  PetscCall(PetscMalloc1(col * col * col, &cols));

  PetscCall(DMGetLocalToGlobalMapping(da, &ltog));

  /* determine the matrix preallocation information */
  MatPreallocateBegin(comm, nx * ny * nz, nx * ny * nz, dnz, onz);
  for (i = xs; i < xs + nx; i++) {
    /* periodic boundaries keep the full stencil; otherwise clamp at the physical domain edges */
    istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
    iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
    for (j = ys; j < ys + ny; j++) {
      jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
      jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
      for (k = zs; k < zs + nz; k++) {
        kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
        kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));

        slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs); /* local (ghosted) index of (i,j,k) */

        /* Find block columns in block row */
        cnt = 0;
        for (ii = istart; ii < iend + 1; ii++) {
          for (jj = jstart; jj < jend + 1; jj++) {
            for (kk = kstart; kk < kend + 1; kk++) {
              if ((st == DMDA_STENCIL_BOX) || (!ii && !jj) || (!jj && !kk) || (!ii && !kk)) cols[cnt++] = slot + ii + gnx * jj + gnx * gny * kk;
            }
          }
        }
        /* drop lower-triangular columns; slot and cols[] come back as global block indices */
        PetscCall(L2GFilterUpperTriangular(ltog, &slot, &cnt, cols));
        PetscCall(MatPreallocateSymmetricSetBlock(slot, cnt, cols, dnz, onz));
      }
    }
  }
  PetscCall(MatSeqSBAIJSetPreallocation(J, nc, 0, dnz));
  PetscCall(MatMPISBAIJSetPreallocation(J, nc, 0, dnz, 0, onz));
  MatPreallocateEnd(dnz, onz);

  PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    /* insert explicit zero blocks everywhere so the nonzero pattern is fully instantiated */
    PetscCall(PetscCalloc1(col * col * col * nc * nc, &values));
    for (i = xs; i < xs + nx; i++) {
      istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
      iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
      for (j = ys; j < ys + ny; j++) {
        jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
        jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
        for (k = zs; k < zs + nz; k++) {
          kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
          kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));

          slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);

          cnt = 0;
          for (ii = istart; ii < iend + 1; ii++) {
            for (jj = jstart; jj < jend + 1; jj++) {
              for (kk = kstart; kk < kend + 1; kk++) {
                if ((st == DMDA_STENCIL_BOX) || (!ii && !jj) || (!jj && !kk) || (!ii && !kk)) cols[cnt++] = slot + ii + gnx * jj + gnx * gny * kk;
              }
            }
          }
          PetscCall(L2GFilterUpperTriangular(ltog, &slot, &cnt, cols));
          /* global indices after the filter, so use the non-Local blocked insertion */
          PetscCall(MatSetValuesBlocked(J, 1, &slot, cnt, cols, values, INSERT_VALUES));
        }
      }
    }
    PetscCall(PetscFree(values));
    /* do not copy values to GPU since they are all zero and not yet needed there */
    PetscCall(MatBindToCPU(J, PETSC_TRUE));
    PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatBindToCPU(J, PETSC_FALSE));
    PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  }
  PetscCall(PetscFree(cols));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  Builds the point-wise AIJ matrix for a 3d DMDA honoring the per-block fill
  patterns set with DMDASetBlockFills(): dd->dfill gives the within-point
  (diagonal block) coupling and dd->ofill the neighbor (off-diagonal block)
  coupling, both in the modified CSR layout produced by
  DMDASetBlockFills_Private() (entries 0..w mark row starts, the rest are
  column indices).  A NULL dfill means a dense diagonal block.
*/
PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ_Fill(DM da, Mat J)
{
  PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
  PetscInt               m, n, dim, s, *cols, k, nc, row, col, cnt, maxcnt = 0, l, p, *dnz, *onz;
  PetscInt               istart, iend, jstart, jend, kstart, kend, zs, nz, gzs, gnz, ii, jj, kk, M, N, P;
  DM_DA                 *dd = (DM_DA *)da->data;
  PetscInt               ifill_col, *dfill = dd->dfill, *ofill = dd->ofill;
  MPI_Comm               comm;
  PetscScalar           *values;
  DMBoundaryType         bx, by, bz;
  ISLocalToGlobalMapping ltog;
  DMDAStencilType        st;
  PetscBool              removedups = PETSC_FALSE;

  PetscFunctionBegin;
  /*
         nc - number of components per grid point
         col - number of colors needed in one direction for single component problem
  */
  PetscCall(DMDAGetInfo(da, &dim, &m, &n, &p, &M, &N, &P, &nc, &s, &bx, &by, &bz, &st));
  col = 2 * s + 1;

  /*
     With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times
     because of "wrapping" around the end of the domain hitting an entry already counted in the other direction.
  */
  if (M == 1 && 2 * s >= m) removedups = PETSC_TRUE;
  if (N == 1 && 2 * s >= n) removedups = PETSC_TRUE;
  if (P == 1 && 2 * s >= p) removedups = PETSC_TRUE;

  PetscCall(DMDAGetCorners(da, &xs, &ys, &zs, &nx, &ny, &nz));
  PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, &gzs, &gnx, &gny, &gnz));
  PetscCall(PetscObjectGetComm((PetscObject)da, &comm));

  /* scratch for one scalar row: at most nc couplings per each of the col^3 stencil points */
  PetscCall(PetscMalloc1(col * col * col * nc, &cols));
  PetscCall(DMGetLocalToGlobalMapping(da, &ltog));

  /* determine the matrix preallocation information */
  MatPreallocateBegin(comm, nc * nx * ny * nz, nc * nx * ny * nz, dnz, onz);

  PetscCall(MatSetBlockSize(J, nc));
  for (i = xs; i < xs + nx; i++) {
    istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
    iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
    for (j = ys; j < ys + ny; j++) {
      jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
      jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
      for (k = zs; k < zs + nz; k++) {
        kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
        kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));

        slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);

        /* one scalar row per component l of the grid point */
        for (l = 0; l < nc; l++) {
          cnt = 0;
          for (ii = istart; ii < iend + 1; ii++) {
            for (jj = jstart; jj < jend + 1; jj++) {
              for (kk = kstart; kk < kend + 1; kk++) {
                if (ii || jj || kk) {
                  if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/
                    /* neighbor point: couple only to the components listed in row l of ofill */
                    for (ifill_col = ofill[l]; ifill_col < ofill[l + 1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc * (slot + ii + gnx * jj + gnx * gny * kk);
                  }
                } else {
                  /* the point itself: use dfill's row l, or all nc components when dfill is NULL (dense block) */
                  if (dfill) {
                    for (ifill_col = dfill[l]; ifill_col < dfill[l + 1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc * (slot + ii + gnx * jj + gnx * gny * kk);
                  } else {
                    for (ifill_col = 0; ifill_col < nc; ifill_col++) cols[cnt++] = ifill_col + nc * (slot + ii + gnx * jj + gnx * gny * kk);
                  }
                }
              }
            }
          }
          row    = l + nc * (slot);
          maxcnt = PetscMax(maxcnt, cnt); /* widest row seen; sizes the zero-values buffer below */
          if (removedups) PetscCall(MatPreallocateSetLocalRemoveDups(ltog, 1, &row, ltog, cnt, cols, dnz, onz));
          else PetscCall(MatPreallocateSetLocal(ltog, 1, &row, ltog, cnt, cols, dnz, onz));
        }
      }
    }
  }
  PetscCall(MatSeqAIJSetPreallocation(J, 0, dnz));
  PetscCall(MatMPIAIJSetPreallocation(J, 0, dnz, 0, onz));
  MatPreallocateEnd(dnz, onz);
  PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    PetscCall(PetscCalloc1(maxcnt, &values));
    for (i = xs; i < xs + nx; i++) {
      istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
      iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
      for (j = ys; j < ys + ny; j++) {
        jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
        jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
        for (k = zs; k < zs + nz; k++) {
          kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
          kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));

          slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);

          for (l = 0; l < nc; l++) {
            cnt = 0;
            for (ii = istart; ii < iend + 1; ii++) {
              for (jj = jstart; jj < jend + 1; jj++) {
                for (kk = kstart; kk < kend + 1; kk++) {
                  if (ii || jj || kk) {
                    if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/
                      for (ifill_col = ofill[l]; ifill_col < ofill[l + 1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc * (slot + ii + gnx * jj + gnx * gny * kk);
                    }
                  } else {
                    if (dfill) {
                      for (ifill_col = dfill[l]; ifill_col < dfill[l + 1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc * (slot + ii + gnx * jj + gnx * gny * kk);
                    } else {
                      for (ifill_col = 0; ifill_col < nc; ifill_col++) cols[cnt++] = ifill_col + nc * (slot + ii + gnx * jj + gnx * gny * kk);
                    }
                  }
                }
              }
            }
            row = l + nc * (slot);
            PetscCall(MatSetValuesLocal(J, 1, &row, cnt, cols, values, INSERT_VALUES));
          }
        }
      }
    }
    PetscCall(PetscFree(values));
    /* do not copy values to GPU since they are all zero and not yet needed there */
    PetscCall(MatBindToCPU(J, PETSC_TRUE));
    PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatBindToCPU(J, PETSC_FALSE));
    PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  }
  PetscCall(PetscFree(cols));
  PetscFunctionReturn(PETSC_SUCCESS);
}