#include <petsc/private/dmdaimpl.h> /*I "petscdmda.h" I*/
#include <petscmat.h>
#include <petscbt.h>

extern PetscErrorCode DMCreateColoring_DA_1d_MPIAIJ(DM, ISColoringType, ISColoring *);
extern PetscErrorCode DMCreateColoring_DA_2d_MPIAIJ(DM, ISColoringType, ISColoring *);
extern PetscErrorCode DMCreateColoring_DA_2d_5pt_MPIAIJ(DM, ISColoringType, ISColoring *);
extern PetscErrorCode DMCreateColoring_DA_3d_MPIAIJ(DM, ISColoringType, ISColoring *);

/*
   For a ghost index i that may be negative or greater than the upper bound this
   maps it into the 0:m-1 range using periodicity
*/
#define SetInRange(i, m) ((i < 0) ? m + i : ((i >= m) ? i - m : i))

static PetscErrorCode DMDASetBlockFills_Private(const PetscInt *dfill, PetscInt w, PetscInt **rfill)
{
  PetscInt i, j, nz, *fill;

  PetscFunctionBegin;
  if (!dfill) PetscFunctionReturn(PETSC_SUCCESS);

  /* count the number of nonzeros */
  nz = 0;
  for (i = 0; i < w; i++) {
    for (j = 0; j < w; j++) {
      if (dfill[w * i + j]) nz++;
    }
  }
  PetscCall(PetscMalloc1(nz + w + 1, &fill));
  /* construct modified CSR storage of nonzero structure */
  /* fill[0 -- w] marks starts of each row of column indices (and end of last row)
     so fill[1] - fill[0] gives number of nonzeros in first row etc */
  nz = w + 1;
  for (i = 0; i < w; i++) {
    fill[i] = nz;
    for (j = 0; j < w; j++) {
      if (dfill[w * i + j]) {
        fill[nz] = j;
        nz++;
      }
    }
  }
  fill[w] = nz;

  *rfill = fill;
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode DMDASetBlockFillsSparse_Private(const PetscInt *dfillsparse, PetscInt w, PetscInt **rfill)
{
  PetscInt nz;

  PetscFunctionBegin;
  if (!dfillsparse) PetscFunctionReturn(PETSC_SUCCESS);

  /* Determine the number of non-zeros */
  nz = (dfillsparse[w] - w - 1);

  /* Allocate space for our copy of the given sparse matrix representation. */
  PetscCall(PetscMalloc1(nz + w + 1, rfill));
  PetscCall(PetscArraycpy(*rfill, dfillsparse, nz + w + 1));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode DMDASetBlockFills_Private2(DM_DA *dd)
{
  PetscInt i, k, cnt = 1;

  PetscFunctionBegin;
  /* ofillcols tracks the columns of ofill that have any nonzero in them; the value in each location is the number of
     columns to the left with any nonzeros in them plus 1 */
  PetscCall(PetscCalloc1(dd->w, &dd->ofillcols));
  for (i = 0; i < dd->w; i++) {
    for (k = dd->ofill[i]; k < dd->ofill[i + 1]; k++) dd->ofillcols[dd->ofill[k]] = 1;
  }
  for (i = 0; i < dd->w; i++) {
    if (dd->ofillcols[i]) dd->ofillcols[i] = cnt++;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  DMDASetBlockFills - Sets the fill pattern in each block for a multi-component problem
  of the matrix returned by `DMCreateMatrix()`.

  Logically Collective

  Input Parameters:
+ da    - the distributed array
. dfill - the fill pattern in the diagonal block (may be `NULL`, which means use a dense block)
- ofill - the fill pattern in the off-diagonal blocks

  Level: developer

  Notes:
  This only makes sense when you are doing multicomponent problems but using the
  `MATMPIAIJ` matrix format

  The format for `dfill` and `ofill` is a 2 dimensional dof by dof matrix with 1 entries
  representing coupling and 0 entries for missing coupling. For example
.vb
  dfill[9] = {1, 0, 0,
              1, 1, 0,
              0, 1, 1}
.ve
  means that row 0 is coupled only with itself in the diagonal block, row 1 is coupled with
  itself and row 0 (in the diagonal block), and row 2 is coupled with itself and row 1 (in the
  diagonal block).
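  As a minimal usage sketch (hypothetical values for a 3-component `da`; the `ofill` pattern
  here, which couples each component only with itself across grid points, is illustrative only)
  one might write
.vb
  PetscInt dfill[9] = {1, 0, 0,
                       1, 1, 0,
                       0, 1, 1};
  PetscInt ofill[9] = {1, 0, 0,
                       0, 1, 0,
                       0, 0, 1};
  PetscCall(DMDASetBlockFills(da, dfill, ofill));
.ve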
  `DMDASetGetMatrix()` allows you to provide general code for those more complicated nonzero patterns than
  can be represented in the `dfill`, `ofill` format

  Contributed by\:
  Glenn Hammond

.seealso: `DM`, `DMDA`, `DMCreateMatrix()`, `DMDASetGetMatrix()`, `DMSetMatrixPreallocateOnly()`
@*/
PetscErrorCode DMDASetBlockFills(DM da, const PetscInt *dfill, const PetscInt *ofill)
{
  DM_DA *dd = (DM_DA *)da->data;

  PetscFunctionBegin;
  /* save the given dfill and ofill information */
  PetscCall(DMDASetBlockFills_Private(dfill, dd->w, &dd->dfill));
  PetscCall(DMDASetBlockFills_Private(ofill, dd->w, &dd->ofill));

  /* count nonzeros in ofill columns */
  PetscCall(DMDASetBlockFills_Private2(dd));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  DMDASetBlockFillsSparse - Sets the fill pattern in each block for a multi-component problem
  of the matrix returned by `DMCreateMatrix()`, using sparse representations
  of fill patterns.

  Logically Collective

  Input Parameters:
+ da          - the distributed array
. dfillsparse - the sparse fill pattern in the diagonal block (may be `NULL`, which means use a dense block)
- ofillsparse - the sparse fill pattern in the off-diagonal blocks

  Level: developer

  Notes:
  This only makes sense when you are doing multicomponent problems but using the
  `MATMPIAIJ` matrix format

  The format for `dfillsparse` and `ofillsparse` is a sparse representation of a
  dof-by-dof matrix with 1 entries representing coupling and 0 entries
  for missing coupling. The sparse representation is a 1 dimensional
  array of length nz + dof + 1, where nz is the number of non-zeros in
  the matrix. The first dof entries in the array give the
  starting array indices of each row's items in the rest of the array,
  the dof+1st item contains the value nz + dof + 1 (i.e. the entire length of the array)
  and the remaining nz items give the column indices of each of
  the 1s within the logical 2D matrix. Each row's items within
  the array are the column indices of the 1s within that row
  of the 2D matrix. PETSc developers may recognize that this is the
  same format as that computed by the `DMDASetBlockFills_Private()`
  function from a dense 2D matrix representation.
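  For example, the dense `dfill` pattern shown in `DMDASetBlockFills()` (dof = 3, nz = 5)
  corresponds to the sparse representation
.vb
  dfillsparse[9] = {4, 5, 7, 9, 0, 0, 1, 1, 2}
.ve
  where entries 0 through 2 (4, 5, 7) are the row starts, entry 3 holds nz + dof + 1 = 9,
  and the remaining entries list the column indices row by row, namely {0} for row 0,
  {0, 1} for row 1 and {1, 2} for row 2.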
  `DMDASetGetMatrix()` allows you to provide general code for those more complicated nonzero patterns than
  can be represented in the `dfill`, `ofill` format

  Contributed by\:
  Philip C. Roth

.seealso: `DM`, `DMDA`, `DMDASetBlockFills()`, `DMCreateMatrix()`, `DMDASetGetMatrix()`, `DMSetMatrixPreallocateOnly()`
@*/
PetscErrorCode DMDASetBlockFillsSparse(DM da, const PetscInt *dfillsparse, const PetscInt *ofillsparse)
{
  DM_DA *dd = (DM_DA *)da->data;

  PetscFunctionBegin;
  /* save the given dfill and ofill information */
  PetscCall(DMDASetBlockFillsSparse_Private(dfillsparse, dd->w, &dd->dfill));
  PetscCall(DMDASetBlockFillsSparse_Private(ofillsparse, dd->w, &dd->ofill));

  /* count nonzeros in ofill columns */
  PetscCall(DMDASetBlockFills_Private2(dd));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode DMCreateColoring_DA(DM da, ISColoringType ctype, ISColoring *coloring)
{
  PetscInt       dim, m, n, p, nc;
  DMBoundaryType bx, by, bz;
  MPI_Comm       comm;
  PetscMPIInt    size;
  PetscBool      isBAIJ;
  DM_DA         *dd = (DM_DA *)da->data;

  PetscFunctionBegin;
  /*
                                                  m
          ------------------------------------------------------
         |                                                      |
         |                                                      |
         |               ----------------------                 |
         |               |                    |                 |
      n  |           yn  |                    |                 |
         |               |                    |                 |
         |               .---------------------                 |
         |             (xs,ys)     xn                           |
         |            .                                         |
         |         (gxs,gys)                                    |
         |                                                      |
          ------------------------------------------------------
  */

  /*
     nc - number of components per grid point
     col - number of colors needed in one direction for single component problem
  */
  PetscCall(DMDAGetInfo(da, &dim, NULL, NULL, NULL, &m, &n, &p, &nc, NULL, &bx, &by, &bz, NULL));

  PetscCall(PetscObjectGetComm((PetscObject)da, &comm));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  if (ctype == IS_COLORING_LOCAL) {
    if (size == 1) {
      ctype = IS_COLORING_GLOBAL;
    } else {
      PetscCheck((dim == 1) || !((m == 1 && bx == DM_BOUNDARY_PERIODIC) || (n == 1 && by == DM_BOUNDARY_PERIODIC) || (p == 1 && bz == DM_BOUNDARY_PERIODIC)), PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "IS_COLORING_LOCAL cannot be used for periodic boundary condition having both ends of the domain on the same process");
    }
  }

  /* Tell the DMDA it has 1 degree of freedom per grid point so that the coloring for BAIJ
     matrices is for the blocks, not the individual matrix elements */
  PetscCall(PetscStrbeginswith(da->mattype, MATBAIJ, &isBAIJ));
  if (!isBAIJ) PetscCall(PetscStrbeginswith(da->mattype, MATMPIBAIJ, &isBAIJ));
  if (!isBAIJ) PetscCall(PetscStrbeginswith(da->mattype, MATSEQBAIJ, &isBAIJ));
  if (isBAIJ) {
    dd->w  = 1;
    dd->xs = dd->xs / nc;
    dd->xe = dd->xe / nc;
    dd->Xs = dd->Xs / nc;
    dd->Xe = dd->Xe / nc;
  }

  /*
     We do not provide a getcoloring function in the DMDA operations because
     the basic DMDA does not know about matrices. We think of DMDA as being
     more low-level than matrices.
  */
  if (dim == 1) PetscCall(DMCreateColoring_DA_1d_MPIAIJ(da, ctype, coloring));
  else if (dim == 2) PetscCall(DMCreateColoring_DA_2d_MPIAIJ(da, ctype, coloring));
  else if (dim == 3) PetscCall(DMCreateColoring_DA_3d_MPIAIJ(da, ctype, coloring));
  else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "Not done for %" PetscInt_FMT " dimension, send us mail petsc-maint@mcs.anl.gov for code", dim);
  if (isBAIJ) {
    dd->w  = nc;
    dd->xs = dd->xs * nc;
    dd->xe = dd->xe * nc;
    dd->Xs = dd->Xs * nc;
    dd->Xe = dd->Xe * nc;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
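/*
   The colorings below work componentwise: with col = 2*s + 1, grid point (i,j) and
   component k receive color k + nc*((i % col) + col*(j % col)) (with an extra
   col*col*(k % col) term in 3d), so any two nodes within stencil distance s of each
   other get different colors. For instance, with s = 1 (col = 3) and nc = 1 the point
   (4,5) gets color (4 % 3) + 3*(5 % 3) = 7 of the nc*col*col = 9 possible colors.
*/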
PetscErrorCode DMCreateColoring_DA_2d_MPIAIJ(DM da, ISColoringType ctype, ISColoring *coloring)
{
  PetscInt         xs, ys, nx, ny, i, j, ii, gxs, gys, gnx, gny, m, n, M, N, dim, s, k, nc, col;
  PetscInt         ncolors = 0;
  MPI_Comm         comm;
  DMBoundaryType   bx, by;
  DMDAStencilType  st;
  ISColoringValue *colors;
  DM_DA           *dd = (DM_DA *)da->data;

  PetscFunctionBegin;
  /*
     nc - number of components per grid point
     col - number of colors needed in one direction for single component problem
  */
  PetscCall(DMDAGetInfo(da, &dim, &m, &n, NULL, &M, &N, NULL, &nc, &s, &bx, &by, NULL, &st));
  col = 2 * s + 1;
  PetscCall(DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL));
  PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL));
  PetscCall(PetscObjectGetComm((PetscObject)da, &comm));

  /* special case as taught to us by Paul Hovland */
  if (st == DMDA_STENCIL_STAR && s == 1) {
    PetscCall(DMCreateColoring_DA_2d_5pt_MPIAIJ(da, ctype, coloring));
  } else {
    if (ctype == IS_COLORING_GLOBAL) {
      if (!dd->localcoloring) {
        PetscCall(PetscMalloc1(nc * nx * ny, &colors));
        ii = 0;
        for (j = ys; j < ys + ny; j++) {
          for (i = xs; i < xs + nx; i++) {
            for (k = 0; k < nc; k++) colors[ii++] = k + nc * ((i % col) + col * (j % col));
          }
        }
        ncolors = nc + nc * (col - 1 + col * (col - 1));
        PetscCall(ISColoringCreate(comm, ncolors, nc * nx * ny, colors, PETSC_OWN_POINTER, &dd->localcoloring));
      }
      *coloring = dd->localcoloring;
    } else if (ctype == IS_COLORING_LOCAL) {
      if (!dd->ghostedcoloring) {
        PetscCall(PetscMalloc1(nc * gnx * gny, &colors));
        ii = 0;
        for (j = gys; j < gys + gny; j++) {
          for (i = gxs; i < gxs + gnx; i++) {
            for (k = 0; k < nc; k++) {
              /* the complicated stuff is to handle periodic boundaries */
              colors[ii++] = k + nc * ((SetInRange(i, m) % col) + col * (SetInRange(j, n) % col));
            }
          }
        }
        ncolors = nc + nc * (col - 1 + col * (col - 1));
        PetscCall(ISColoringCreate(comm, ncolors, nc * gnx * gny, colors, PETSC_OWN_POINTER, &dd->ghostedcoloring));
        /* PetscIntView(ncolors,(PetscInt*)colors,0); */

        PetscCall(ISColoringSetType(dd->ghostedcoloring, IS_COLORING_LOCAL));
      }
      *coloring = dd->ghostedcoloring;
    } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_ARG_WRONG, "Unknown ISColoringType %d", (int)ctype);
  }
  PetscCall(ISColoringReference(*coloring));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode DMCreateColoring_DA_3d_MPIAIJ(DM da, ISColoringType ctype, ISColoring *coloring)
{
  PetscInt         xs, ys, nx, ny, i, j, gxs, gys, gnx, gny, m, n, p, dim, s, k, nc, col, zs, gzs, ii, l, nz, gnz, M, N, P;
  PetscInt         ncolors;
  MPI_Comm         comm;
  DMBoundaryType   bx, by, bz;
  DMDAStencilType  st;
  ISColoringValue *colors;
  DM_DA           *dd = (DM_DA *)da->data;

  PetscFunctionBegin;
  /*
     nc - number of components per grid point
     col - number of colors needed in one direction for single component problem
  */
  PetscCall(DMDAGetInfo(da, &dim, &m, &n, &p, &M, &N, &P, &nc, &s, &bx, &by, &bz, &st));
  col = 2 * s + 1;
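  /* With periodic boundaries the modular (i % col) coloring is only consistent across the
     wrap-around when the grid size is divisible by col = 2*s + 1; otherwise two points
     within stencil distance of each other could receive the same color, hence the checks below */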
  PetscCheck(bx != DM_BOUNDARY_PERIODIC || (m % col) == 0, PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "For coloring efficiency ensure number of grid points in X is divisible\nby 2*stencil_width + 1\n");
  PetscCheck(by != DM_BOUNDARY_PERIODIC || (n % col) == 0, PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "For coloring efficiency ensure number of grid points in Y is divisible\nby 2*stencil_width + 1\n");
  PetscCheck(bz != DM_BOUNDARY_PERIODIC || (p % col) == 0, PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "For coloring efficiency ensure number of grid points in Z is divisible\nby 2*stencil_width + 1\n");

  PetscCall(DMDAGetCorners(da, &xs, &ys, &zs, &nx, &ny, &nz));
  PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, &gzs, &gnx, &gny, &gnz));
  PetscCall(PetscObjectGetComm((PetscObject)da, &comm));

  /* create the coloring */
  if (ctype == IS_COLORING_GLOBAL) {
    if (!dd->localcoloring) {
      PetscCall(PetscMalloc1(nc * nx * ny * nz, &colors));
      ii = 0;
      for (k = zs; k < zs + nz; k++) {
        for (j = ys; j < ys + ny; j++) {
          for (i = xs; i < xs + nx; i++) {
            for (l = 0; l < nc; l++) colors[ii++] = l + nc * ((i % col) + col * (j % col) + col * col * (k % col));
          }
        }
      }
      ncolors = nc + nc * (col - 1 + col * (col - 1) + col * col * (col - 1));
      PetscCall(ISColoringCreate(comm, ncolors, nc * nx * ny * nz, colors, PETSC_OWN_POINTER, &dd->localcoloring));
    }
    *coloring = dd->localcoloring;
  } else if (ctype == IS_COLORING_LOCAL) {
    if (!dd->ghostedcoloring) {
      PetscCall(PetscMalloc1(nc * gnx * gny * gnz, &colors));
      ii = 0;
      for (k = gzs; k < gzs + gnz; k++) {
        for (j = gys; j < gys + gny; j++) {
          for (i = gxs; i < gxs + gnx; i++) {
            for (l = 0; l < nc; l++) {
              /* the complicated stuff is to handle periodic boundaries */
              colors[ii++] = l + nc * ((SetInRange(i, m) % col) + col * (SetInRange(j, n) % col) + col * col * (SetInRange(k, p) % col));
            }
          }
        }
      }
      ncolors = nc + nc * (col - 1 + col * (col - 1) + col * col * (col - 1));
      PetscCall(ISColoringCreate(comm, ncolors, nc * gnx * gny * gnz, colors, PETSC_OWN_POINTER, &dd->ghostedcoloring));
      PetscCall(ISColoringSetType(dd->ghostedcoloring, IS_COLORING_LOCAL));
    }
    *coloring = dd->ghostedcoloring;
  } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_ARG_WRONG, "Unknown ISColoringType %d", (int)ctype);
  PetscCall(ISColoringReference(*coloring));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode DMCreateColoring_DA_1d_MPIAIJ(DM da, ISColoringType ctype, ISColoring *coloring)
{
  PetscInt         xs, nx, i, i1, gxs, gnx, l, m, M, dim, s, nc, col;
  PetscInt         ncolors;
  MPI_Comm         comm;
  DMBoundaryType   bx;
  ISColoringValue *colors;
  DM_DA           *dd = (DM_DA *)da->data;

  PetscFunctionBegin;
  /*
     nc - number of components per grid point
     col - number of colors needed in one direction for single component problem
  */
  PetscCall(DMDAGetInfo(da, &dim, &m, NULL, NULL, &M, NULL, NULL, &nc, &s, &bx, NULL, NULL, NULL));
  col = 2 * s + 1;
  PetscCall(DMDAGetCorners(da, &xs, NULL, NULL, &nx, NULL, NULL));
  PetscCall(DMDAGetGhostCorners(da, &gxs, NULL, NULL, &gnx, NULL, NULL));
  PetscCall(PetscObjectGetComm((PetscObject)da, &comm));

  /* create the coloring */
  if (ctype == IS_COLORING_GLOBAL) {
    if (!dd->localcoloring) {
      PetscCall(PetscMalloc1(nc * nx, &colors));
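      /* When DMDASetBlockFills() provided an off-diagonal fill pattern, components with no
         off-diagonal coupling (dd->ofillcols[l] == 0) never conflict with neighboring grid
         points, so they can reuse their base color l at every point; only the tc coupled
         components need additional colors at the 2*s neighboring positions, giving
         nc + 2*s*tc colors instead of nc*col */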
      if (dd->ofillcols) {
        PetscInt tc = 0;
        for (i = 0; i < nc; i++) tc += (PetscInt)(dd->ofillcols[i] > 0);
        i1 = 0;
        for (i = xs; i < xs + nx; i++) {
          for (l = 0; l < nc; l++) {
            if (dd->ofillcols[l] && (i % col)) {
              colors[i1++] = nc - 1 + tc * ((i % col) - 1) + dd->ofillcols[l];
            } else {
              colors[i1++] = l;
            }
          }
        }
        ncolors = nc + 2 * s * tc;
      } else {
        i1 = 0;
        for (i = xs; i < xs + nx; i++) {
          for (l = 0; l < nc; l++) colors[i1++] = l + nc * (i % col);
        }
        ncolors = nc + nc * (col - 1);
      }
      PetscCall(ISColoringCreate(comm, ncolors, nc * nx, colors, PETSC_OWN_POINTER, &dd->localcoloring));
    }
    *coloring = dd->localcoloring;
  } else if (ctype == IS_COLORING_LOCAL) {
    if (!dd->ghostedcoloring) {
      PetscCall(PetscMalloc1(nc * gnx, &colors));
      i1 = 0;
      for (i = gxs; i < gxs + gnx; i++) {
        for (l = 0; l < nc; l++) {
          /* the complicated stuff is to handle periodic boundaries */
          colors[i1++] = l + nc * (SetInRange(i, m) % col);
        }
      }
      ncolors = nc + nc * (col - 1);
      PetscCall(ISColoringCreate(comm, ncolors, nc * gnx, colors, PETSC_OWN_POINTER, &dd->ghostedcoloring));
      PetscCall(ISColoringSetType(dd->ghostedcoloring, IS_COLORING_LOCAL));
    }
    *coloring = dd->ghostedcoloring;
  } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_ARG_WRONG, "Unknown ISColoringType %d", (int)ctype);
  PetscCall(ISColoringReference(*coloring));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode DMCreateColoring_DA_2d_5pt_MPIAIJ(DM da, ISColoringType ctype, ISColoring *coloring)
{
  PetscInt         xs, ys, nx, ny, i, j, ii, gxs, gys, gnx, gny, m, n, dim, s, k, nc;
  PetscInt         ncolors;
  MPI_Comm         comm;
  DMBoundaryType   bx, by;
  ISColoringValue *colors;
  DM_DA           *dd = (DM_DA *)da->data;

  PetscFunctionBegin;
  /*
     nc - number of components per grid point
     col - number of colors needed in one direction for single component problem
  */
  PetscCall(DMDAGetInfo(da, &dim, &m, &n, NULL, NULL, NULL, NULL, &nc, &s, &bx, &by, NULL, NULL));
  PetscCall(DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL));
  PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL));
  PetscCall(PetscObjectGetComm((PetscObject)da, &comm));
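  /* The value (3*j + i) changes by +-1 for the east/west neighbors and by +-3 for the
     north/south neighbors of (i,j), and {0, +-1, +-3} are pairwise distinct mod 5, so the
     five points of the 5-point star always receive five distinct colors per component */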
  /* create the coloring */
  if (ctype == IS_COLORING_GLOBAL) {
    if (!dd->localcoloring) {
      PetscCall(PetscMalloc1(nc * nx * ny, &colors));
      ii = 0;
      for (j = ys; j < ys + ny; j++) {
        for (i = xs; i < xs + nx; i++) {
          for (k = 0; k < nc; k++) colors[ii++] = k + nc * ((3 * j + i) % 5);
        }
      }
      ncolors = 5 * nc;
      PetscCall(ISColoringCreate(comm, ncolors, nc * nx * ny, colors, PETSC_OWN_POINTER, &dd->localcoloring));
    }
    *coloring = dd->localcoloring;
  } else if (ctype == IS_COLORING_LOCAL) {
    if (!dd->ghostedcoloring) {
      PetscCall(PetscMalloc1(nc * gnx * gny, &colors));
      ii = 0;
      for (j = gys; j < gys + gny; j++) {
        for (i = gxs; i < gxs + gnx; i++) {
          for (k = 0; k < nc; k++) colors[ii++] = k + nc * ((3 * SetInRange(j, n) + SetInRange(i, m)) % 5);
        }
      }
      ncolors = 5 * nc;
      PetscCall(ISColoringCreate(comm, ncolors, nc * gnx * gny, colors, PETSC_OWN_POINTER, &dd->ghostedcoloring));
      PetscCall(ISColoringSetType(dd->ghostedcoloring, IS_COLORING_LOCAL));
    }
    *coloring = dd->ghostedcoloring;
  } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_ARG_WRONG, "Unknown ISColoringType %d", (int)ctype);
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* =========================================================================== */
extern PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ(DM, Mat);
extern PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ_Fill(DM, Mat);
extern PetscErrorCode DMCreateMatrix_DA_1d_SeqAIJ_NoPreallocation(DM, Mat);
extern PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ(DM, Mat);
extern PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ_Fill(DM, Mat);
extern PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ(DM, Mat);
extern PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ_Fill(DM, Mat);
extern PetscErrorCode DMCreateMatrix_DA_2d_MPIBAIJ(DM, Mat);
extern PetscErrorCode DMCreateMatrix_DA_3d_MPIBAIJ(DM, Mat);
extern PetscErrorCode DMCreateMatrix_DA_2d_MPISBAIJ(DM, Mat);
extern PetscErrorCode DMCreateMatrix_DA_3d_MPISBAIJ(DM, Mat);
extern PetscErrorCode DMCreateMatrix_DA_2d_MPISELL(DM, Mat);
extern PetscErrorCode DMCreateMatrix_DA_3d_MPISELL(DM, Mat);
extern PetscErrorCode DMCreateMatrix_DA_IS(DM, Mat);

static PetscErrorCode MatView_MPI_DA(Mat A, PetscViewer viewer)
{
  DM                da;
  const char       *prefix;
  Mat               AA, Anatural;
  AO                ao;
  PetscInt          rstart, rend, *petsc, i;
  IS                is;
  MPI_Comm          comm;
  PetscViewerFormat format;
  PetscBool         flag;

  PetscFunctionBegin;
  /* Check whether we are just printing info, in which case MatView() already viewed everything we wanted to view */
  PetscCall(PetscViewerGetFormat(viewer, &format));
  if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) PetscFunctionReturn(PETSC_SUCCESS);

  PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
  PetscCall(MatGetDM(A, &da));
  PetscCheck(da, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Matrix not generated from a DMDA");

  PetscCall(DMDAGetAO(da, &ao));
  PetscCall(MatGetOwnershipRange(A, &rstart, &rend));
  PetscCall(PetscMalloc1(rend - rstart, &petsc));
  for (i = rstart; i < rend; i++) petsc[i - rstart] = i;
  PetscCall(AOApplicationToPetsc(ao, rend - rstart, petsc));
  PetscCall(ISCreateGeneral(comm, rend - rstart, petsc, PETSC_OWN_POINTER, &is));

  /* call viewer on natural ordering */
  PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATMPISELL, &flag));
  if (flag) {
    PetscCall(MatConvert(A, MATAIJ, MAT_INITIAL_MATRIX, &AA));
    A = AA;
  }
  PetscCall(MatCreateSubMatrix(A, is, is, MAT_INITIAL_MATRIX, &Anatural));
  PetscCall(ISDestroy(&is));
  PetscCall(PetscObjectGetOptionsPrefix((PetscObject)A, &prefix));
  PetscCall(PetscObjectSetOptionsPrefix((PetscObject)Anatural, prefix));
  PetscCall(PetscObjectSetName((PetscObject)Anatural, ((PetscObject)A)->name));
  ((PetscObject)Anatural)->donotPetscObjectPrintClassNamePrefixType = PETSC_TRUE;
  PetscCall(MatView(Anatural, viewer));
  ((PetscObject)Anatural)->donotPetscObjectPrintClassNamePrefixType = PETSC_FALSE;
  PetscCall(MatDestroy(&Anatural));
  if (flag) PetscCall(MatDestroy(&AA));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatLoad_MPI_DA(Mat A, PetscViewer viewer)
{
  DM       da;
  Mat      Anatural, Aapp;
  AO       ao;
  PetscInt rstart, rend, *app, i, m, n, M, N;
  IS       is;
  MPI_Comm comm;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
  PetscCall(MatGetDM(A, &da));
  PetscCheck(da, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Matrix not generated from a DMDA");

  /* Load the matrix in natural ordering */
  PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &Anatural));
  PetscCall(MatSetType(Anatural, ((PetscObject)A)->type_name));
  PetscCall(MatGetSize(A, &M, &N));
  PetscCall(MatGetLocalSize(A, &m, &n));
  PetscCall(MatSetSizes(Anatural, m, n, M, N));
  PetscCall(MatLoad(Anatural, viewer));

  /* Map natural ordering to application ordering and create IS */
  PetscCall(DMDAGetAO(da, &ao));
  PetscCall(MatGetOwnershipRange(Anatural, &rstart, &rend));
  PetscCall(PetscMalloc1(rend - rstart, &app));
  for (i = rstart; i < rend; i++) app[i - rstart] = i;
  PetscCall(AOPetscToApplication(ao, rend - rstart, app));
  PetscCall(ISCreateGeneral(comm, rend - rstart, app, PETSC_OWN_POINTER, &is));

  /* Do permutation and replace header */
  PetscCall(MatCreateSubMatrix(Anatural, is, is, MAT_INITIAL_MATRIX, &Aapp));
  PetscCall(MatHeaderReplace(A, &Aapp));
  PetscCall(ISDestroy(&is));
  PetscCall(MatDestroy(&Anatural));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode DMCreateMatrix_DA(DM da, Mat *J)
{
  PetscInt    dim, dof, nx, ny, nz, dims[3], starts[3], M, N, P;
  Mat         A;
  MPI_Comm    comm;
  MatType     Atype;
  MatType     mtype;
  PetscMPIInt size;
  DM_DA      *dd = (DM_DA *)da->data;
  void (*aij)(void) = NULL, (*baij)(void) = NULL, (*sbaij)(void) = NULL, (*sell)(void) = NULL, (*is)(void) = NULL;

  PetscFunctionBegin;
  PetscCall(MatInitializePackage());
  mtype = da->mattype;

  /*
                                                  m
          ------------------------------------------------------
         |                                                      |
         |                                                      |
         |               ----------------------                 |
         |               |                    |                 |
      n  |           ny  |                    |                 |
         |               |                    |                 |
         |               .---------------------                 |
         |             (xs,ys)     nx                           |
         |            .                                         |
         |         (gxs,gys)                                    |
         |                                                      |
          ------------------------------------------------------
  */

  /*
     nc - number of components per grid point
     col - number of colors needed in one direction for single component problem
  */
  M   = dd->M;
  N   = dd->N;
  P   = dd->P;
  dim = da->dim;
  dof = dd->w;
  /* PetscCall(DMDAGetInfo(da,&dim,&M,&N,&P,NULL,NULL,NULL,&dof,NULL,NULL,NULL,NULL,NULL)); */
  PetscCall(DMDAGetCorners(da, NULL, NULL, NULL, &nx, &ny, &nz));
  PetscCall(PetscObjectGetComm((PetscObject)da, &comm));
  PetscCall(MatCreate(comm, &A));
  PetscCall(MatSetSizes(A, dof * nx * ny * nz, dof * nx * ny * nz, dof * M * N * P, dof * M * N * P));
  PetscCall(MatSetType(A, mtype));
  PetscCall(MatSetFromOptions(A));
  if (dof * nx * ny * nz < da->bind_below) {
    PetscCall(MatSetBindingPropagates(A, PETSC_TRUE));
    PetscCall(MatBindToCPU(A, PETSC_TRUE));
  }
  PetscCall(MatSetDM(A, da));
  if (da->structure_only) PetscCall(MatSetOption(A, MAT_STRUCTURE_ONLY, PETSC_TRUE));
  PetscCall(MatGetType(A, &Atype));
  /*
     We do not provide a getmatrix function in the DMDA operations because
     the basic DMDA does not know about matrices. We think of DMDA as being
     more low-level than matrices. This is kind of cheating, because sometimes
     we think of DMDA as higher level than matrices.

     We could switch based on Atype (or mtype), but we do not since the
     specialized setting routines depend only on the particular preallocation
     details of the matrix, not the type itself.
  */
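  /* PetscObjectQueryFunction() is used rather than comparing type names so that a single
     test catches both the sequential and parallel variant of each format (a MATAIJ matrix
     resolves to MATSEQAIJ or MATMPIAIJ depending on the communicator size) as well as any
     derived type that composes the same preallocation method */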
  if (!da->prealloc_skip) { // Flag is likely set when user intends to use MatSetPreallocationCOO()
    PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatMPIAIJSetPreallocation_C", &aij));
    if (!aij) PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatSeqAIJSetPreallocation_C", &aij));
    if (!aij) {
      PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatMPIBAIJSetPreallocation_C", &baij));
      if (!baij) PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatSeqBAIJSetPreallocation_C", &baij));
      if (!baij) {
        PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatMPISBAIJSetPreallocation_C", &sbaij));
        if (!sbaij) PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatSeqSBAIJSetPreallocation_C", &sbaij));
        if (!sbaij) {
          PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatMPISELLSetPreallocation_C", &sell));
          if (!sell) PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatSeqSELLSetPreallocation_C", &sell));
        }
        if (!sell) PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatISSetPreallocation_C", &is));
      }
    }
  }
  if (aij) {
    if (dim == 1) {
      if (dd->ofill) {
        PetscCall(DMCreateMatrix_DA_1d_MPIAIJ_Fill(da, A));
      } else {
        DMBoundaryType bx;
        PetscMPIInt    size;
        PetscCall(DMDAGetInfo(da, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, &bx, NULL, NULL, NULL));
        PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)da), &size));
        if (size == 1 && bx == DM_BOUNDARY_NONE) {
          PetscCall(DMCreateMatrix_DA_1d_SeqAIJ_NoPreallocation(da, A));
        } else {
          PetscCall(DMCreateMatrix_DA_1d_MPIAIJ(da, A));
        }
      }
    } else if (dim == 2) {
      if (dd->ofill) {
        PetscCall(DMCreateMatrix_DA_2d_MPIAIJ_Fill(da, A));
      } else {
        PetscCall(DMCreateMatrix_DA_2d_MPIAIJ(da, A));
      }
    } else if (dim == 3) {
      if (dd->ofill) {
        PetscCall(DMCreateMatrix_DA_3d_MPIAIJ_Fill(da, A));
      } else {
        PetscCall(DMCreateMatrix_DA_3d_MPIAIJ(da, A));
      }
    }
  } else if (baij) {
    if (dim == 2) {
      PetscCall(DMCreateMatrix_DA_2d_MPIBAIJ(da, A));
    } else if (dim == 3) {
      PetscCall(DMCreateMatrix_DA_3d_MPIBAIJ(da, A));
    } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "Not implemented for %" PetscInt_FMT " dimension and Matrix Type: %s in %" PetscInt_FMT " dimension! Send mail to petsc-maint@mcs.anl.gov for code", dim, Atype, dim);
  } else if (sbaij) {
    if (dim == 2) {
      PetscCall(DMCreateMatrix_DA_2d_MPISBAIJ(da, A));
    } else if (dim == 3) {
      PetscCall(DMCreateMatrix_DA_3d_MPISBAIJ(da, A));
    } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "Not implemented for %" PetscInt_FMT " dimension and Matrix Type: %s in %" PetscInt_FMT " dimension! Send mail to petsc-maint@mcs.anl.gov for code", dim, Atype, dim);
  } else if (sell) {
    if (dim == 2) {
      PetscCall(DMCreateMatrix_DA_2d_MPISELL(da, A));
    } else if (dim == 3) {
      PetscCall(DMCreateMatrix_DA_3d_MPISELL(da, A));
    } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "Not implemented for %" PetscInt_FMT " dimension and Matrix Type: %s in %" PetscInt_FMT " dimension! Send mail to petsc-maint@mcs.anl.gov for code", dim, Atype, dim);
  } else if (is) {
    PetscCall(DMCreateMatrix_DA_IS(da, A));
  } else { // unknown type or da->prealloc_skip so structural information may be needed, but no prealloc
    ISLocalToGlobalMapping ltog;

    PetscCall(MatSetBlockSize(A, dof));
    PetscCall(MatSetUp(A));
    PetscCall(DMGetLocalToGlobalMapping(da, &ltog));
    PetscCall(MatSetLocalToGlobalMapping(A, ltog, ltog));
  }
  PetscCall(DMDAGetGhostCorners(da, &starts[0], &starts[1], &starts[2], &dims[0], &dims[1], &dims[2]));
  PetscCall(MatSetStencil(A, dim, dims, starts, dof));
  PetscCall(MatSetDM(A, da));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  if (size > 1) {
    /* change viewer to display matrix in natural ordering */
    PetscCall(MatSetOperation(A, MATOP_VIEW, (void (*)(void))MatView_MPI_DA));
    PetscCall(MatSetOperation(A, MATOP_LOAD, (void (*)(void))MatLoad_MPI_DA));
  }
  *J = A;
  PetscFunctionReturn(PETSC_SUCCESS);
}

PETSC_EXTERN PetscErrorCode MatISSetPreallocation_IS(Mat, PetscInt, const PetscInt[], PetscInt, const PetscInt[]);

PetscErrorCode DMCreateMatrix_DA_IS(DM dm, Mat J)
{
  DM_DA                 *da = (DM_DA *)dm->data;
  Mat                    lJ, P;
  ISLocalToGlobalMapping ltog;
  IS                     is;
  PetscBT                bt;
  const PetscInt        *e_loc, *idx;
  PetscInt               i, nel, nen, nv, dof, *gidx, n, N;

  /* The l2g map of DMDA has all ghosted nodes, and e_loc is a subset of all the local nodes (including the ghosted)
     We need to filter out the local indices that are not represented through the DMDAGetElements decomposition */
  PetscFunctionBegin;
  dof = da->w;
  PetscCall(MatSetBlockSize(J, dof));
  PetscCall(DMGetLocalToGlobalMapping(dm, &ltog));

  /* flag local elements indices in local DMDA numbering */
  PetscCall(ISLocalToGlobalMappingGetSize(ltog, &nv));
  PetscCall(PetscBTCreate(nv / dof, &bt));
  PetscCall(DMDAGetElements(dm, &nel, &nen, &e_loc)); /* this will throw an error if the stencil type is not DMDA_STENCIL_BOX */
  for (i = 0; i < nel * nen; i++) PetscCall(PetscBTSet(bt, e_loc[i]));

  /* filter out (set to -1) the global indices not used by the local elements */
  PetscCall(PetscMalloc1(nv / dof, &gidx));
  PetscCall(ISLocalToGlobalMappingGetBlockIndices(ltog, &idx));
  PetscCall(PetscArraycpy(gidx, idx, nv / dof));
  PetscCall(ISLocalToGlobalMappingRestoreBlockIndices(ltog, &idx));
  for (i = 0; i < nv / dof; i++)
    if (!PetscBTLookup(bt, i)) gidx[i] = -1;
  PetscCall(PetscBTDestroy(&bt));
  PetscCall(ISCreateBlock(PetscObjectComm((PetscObject)dm), dof, nv / dof, gidx, PETSC_OWN_POINTER, &is));
  PetscCall(ISLocalToGlobalMappingCreateIS(is, &ltog));
  PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));
  PetscCall(ISLocalToGlobalMappingDestroy(&ltog));
  PetscCall(ISDestroy(&is));

  /* Preallocation */
  if (dm->prealloc_skip) {
    PetscCall(MatSetUp(J));
  } else {
    PetscCall(MatISGetLocalMat(J, &lJ));
    PetscCall(MatGetLocalToGlobalMapping(lJ, &ltog, NULL));
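    /* A MATPREALLOCATOR matrix records the nonzero pattern of the MatSetValuesBlockedLocal()
       calls made below and transfers it to the local matrix of J via
       MatPreallocatorPreallocate(), which also fills the pattern with zeros unless
       prealloc_only is set */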
    PetscCall(MatCreate(PetscObjectComm((PetscObject)lJ), &P));
    PetscCall(MatSetType(P, MATPREALLOCATOR));
    PetscCall(MatSetLocalToGlobalMapping(P, ltog, ltog));
    PetscCall(MatGetSize(lJ, &N, NULL));
    PetscCall(MatGetLocalSize(lJ, &n, NULL));
    PetscCall(MatSetSizes(P, n, n, N, N));
    PetscCall(MatSetBlockSize(P, dof));
    PetscCall(MatSetUp(P));
    for (i = 0; i < nel; i++) PetscCall(MatSetValuesBlockedLocal(P, nen, e_loc + i * nen, nen, e_loc + i * nen, NULL, INSERT_VALUES));
    PetscCall(MatPreallocatorPreallocate(P, (PetscBool)!da->prealloc_only, lJ));
    PetscCall(MatISRestoreLocalMat(J, &lJ));
    PetscCall(DMDARestoreElements(dm, &nel, &nen, &e_loc));
    PetscCall(MatDestroy(&P));

    PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode DMCreateMatrix_DA_2d_MPISELL(DM da, Mat J)
{
  PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny, m, n, dim, s, *cols = NULL, k, nc, *rows = NULL, col, cnt, l, p;
  PetscInt               lstart, lend, pstart, pend, *dnz, *onz;
  MPI_Comm               comm;
  PetscScalar           *values;
  DMBoundaryType         bx, by;
  ISLocalToGlobalMapping ltog;
  DMDAStencilType        st;

  PetscFunctionBegin;
  /*
     nc - number of components per grid point
     col - number of colors needed in one direction for single component problem
  */
  PetscCall(DMDAGetInfo(da, &dim, &m, &n, NULL, NULL, NULL, NULL, &nc, &s, &bx, &by, NULL, &st));
  col = 2 * s + 1;
  PetscCall(DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL));
  PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL));
  PetscCall(PetscObjectGetComm((PetscObject)da, &comm));

  PetscCall(PetscMalloc2(nc, &rows, col * col * nc * nc, &cols));
  PetscCall(DMGetLocalToGlobalMapping(da, &ltog));

  PetscCall(MatSetBlockSize(J, nc));
  /* determine the matrix preallocation information */
  MatPreallocateBegin(comm, nc * nx * ny, nc * nx * ny, dnz, onz);
  for (i = xs; i < xs + nx; i++) {
    pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
    pend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));

    for (j = ys; j < ys + ny; j++) {
      slot = i - gxs + gnx * (j - gys);

      lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
      lend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));

      cnt = 0;
      for (k = 0; k < nc; k++) {
        for (l = lstart; l < lend + 1; l++) {
          for (p = pstart; p < pend + 1; p++) {
            if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star have either l = 0 or p = 0 */
              cols[cnt++] = k + nc * (slot + gnx * l + p);
            }
          }
        }
        rows[k] = k + nc * (slot);
      }
      PetscCall(MatPreallocateSetLocal(ltog, nc, rows, ltog, cnt, cols, dnz, onz));
    }
  }
  PetscCall(MatSetBlockSize(J, nc));
  PetscCall(MatSeqSELLSetPreallocation(J, 0, dnz));
  PetscCall(MatMPISELLSetPreallocation(J, 0, dnz, 0, onz));
  MatPreallocateEnd(dnz, onz);

  PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    PetscCall(PetscCalloc1(col * col * nc * nc, &values));
    for (i = xs; i < xs + nx; i++) {
      pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
      pend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));

      for (j = ys; j < ys + ny; j++) {
        slot = i - gxs + gnx * (j - gys);

        lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
        lend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
        cnt = 0;
        for (k = 0; k < nc; k++) {
          for (l = lstart; l < lend + 1; l++) {
            for (p = pstart; p < pend + 1; p++) {
              if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star have either l = 0 or p = 0 */
                cols[cnt++] = k + nc * (slot + gnx * l + p);
              }
            }
          }
          rows[k] = k + nc * (slot);
        }
        PetscCall(MatSetValuesLocal(J, nc, rows, cnt, cols, values, INSERT_VALUES));
      }
    }
    PetscCall(PetscFree(values));
    /* do not copy values to GPU since they are all zero and not yet needed there */
    PetscCall(MatBindToCPU(J, PETSC_TRUE));
    PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatBindToCPU(J, PETSC_FALSE));
    PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  }
  PetscCall(PetscFree2(rows, cols));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode DMCreateMatrix_DA_3d_MPISELL(DM da, Mat J)
{
  PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
  PetscInt               m, n, dim, s, *cols = NULL, k, nc, *rows = NULL, col, cnt, l, p, *dnz = NULL, *onz = NULL;
  PetscInt               istart, iend, jstart, jend, kstart, kend, zs, nz, gzs, gnz, ii, jj, kk, M, N, P;
  MPI_Comm               comm;
  PetscScalar           *values;
  DMBoundaryType         bx, by, bz;
  ISLocalToGlobalMapping ltog;
  DMDAStencilType        st;

  PetscFunctionBegin;
  /*
     nc - number of components per grid point
     col - number of colors needed in one direction for single component problem
  */
  PetscCall(DMDAGetInfo(da, &dim, &m, &n, &p, &M, &N, &P, &nc, &s, &bx, &by, &bz, &st));
  col = 2 * s + 1;
  PetscCall(DMDAGetCorners(da, &xs, &ys, &zs, &nx, &ny, &nz));
  PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, &gzs, &gnx, &gny, &gnz));
  PetscCall(PetscObjectGetComm((PetscObject)da, &comm));

  PetscCall(PetscMalloc2(nc, &rows, col * col * col * nc * nc, &cols));
  PetscCall(DMGetLocalToGlobalMapping(da, &ltog));

  PetscCall(MatSetBlockSize(J, nc));
  /* determine the matrix preallocation information */
  MatPreallocateBegin(comm, nc * nx * ny * nz, nc * nx * ny * nz, dnz, onz);
  for (i = xs; i < xs + nx; i++) {
    istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
    iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
    for (j = ys; j < ys + ny; j++) {
      jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
      jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
      for (k = zs; k < zs + nz; k++) {
        kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
        kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));
        slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);

        cnt = 0;
        for (l = 0; l < nc; l++) {
          for (ii = istart; ii < iend + 1; ii++) {
            for (jj = jstart; jj < jend + 1; jj++) {
              for (kk = kstart; kk < kend + 1; kk++) {
                if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star */
                  cols[cnt++] = l + nc * (slot + ii + gnx * jj + gnx * gny * kk);
                }
              }
            }
          }
          rows[l] = l + nc * (slot);
        }
        PetscCall(MatPreallocateSetLocal(ltog, nc, rows, ltog, cnt, cols, dnz, onz));
      }
    }
  }
  PetscCall(MatSetBlockSize(J, nc));
  PetscCall(MatSeqSELLSetPreallocation(J, 0, dnz));
  PetscCall(MatMPISELLSetPreallocation(J, 0, dnz, 0, onz));
  MatPreallocateEnd(dnz, onz);
  PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    PetscCall(PetscCalloc1(col * col * col * nc * nc * nc, &values));
    for (i = xs; i < xs + nx; i++) {
      istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
      iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
      for (j = ys; j < ys + ny; j++) {
        jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
        jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
        for (k = zs; k < zs + nz; k++) {
          kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
          kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));

          slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);

          cnt = 0;
          for (l = 0; l < nc; l++) {
            for (ii = istart; ii < iend + 1; ii++) {
              for (jj = jstart; jj < jend + 1; jj++) {
                for (kk = kstart; kk < kend + 1; kk++) {
                  if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star */
                    cols[cnt++] = l + nc * (slot + ii + gnx * jj + gnx * gny * kk);
                  }
                }
              }
            }
            rows[l] = l + nc * (slot);
          }
          PetscCall(MatSetValuesLocal(J, nc, rows, cnt, cols, values, INSERT_VALUES));
        }
      }
    }
    PetscCall(PetscFree(values));
    /* do not copy values to GPU since they are all zero and not yet needed there */
    PetscCall(MatBindToCPU(J, PETSC_TRUE));
    PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatBindToCPU(J, PETSC_FALSE));
    PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  }
  PetscCall(PetscFree2(rows, cols));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ(DM da, Mat J)
{
  PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny, m, n, dim, s, *cols = NULL, k, nc, *rows = NULL, col, cnt, l, p, M, N;
  PetscInt               lstart, lend, pstart, pend, *dnz, *onz;
  MPI_Comm               comm;
  DMBoundaryType         bx, by;
  ISLocalToGlobalMapping ltog, mltog;
  DMDAStencilType        st;
  PetscBool              removedups = PETSC_FALSE, alreadyboundtocpu = PETSC_TRUE;

  PetscFunctionBegin;
  /*
     nc - number of components per grid point
     col - number of colors needed in one direction for single component problem
  */
  PetscCall(DMDAGetInfo(da, &dim, &m, &n, NULL, &M, &N, NULL, &nc, &s, &bx, &by, NULL, &st));
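  /* Without periodicity each row below is generated with its column indices already sorted
     and the complete row is supplied in a single MatSetValuesLocal() call, so the faster
     MAT_SORTED_FULL insertion path can be used */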
  if (bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE) PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_TRUE));
  col = 2 * s + 1;
  /*
     With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times
     because of "wrapping" around the end of the domain hitting an entry already counted in the other direction.
  */
  if (M == 1 && 2 * s >= m) removedups = PETSC_TRUE;
  if (N == 1 && 2 * s >= n) removedups = PETSC_TRUE;
  PetscCall(DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL));
  PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL));
  PetscCall(PetscObjectGetComm((PetscObject)da, &comm));

  PetscCall(PetscMalloc2(nc, &rows, col * col * nc * nc, &cols));
  PetscCall(DMGetLocalToGlobalMapping(da, &ltog));

  PetscCall(MatSetBlockSize(J, nc));
  /* determine the matrix preallocation information */
  MatPreallocateBegin(comm, nc * nx * ny, nc * nx * ny, dnz, onz);
  for (i = xs; i < xs + nx; i++) {
    pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
    pend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));

    for (j = ys; j < ys + ny; j++) {
      slot = i - gxs + gnx * (j - gys);

      lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
      lend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));

      cnt = 0;
      for (k = 0; k < nc; k++) {
        for (l = lstart; l < lend + 1; l++) {
          for (p = pstart; p < pend + 1; p++) {
            if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star have either l = 0 or p = 0 */
              cols[cnt++] = k + nc * (slot + gnx * l + p);
            }
          }
        }
        rows[k] = k + nc * (slot);
      }
      if (removedups) PetscCall(MatPreallocateSetLocalRemoveDups(ltog, nc, rows, ltog, cnt, cols, dnz, onz));
      else PetscCall(MatPreallocateSetLocal(ltog, nc, rows, ltog, cnt, cols, dnz, onz));
    }
  }
  PetscCall(MatSetBlockSize(J, nc));
  PetscCall(MatSeqAIJSetPreallocation(J, 0, dnz));
  PetscCall(MatMPIAIJSetPreallocation(J, 0, dnz, 0, onz));
  MatPreallocateEnd(dnz, onz);
  PetscCall(MatGetLocalToGlobalMapping(J, &mltog, NULL));
  if (!mltog) PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    for (i = xs; i < xs + nx; i++) {
      pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
      pend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));

      for (j = ys; j < ys + ny; j++) {
        slot = i - gxs + gnx * (j - gys);

        lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
        lend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
        cnt = 0;
        for (l = lstart; l < lend + 1; l++) {
          for (p = pstart; p < pend + 1; p++) {
            if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star have either l = 0 or p = 0 */
              cols[cnt++] = nc * (slot + gnx * l + p);
              for (k = 1; k < nc; k++) {
                cols[cnt] = 1 + cols[cnt - 1];
                cnt++;
              }
            }
          }
        }
        for (k = 0; k < nc; k++) rows[k] = k + nc * (slot);
        PetscCall(MatSetValuesLocal(J, nc, rows, cnt, cols, NULL, INSERT_VALUES));
      }
    }
    /* do not copy values to GPU since they are all zero and not yet needed there */
    PetscCall(MatBoundToCPU(J, &alreadyboundtocpu));
    PetscCall(MatBindToCPU(J, PETSC_TRUE));
    PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
    if (!alreadyboundtocpu) PetscCall(MatBindToCPU(J, PETSC_FALSE));
    PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
    if (bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE) PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_FALSE));
  }
  PetscCall(PetscFree2(rows, cols));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ_Fill(DM da, Mat J)
{
  PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
  PetscInt               m, n, dim, s, *cols, k, nc, row, col, cnt, maxcnt = 0, l, p, M, N;
  PetscInt               lstart, lend, pstart, pend, *dnz, *onz;
  DM_DA                 *dd = (DM_DA *)da->data;
  PetscInt               ifill_col, *ofill = dd->ofill, *dfill = dd->dfill;
  MPI_Comm               comm;
  DMBoundaryType         bx, by;
  ISLocalToGlobalMapping ltog;
  DMDAStencilType        st;
  PetscBool              removedups = PETSC_FALSE;

  PetscFunctionBegin;
  /*
     nc - number of components per grid point
     col - number of colors needed in one direction for single component problem
  */
  PetscCall(DMDAGetInfo(da, &dim, &m, &n, NULL, &M, &N, NULL, &nc, &s, &bx, &by, NULL, &st));
  col = 2 * s + 1;
  /*
     With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times
     because of "wrapping" around the end of the domain hitting an entry already counted in the other direction.
  */
  if (M == 1 && 2 * s >= m) removedups = PETSC_TRUE;
  if (N == 1 && 2 * s >= n) removedups = PETSC_TRUE;
  PetscCall(DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL));
  PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL));
  PetscCall(PetscObjectGetComm((PetscObject)da, &comm));

  PetscCall(PetscMalloc1(col * col * nc, &cols));
  PetscCall(DMGetLocalToGlobalMapping(da, &ltog));

  PetscCall(MatSetBlockSize(J, nc));
  /* determine the matrix preallocation information */
  MatPreallocateBegin(comm, nc * nx * ny, nc * nx * ny, dnz, onz);
  for (i = xs; i < xs + nx; i++) {
    pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
    pend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));

    for (j = ys; j < ys + ny; j++) {
      slot = i - gxs + gnx * (j - gys);

      lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
      lend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
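      /* dfill and ofill are in the modified CSR form built by DMDASetBlockFills_Private():
         entries dfill[k] .. dfill[k+1]-1 (respectively ofill[k] .. ofill[k+1]-1) list the
         column indices of the components coupled to component k within the diagonal
         (respectively off-diagonal) blocks */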
      for (k = 0; k < nc; k++) {
        cnt = 0;
        for (l = lstart; l < lend + 1; l++) {
          for (p = pstart; p < pend + 1; p++) {
            if (l || p) {
              if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star */
                for (ifill_col = ofill[k]; ifill_col < ofill[k + 1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc * (slot + gnx * l + p);
              }
            } else {
              if (dfill) {
                for (ifill_col = dfill[k]; ifill_col < dfill[k + 1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc * (slot + gnx * l + p);
              } else {
                for (ifill_col = 0; ifill_col < nc; ifill_col++) cols[cnt++] = ifill_col + nc * (slot + gnx * l + p);
              }
            }
          }
        }
        row    = k + nc * (slot);
        maxcnt = PetscMax(maxcnt, cnt);
        if (removedups) PetscCall(MatPreallocateSetLocalRemoveDups(ltog, 1, &row, ltog, cnt, cols, dnz, onz));
        else PetscCall(MatPreallocateSetLocal(ltog, 1, &row, ltog, cnt, cols, dnz, onz));
      }
    }
  }
  PetscCall(MatSeqAIJSetPreallocation(J, 0, dnz));
  PetscCall(MatMPIAIJSetPreallocation(J, 0, dnz, 0, onz));
  MatPreallocateEnd(dnz, onz);
  PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    for (i = xs; i < xs + nx; i++) {
      pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
      pend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));

      for (j = ys; j < ys + ny; j++) {
        slot = i - gxs + gnx * (j - gys);

        lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
        lend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
        for (k = 0; k < nc; k++) {
          cnt = 0;
          for (l = lstart; l < lend + 1; l++) {
            for (p = pstart; p < pend + 1; p++) {
              if (l || p) {
                if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star */
                  for (ifill_col = ofill[k]; ifill_col < ofill[k + 1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc * (slot + gnx * l + p);
                }
              } else {
                if (dfill) {
                  for (ifill_col = dfill[k]; ifill_col < dfill[k + 1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc * (slot + gnx * l + p);
                } else {
                  for (ifill_col = 0; ifill_col < nc; ifill_col++) cols[cnt++] = ifill_col + nc * (slot + gnx * l + p);
                }
              }
            }
          }
          row = k + nc * (slot);
          PetscCall(MatSetValuesLocal(J, 1, &row, cnt, cols, NULL, INSERT_VALUES));
        }
      }
    }
    /* do not copy values to GPU since they are all zero and not yet needed there */
    PetscCall(MatBindToCPU(J, PETSC_TRUE));
    PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatBindToCPU(J, PETSC_FALSE));
    PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  }
  PetscCall(PetscFree(cols));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ(DM da, Mat J)
{
  PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
  PetscInt               m, n, dim, s, *cols = NULL, k, nc, *rows = NULL, col, cnt, l, p, *dnz = NULL, *onz = NULL;
  PetscInt               istart, iend, jstart, jend, kstart, kend, zs, nz, gzs, gnz, ii, jj, kk, M, N, P;
  MPI_Comm               comm;
  DMBoundaryType         bx, by, bz;
  ISLocalToGlobalMapping ltog, mltog;
  DMDAStencilType        st;
  PetscBool              removedups = PETSC_FALSE;

  PetscFunctionBegin;
  /*
     nc - number of components per grid point
     col - number of colors needed in one direction for single component problem
  */
  PetscCall(DMDAGetInfo(da, &dim, &m, &n, &p, &M, &N, &P, &nc, &s, &bx, &by, &bz, &st));
  if (bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE && bz == DM_BOUNDARY_NONE) PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_TRUE));
  col = 2 * s + 1;

  /*
     With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times
     because of "wrapping" around the end of the domain hitting an entry already counted in the other direction.
  */
  if (M == 1 && 2 * s >= m) removedups = PETSC_TRUE;
  if (N == 1 && 2 * s >= n) removedups = PETSC_TRUE;
  if (P == 1 && 2 * s >= p) removedups = PETSC_TRUE;

  PetscCall(DMDAGetCorners(da, &xs, &ys, &zs, &nx, &ny, &nz));
  PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, &gzs, &gnx, &gny, &gnz));
  PetscCall(PetscObjectGetComm((PetscObject)da, &comm));

  PetscCall(PetscMalloc2(nc, &rows, col * col * col * nc * nc, &cols));
  PetscCall(DMGetLocalToGlobalMapping(da, &ltog));

  PetscCall(MatSetBlockSize(J, nc));
  /* determine the matrix preallocation information */
  MatPreallocateBegin(comm, nc * nx * ny * nz, nc * nx * ny * nz, dnz, onz);
  for (i = xs; i < xs + nx; i++) {
    istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
    iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
    for (j = ys; j < ys + ny; j++) {
      jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
      jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
      for (k = zs; k < zs + nz; k++) {
        kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
        kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));

        slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);

        cnt = 0;
        for (l = 0; l < nc; l++) {
          for (ii = istart; ii < iend + 1; ii++) {
            for (jj = jstart; jj < jend + 1; jj++) {
              for (kk = kstart; kk < kend + 1; kk++) {
                if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star */
                  cols[cnt++] = l + nc * (slot + ii + gnx * jj + gnx * gny * kk);
                }
              }
            }
          }
          rows[l] = l + nc * (slot);
        }
        if (removedups) PetscCall(MatPreallocateSetLocalRemoveDups(ltog, nc, rows, ltog, cnt, cols, dnz, onz));
        else PetscCall(MatPreallocateSetLocal(ltog, nc, rows, ltog, cnt, cols, dnz, onz));
      }
    }
  }
  PetscCall(MatSetBlockSize(J, nc));
  PetscCall(MatSeqAIJSetPreallocation(J, 0, dnz));
  PetscCall(MatMPIAIJSetPreallocation(J, 0, dnz, 0, onz));
  MatPreallocateEnd(dnz, onz);
  PetscCall(MatGetLocalToGlobalMapping(J, &mltog, NULL));
  if (!mltog) PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    for (i = xs; i < xs + nx; i++) {
      istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
      iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
      for (j = ys; j < ys + ny; j++) {
        jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
        jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
        for (k = zs; k < zs + nz; k++) {
          kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
          kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));

          slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);

          cnt = 0;
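          /* unlike the preallocation loops above, these loops run kk, jj, ii from slowest to
             fastest varying in the local ordering, so the column indices are generated
             already sorted, as assumed by MAT_SORTED_FULL when there is no periodicity */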
          for (kk = kstart; kk < kend + 1; kk++) {
            for (jj = jstart; jj < jend + 1; jj++) {
              for (ii = istart; ii < iend + 1; ii++) {
                if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star */
                  cols[cnt++] = nc * (slot + ii + gnx * jj + gnx * gny * kk);
                  for (l = 1; l < nc; l++) {
                    cols[cnt] = 1 + cols[cnt - 1];
                    cnt++;
                  }
                }
              }
            }
          }
          rows[0] = nc * (slot);
          for (l = 1; l < nc; l++) rows[l] = 1 + rows[l - 1];
          PetscCall(MatSetValuesLocal(J, nc, rows, cnt, cols, NULL, INSERT_VALUES));
        }
      }
    }
    /* do not copy values to GPU since they are all zero and not yet needed there */
    PetscCall(MatBindToCPU(J, PETSC_TRUE));
    PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
    if (bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE && bz == DM_BOUNDARY_NONE) PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_FALSE));
    PetscCall(MatBindToCPU(J, PETSC_FALSE));
    PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  }
  PetscCall(PetscFree2(rows, cols));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ_Fill(DM da, Mat J)
{
  DM_DA                 *dd = (DM_DA *)da->data;
  PetscInt               xs, nx, i, j, gxs, gnx, row, k, l;
  PetscInt               m, dim, s, *cols = NULL, nc, cnt, maxcnt = 0, *ocols;
  PetscInt              *ofill = dd->ofill, *dfill = dd->dfill;
  DMBoundaryType         bx;
  ISLocalToGlobalMapping ltog;
  PetscMPIInt            rank, size;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)da), &rank));
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)da), &size));

  /*
     nc - number of components per grid point
  */
  PetscCall(DMDAGetInfo(da, &dim, &m, NULL, NULL, NULL, NULL, NULL, &nc, &s, &bx, NULL, NULL, NULL));
  PetscCheck(s <= 1, PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "Matrix creation for 1d not implemented correctly for stencil width larger than 1");
  PetscCall(DMDAGetCorners(da, &xs, NULL, NULL, &nx, NULL, NULL));
  PetscCall(DMDAGetGhostCorners(da, &gxs, NULL, NULL, &gnx, NULL, NULL));

  PetscCall(MatSetBlockSize(J, nc));
  PetscCall(PetscCalloc2(nx * nc, &cols, nx * nc, &ocols));

  /*
     note should be smaller for first and last process with no periodic
     does not handle dfill
  */
  cnt = 0;
  /* coupling with process to the left */
  for (i = 0; i < s; i++) {
    for (j = 0; j < nc; j++) {
      ocols[cnt] = ((rank == 0) ? 0 : (s - i) * (ofill[j + 1] - ofill[j]));
      cols[cnt]  = dfill[j + 1] - dfill[j] + (s + i) * (ofill[j + 1] - ofill[j]);
      if (rank == 0 && (dd->bx == DM_BOUNDARY_PERIODIC)) {
        if (size > 1) ocols[cnt] += (s - i) * (ofill[j + 1] - ofill[j]);
        else cols[cnt] += (s - i) * (ofill[j + 1] - ofill[j]);
      }
      maxcnt = PetscMax(maxcnt, ocols[cnt] + cols[cnt]);
      cnt++;
    }
  }
  for (i = s; i < nx - s; i++) {
    for (j = 0; j < nc; j++) {
      cols[cnt] = dfill[j + 1] - dfill[j] + 2 * s * (ofill[j + 1] - ofill[j]);
      maxcnt    = PetscMax(maxcnt, ocols[cnt] + cols[cnt]);
      cnt++;
    }
  }
  /* coupling with process to the right */
  for (i = nx - s; i < nx; i++) {
    for (j = 0; j < nc; j++) {
      ocols[cnt] = ((rank == (size - 1)) ? 0 : (i - nx + s + 1) * (ofill[j + 1] - ofill[j]));

PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ_Fill(DM da, Mat J)
{
  DM_DA                 *dd = (DM_DA *)da->data;
  PetscInt               xs, nx, i, j, gxs, gnx, row, k, l;
  PetscInt               m, dim, s, *cols = NULL, nc, cnt, maxcnt = 0, *ocols;
  PetscInt              *ofill = dd->ofill, *dfill = dd->dfill;
  DMBoundaryType         bx;
  ISLocalToGlobalMapping ltog;
  PetscMPIInt            rank, size;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)da), &rank));
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)da), &size));

  /*
     nc - number of components per grid point
  */
  PetscCall(DMDAGetInfo(da, &dim, &m, NULL, NULL, NULL, NULL, NULL, &nc, &s, &bx, NULL, NULL, NULL));
  PetscCheck(s <= 1, PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "Matrix creation for 1d not implemented correctly for stencil width larger than 1");
  PetscCall(DMDAGetCorners(da, &xs, NULL, NULL, &nx, NULL, NULL));
  PetscCall(DMDAGetGhostCorners(da, &gxs, NULL, NULL, &gnx, NULL, NULL));

  PetscCall(MatSetBlockSize(J, nc));
  PetscCall(PetscCalloc2(nx * nc, &cols, nx * nc, &ocols));

  /*
     note: the counts could be smaller for the first and last process when the boundary is not periodic;
     the counting below does not handle dfill == NULL (a dense diagonal block)
  */
  cnt = 0;
  /* coupling with process to the left */
  for (i = 0; i < s; i++) {
    for (j = 0; j < nc; j++) {
      ocols[cnt] = ((rank == 0) ? 0 : (s - i) * (ofill[j + 1] - ofill[j]));
      cols[cnt]  = dfill[j + 1] - dfill[j] + (s + i) * (ofill[j + 1] - ofill[j]);
      if (rank == 0 && (dd->bx == DM_BOUNDARY_PERIODIC)) {
        if (size > 1) ocols[cnt] += (s - i) * (ofill[j + 1] - ofill[j]);
        else cols[cnt] += (s - i) * (ofill[j + 1] - ofill[j]);
      }
      maxcnt = PetscMax(maxcnt, ocols[cnt] + cols[cnt]);
      cnt++;
    }
  }
  for (i = s; i < nx - s; i++) {
    for (j = 0; j < nc; j++) {
      cols[cnt] = dfill[j + 1] - dfill[j] + 2 * s * (ofill[j + 1] - ofill[j]);
      maxcnt    = PetscMax(maxcnt, ocols[cnt] + cols[cnt]);
      cnt++;
    }
  }
  /* coupling with process to the right */
  for (i = nx - s; i < nx; i++) {
    for (j = 0; j < nc; j++) {
      ocols[cnt] = ((rank == (size - 1)) ? 0 : (i - nx + s + 1) * (ofill[j + 1] - ofill[j]));
      cols[cnt]  = dfill[j + 1] - dfill[j] + (s + nx - i - 1) * (ofill[j + 1] - ofill[j]);
      if ((rank == size - 1) && (dd->bx == DM_BOUNDARY_PERIODIC)) {
        if (size > 1) ocols[cnt] += (i - nx + s + 1) * (ofill[j + 1] - ofill[j]);
        else cols[cnt] += (i - nx + s + 1) * (ofill[j + 1] - ofill[j]);
      }
      maxcnt = PetscMax(maxcnt, ocols[cnt] + cols[cnt]);
      cnt++;
    }
  }

  PetscCall(MatSeqAIJSetPreallocation(J, 0, cols));
  PetscCall(MatMPIAIJSetPreallocation(J, 0, cols, 0, ocols));
  PetscCall(PetscFree2(cols, ocols));

  PetscCall(DMGetLocalToGlobalMapping(da, &ltog));
  PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));

  /*
     For each node in the grid the rows are assembled with global indices directly,
     so MatSetValues() is used rather than MatSetValuesLocal().
  */
  if (!da->prealloc_only) {
    PetscCall(PetscMalloc1(maxcnt, &cols));
    row = xs * nc;
    /* coupling with process to the left */
    for (i = xs; i < xs + s; i++) {
      for (j = 0; j < nc; j++) {
        cnt = 0;
        if (rank) {
          for (l = 0; l < s; l++) {
            for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i - s + l) * nc + ofill[k];
          }
        }
        if (rank == 0 && (dd->bx == DM_BOUNDARY_PERIODIC)) {
          for (l = 0; l < s; l++) {
            for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (m + i - s - l) * nc + ofill[k];
          }
        }
        if (dfill) {
          for (k = dfill[j]; k < dfill[j + 1]; k++) cols[cnt++] = i * nc + dfill[k];
        } else {
          for (k = 0; k < nc; k++) cols[cnt++] = i * nc + k;
        }
        for (l = 0; l < s; l++) {
          for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i + s - l) * nc + ofill[k];
        }
        PetscCall(MatSetValues(J, 1, &row, cnt, cols, NULL, INSERT_VALUES));
        row++;
      }
    }
    for (i = xs + s; i < xs + nx - s; i++) {
      for (j = 0; j < nc; j++) {
        cnt = 0;
        for (l = 0; l < s; l++) {
          for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i - s + l) * nc + ofill[k];
        }
        if (dfill) {
          for (k = dfill[j]; k < dfill[j + 1]; k++) cols[cnt++] = i * nc + dfill[k];
        } else {
          for (k = 0; k < nc; k++) cols[cnt++] = i * nc + k;
        }
        for (l = 0; l < s; l++) {
          for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i + s - l) * nc + ofill[k];
        }
        PetscCall(MatSetValues(J, 1, &row, cnt, cols, NULL, INSERT_VALUES));
        row++;
      }
    }
    /* coupling with process to the right */
    for (i = xs + nx - s; i < xs + nx; i++) {
      for (j = 0; j < nc; j++) {
        cnt = 0;
        for (l = 0; l < s; l++) {
          for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i - s + l) * nc + ofill[k];
        }
        if (dfill) {
          for (k = dfill[j]; k < dfill[j + 1]; k++) cols[cnt++] = i * nc + dfill[k];
        } else {
          for (k = 0; k < nc; k++) cols[cnt++] = i * nc + k;
        }
        if (rank < size - 1) {
          for (l = 0; l < s; l++) {
            for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i + s - l) * nc + ofill[k];
          }
        }
        if ((rank == size - 1) && (dd->bx == DM_BOUNDARY_PERIODIC)) {
          for (l = 0; l < s; l++) {
            for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i - s - l - m + 2) * nc + ofill[k];
          }
        }
        PetscCall(MatSetValues(J, 1, &row, cnt, cols, NULL, INSERT_VALUES));
        row++;
      }
    }
    PetscCall(PetscFree(cols));
    /* do not copy values to GPU since they are all zero and not yet needed there */
    PetscCall(MatBindToCPU(J, PETSC_TRUE));
    PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatBindToCPU(J, PETSC_FALSE));
    PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
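
/*
   Illustrative sketch (assumed nc = 2, s = 1, not part of the build): the dense
   1d routine below couples every component with every component of the
   neighboring nodes, interleaving components node by node.  An interior node
   with local index slot generates

.vb
   rows = {2*slot, 2*slot + 1}
   cols = {2*(slot-1), 2*(slot-1) + 1, 2*slot, 2*slot + 1, 2*(slot+1), 2*(slot+1) + 1}
.ve

   and all nc rows receive the same column list, which is what makes the
   MatSetBlockSize(J, nc) description of the matrix valid.
*/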

PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ(DM da, Mat J)
{
  PetscInt               xs, nx, i, i1, slot, gxs, gnx;
  PetscInt               m, dim, s, *cols = NULL, nc, *rows = NULL, col, cnt, l;
  PetscInt               istart, iend;
  DMBoundaryType         bx;
  ISLocalToGlobalMapping ltog, mltog;

  PetscFunctionBegin;
  /*
     nc  - number of components per grid point
     col - number of colors needed in one direction for single component problem
  */
  PetscCall(DMDAGetInfo(da, &dim, &m, NULL, NULL, NULL, NULL, NULL, &nc, &s, &bx, NULL, NULL, NULL));
  if (bx == DM_BOUNDARY_NONE) PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_TRUE));
  col = 2 * s + 1;

  PetscCall(DMDAGetCorners(da, &xs, NULL, NULL, &nx, NULL, NULL));
  PetscCall(DMDAGetGhostCorners(da, &gxs, NULL, NULL, &gnx, NULL, NULL));

  PetscCall(MatSetBlockSize(J, nc));
  PetscCall(MatSeqAIJSetPreallocation(J, col * nc, NULL));
  PetscCall(MatMPIAIJSetPreallocation(J, col * nc, NULL, col * nc, NULL));

  PetscCall(DMGetLocalToGlobalMapping(da, &ltog));
  PetscCall(MatGetLocalToGlobalMapping(J, &mltog, NULL));
  if (!mltog) PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));

  /*
     For each node in the grid we get the neighbors in the local (on-process) ordering
     that includes the ghost points, then MatSetValuesLocal() maps those indices to the
     global PETSc ordering.
  */
  if (!da->prealloc_only) {
    PetscCall(PetscMalloc2(nc, &rows, col * nc * nc, &cols));
    for (i = xs; i < xs + nx; i++) {
      istart = PetscMax(-s, gxs - i);
      iend   = PetscMin(s, gxs + gnx - i - 1);
      slot   = i - gxs;

      cnt = 0;
      for (i1 = istart; i1 < iend + 1; i1++) {
        cols[cnt++] = nc * (slot + i1);
        for (l = 1; l < nc; l++) {
          cols[cnt] = 1 + cols[cnt - 1];
          cnt++;
        }
      }
      rows[0] = nc * (slot);
      for (l = 1; l < nc; l++) rows[l] = 1 + rows[l - 1];
      PetscCall(MatSetValuesLocal(J, nc, rows, cnt, cols, NULL, INSERT_VALUES));
    }
    /* do not copy values to GPU since they are all zero and not yet needed there */
    PetscCall(MatBindToCPU(J, PETSC_TRUE));
    PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
    if (bx == DM_BOUNDARY_NONE) PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_FALSE));
    PetscCall(MatBindToCPU(J, PETSC_FALSE));
    PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
    PetscCall(PetscFree2(rows, cols));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
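
/*
   Illustrative arithmetic (assumed sizes, not part of the build): the
   "no preallocation" variant below reserves one flat buffer of nx*nc*col*nc
   entries for the whole local matrix.  For a hypothetical local grid of
   nx = 100 points with nc = 2 components and stencil width s = 1 (so col = 3),
   that is 100 * 2 * 3 * 2 = 1200 column slots, the worst case of each of the
   200 rows holding col*nc = 6 entries.
*/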

PetscErrorCode DMCreateMatrix_DA_1d_SeqAIJ_NoPreallocation(DM da, Mat J)
{
  PetscInt               xs, nx, i, i1, slot, gxs, gnx;
  PetscInt               m, dim, s, *cols = NULL, nc, *rows = NULL, col, cnt, l;
  PetscInt               istart, iend;
  DMBoundaryType         bx;
  ISLocalToGlobalMapping ltog, mltog;

  PetscFunctionBegin;
  /*
     nc  - number of components per grid point
     col - number of colors needed in one direction for single component problem
  */
  PetscCall(DMDAGetInfo(da, &dim, &m, NULL, NULL, NULL, NULL, NULL, &nc, &s, &bx, NULL, NULL, NULL));
  col = 2 * s + 1;

  PetscCall(DMDAGetCorners(da, &xs, NULL, NULL, &nx, NULL, NULL));
  PetscCall(DMDAGetGhostCorners(da, &gxs, NULL, NULL, &gnx, NULL, NULL));

  PetscCall(MatSetBlockSize(J, nc));
  PetscCall(MatSeqAIJSetTotalPreallocation(J, nx * nc * col * nc));

  PetscCall(DMGetLocalToGlobalMapping(da, &ltog));
  PetscCall(MatGetLocalToGlobalMapping(J, &mltog, NULL));
  if (!mltog) PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));

  /*
     For each node in the grid we get the neighbors in the local (on-process) ordering
     that includes the ghost points, then MatSetValuesLocal() maps those indices to the
     global PETSc ordering.
  */
  if (!da->prealloc_only) {
    PetscCall(PetscMalloc2(nc, &rows, col * nc * nc, &cols));
    for (i = xs; i < xs + nx; i++) {
      istart = PetscMax(-s, gxs - i);
      iend   = PetscMin(s, gxs + gnx - i - 1);
      slot   = i - gxs;

      cnt = 0;
      for (i1 = istart; i1 < iend + 1; i1++) {
        cols[cnt++] = nc * (slot + i1);
        for (l = 1; l < nc; l++) {
          cols[cnt] = 1 + cols[cnt - 1];
          cnt++;
        }
      }
      rows[0] = nc * (slot);
      for (l = 1; l < nc; l++) rows[l] = 1 + rows[l - 1];
      PetscCall(MatSetValuesLocal(J, nc, rows, cnt, cols, NULL, INSERT_VALUES));
    }
    /* do not copy values to GPU since they are all zero and not yet needed there */
    PetscCall(MatBindToCPU(J, PETSC_TRUE));
    PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
    if (bx == DM_BOUNDARY_NONE) PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_FALSE));
    PetscCall(MatBindToCPU(J, PETSC_FALSE));
    PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
    PetscCall(PetscFree2(rows, cols));
  }
  PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_FALSE));
  PetscFunctionReturn(PETSC_SUCCESS);
}
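
/*
   Illustrative counts (assumed s = 1, not part of the build): in the BAIJ
   routines each grid node is a single block row, so only block columns are
   counted.  An interior node of the 2d grid below has

.vb
   DMDA_STENCIL_STAR: 2*2*s + 1   = 5 block columns (ii == 0 or jj == 0)
   DMDA_STENCIL_BOX:  (2*s + 1)^2 = 9 block columns (the full square)
.ve

   and every inserted block carries nc*nc scalars, which is why the zeroed
   values buffer is sized col*col*nc*nc.
*/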

PetscErrorCode DMCreateMatrix_DA_2d_MPIBAIJ(DM da, Mat J)
{
  PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
  PetscInt               m, n, dim, s, *cols, nc, col, cnt, *dnz, *onz;
  PetscInt               istart, iend, jstart, jend, ii, jj;
  MPI_Comm               comm;
  PetscScalar           *values;
  DMBoundaryType         bx, by;
  DMDAStencilType        st;
  ISLocalToGlobalMapping ltog;

  PetscFunctionBegin;
  /*
     nc  - number of components per grid point
     col - number of colors needed in one direction for single component problem
  */
  PetscCall(DMDAGetInfo(da, &dim, &m, &n, NULL, NULL, NULL, NULL, &nc, &s, &bx, &by, NULL, &st));
  col = 2 * s + 1;

  PetscCall(DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL));
  PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL));
  PetscCall(PetscObjectGetComm((PetscObject)da, &comm));

  PetscCall(PetscMalloc1(col * col * nc * nc, &cols));

  PetscCall(DMGetLocalToGlobalMapping(da, &ltog));

  /* determine the matrix preallocation information */
  MatPreallocateBegin(comm, nx * ny, nx * ny, dnz, onz);
  for (i = xs; i < xs + nx; i++) {
    istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
    iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
    for (j = ys; j < ys + ny; j++) {
      jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
      jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
      slot   = i - gxs + gnx * (j - gys);

      /* Find block columns in block row */
      cnt = 0;
      for (ii = istart; ii < iend + 1; ii++) {
        for (jj = jstart; jj < jend + 1; jj++) {
          if (st == DMDA_STENCIL_BOX || !ii || !jj) { /* BOX or on the STAR */
            cols[cnt++] = slot + ii + gnx * jj;
          }
        }
      }
      PetscCall(MatPreallocateSetLocalBlock(ltog, 1, &slot, ltog, cnt, cols, dnz, onz));
    }
  }
  PetscCall(MatSeqBAIJSetPreallocation(J, nc, 0, dnz));
  PetscCall(MatMPIBAIJSetPreallocation(J, nc, 0, dnz, 0, onz));
  MatPreallocateEnd(dnz, onz);

  PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));

  /*
     For each node in the grid we get the neighbors in the local (on-process) ordering
     that includes the ghost points, then MatSetValuesBlockedLocal() maps those indices
     to the global PETSc ordering.
  */
  if (!da->prealloc_only) {
    PetscCall(PetscCalloc1(col * col * nc * nc, &values));
    for (i = xs; i < xs + nx; i++) {
      istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
      iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
      for (j = ys; j < ys + ny; j++) {
        jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
        jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
        slot   = i - gxs + gnx * (j - gys);
        cnt    = 0;
        for (ii = istart; ii < iend + 1; ii++) {
          for (jj = jstart; jj < jend + 1; jj++) {
            if (st == DMDA_STENCIL_BOX || !ii || !jj) { /* BOX or on the STAR */
              cols[cnt++] = slot + ii + gnx * jj;
            }
          }
        }
        PetscCall(MatSetValuesBlockedLocal(J, 1, &slot, cnt, cols, values, INSERT_VALUES));
      }
    }
    PetscCall(PetscFree(values));
    /* do not copy values to GPU since they are all zero and not yet needed there */
    PetscCall(MatBindToCPU(J, PETSC_TRUE));
    PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatBindToCPU(J, PETSC_FALSE));
    PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  }
  PetscCall(PetscFree(cols));
  PetscFunctionReturn(PETSC_SUCCESS);
}
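
/*
   Illustrative note (not part of the build) on the star test used in the 3d
   loops below: an offset (ii, jj, kk) lies on the star exactly when at least
   two of the three offsets are zero, i.e. the point sits on a coordinate axis.
   For example, with an assumed s = 1:

.vb
   (0, 0, 1): (!ii && !jj) is true       -> kept for STAR and BOX
   (1, 1, 0): all three pairs are false  -> kept only for BOX
.ve
*/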

PetscErrorCode DMCreateMatrix_DA_3d_MPIBAIJ(DM da, Mat J)
{
  PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
  PetscInt               m, n, dim, s, *cols, k, nc, col, cnt, p, *dnz, *onz;
  PetscInt               istart, iend, jstart, jend, kstart, kend, zs, nz, gzs, gnz, ii, jj, kk;
  MPI_Comm               comm;
  PetscScalar           *values;
  DMBoundaryType         bx, by, bz;
  DMDAStencilType        st;
  ISLocalToGlobalMapping ltog;

  PetscFunctionBegin;
  /*
     nc  - number of components per grid point
     col - number of colors needed in one direction for single component problem
  */
  PetscCall(DMDAGetInfo(da, &dim, &m, &n, &p, NULL, NULL, NULL, &nc, &s, &bx, &by, &bz, &st));
  col = 2 * s + 1;

  PetscCall(DMDAGetCorners(da, &xs, &ys, &zs, &nx, &ny, &nz));
  PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, &gzs, &gnx, &gny, &gnz));
  PetscCall(PetscObjectGetComm((PetscObject)da, &comm));

  PetscCall(PetscMalloc1(col * col * col, &cols));

  PetscCall(DMGetLocalToGlobalMapping(da, &ltog));

  /* determine the matrix preallocation information */
  MatPreallocateBegin(comm, nx * ny * nz, nx * ny * nz, dnz, onz);
  for (i = xs; i < xs + nx; i++) {
    istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
    iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
    for (j = ys; j < ys + ny; j++) {
      jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
      jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
      for (k = zs; k < zs + nz; k++) {
        kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
        kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));

        slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);

        /* Find block columns in block row */
        cnt = 0;
        for (ii = istart; ii < iend + 1; ii++) {
          for (jj = jstart; jj < jend + 1; jj++) {
            for (kk = kstart; kk < kend + 1; kk++) {
              if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star */
                cols[cnt++] = slot + ii + gnx * jj + gnx * gny * kk;
              }
            }
          }
        }
        PetscCall(MatPreallocateSetLocalBlock(ltog, 1, &slot, ltog, cnt, cols, dnz, onz));
      }
    }
  }
  PetscCall(MatSeqBAIJSetPreallocation(J, nc, 0, dnz));
  PetscCall(MatMPIBAIJSetPreallocation(J, nc, 0, dnz, 0, onz));
  MatPreallocateEnd(dnz, onz);

  PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));

  /*
     For each node in the grid we get the neighbors in the local (on-process) ordering
     that includes the ghost points, then MatSetValuesBlockedLocal() maps those indices
     to the global PETSc ordering.
  */
  if (!da->prealloc_only) {
    PetscCall(PetscCalloc1(col * col * col * nc * nc, &values));
    for (i = xs; i < xs + nx; i++) {
      istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
      iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
      for (j = ys; j < ys + ny; j++) {
        jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
        jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
        for (k = zs; k < zs + nz; k++) {
          kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
          kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));

          slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);

          cnt = 0;
          for (ii = istart; ii < iend + 1; ii++) {
            for (jj = jstart; jj < jend + 1; jj++) {
              for (kk = kstart; kk < kend + 1; kk++) {
                if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star */
                  cols[cnt++] = slot + ii + gnx * jj + gnx * gny * kk;
                }
              }
            }
          }
          PetscCall(MatSetValuesBlockedLocal(J, 1, &slot, cnt, cols, values, INSERT_VALUES));
        }
      }
    }
    PetscCall(PetscFree(values));
    /* do not copy values to GPU since they are all zero and not yet needed there */
    PetscCall(MatBindToCPU(J, PETSC_TRUE));
    PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatBindToCPU(J, PETSC_FALSE));
    PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  }
  PetscCall(PetscFree(cols));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  This helper is for the SBAIJ preallocation: it discards the lower-triangular values,
  which are difficult to identify in the local ordering with a periodic domain.
*/
static PetscErrorCode L2GFilterUpperTriangular(ISLocalToGlobalMapping ltog, PetscInt *row, PetscInt *cnt, PetscInt col[])
{
  PetscInt i, n;

  PetscFunctionBegin;
  PetscCall(ISLocalToGlobalMappingApplyBlock(ltog, 1, row, row));
  PetscCall(ISLocalToGlobalMappingApplyBlock(ltog, *cnt, col, col));
  for (i = 0, n = 0; i < *cnt; i++) {
    if (col[i] >= *row) col[n++] = col[i];
  }
  *cnt = n;
  PetscFunctionReturn(PETSC_SUCCESS);
}
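
/*
   Illustrative sketch (assumed numbers, not part of the build): after
   L2GFilterUpperTriangular() maps the block row and the candidate block
   columns to global indices, only columns on or above the diagonal survive.
   With a mapped block row of 10 and mapped candidates {8, 10, 12},

.vb
   cols = {10, 12}, cnt = 2
.ve

   which matches what the SBAIJ formats, storing only the upper triangle, expect.
*/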

PetscErrorCode DMCreateMatrix_DA_2d_MPISBAIJ(DM da, Mat J)
{
  PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
  PetscInt               m, n, dim, s, *cols, nc, col, cnt, *dnz, *onz;
  PetscInt               istart, iend, jstart, jend, ii, jj;
  MPI_Comm               comm;
  PetscScalar           *values;
  DMBoundaryType         bx, by;
  DMDAStencilType        st;
  ISLocalToGlobalMapping ltog;

  PetscFunctionBegin;
  /*
     nc  - number of components per grid point
     col - number of colors needed in one direction for single component problem
  */
  PetscCall(DMDAGetInfo(da, &dim, &m, &n, NULL, NULL, NULL, NULL, &nc, &s, &bx, &by, NULL, &st));
  col = 2 * s + 1;

  PetscCall(DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL));
  PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL));
  PetscCall(PetscObjectGetComm((PetscObject)da, &comm));

  PetscCall(PetscMalloc1(col * col * nc * nc, &cols));

  PetscCall(DMGetLocalToGlobalMapping(da, &ltog));

  /* determine the matrix preallocation information */
  MatPreallocateBegin(comm, nx * ny, nx * ny, dnz, onz);
  for (i = xs; i < xs + nx; i++) {
    istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
    iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
    for (j = ys; j < ys + ny; j++) {
      jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
      jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
      slot   = i - gxs + gnx * (j - gys);

      /* Find block columns in block row */
      cnt = 0;
      for (ii = istart; ii < iend + 1; ii++) {
        for (jj = jstart; jj < jend + 1; jj++) {
          if (st == DMDA_STENCIL_BOX || !ii || !jj) cols[cnt++] = slot + ii + gnx * jj;
        }
      }
      PetscCall(L2GFilterUpperTriangular(ltog, &slot, &cnt, cols));
      PetscCall(MatPreallocateSymmetricSetBlock(slot, cnt, cols, dnz, onz));
    }
  }
  PetscCall(MatSeqSBAIJSetPreallocation(J, nc, 0, dnz));
  PetscCall(MatMPISBAIJSetPreallocation(J, nc, 0, dnz, 0, onz));
  MatPreallocateEnd(dnz, onz);

  PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));

  /*
     For each node in the grid we get the neighbors in the local (on-process) ordering
     that includes the ghost points; L2GFilterUpperTriangular() then maps those indices
     to the global PETSc ordering and keeps only the upper triangle before
     MatSetValuesBlocked() inserts them.
  */
  if (!da->prealloc_only) {
    PetscCall(PetscCalloc1(col * col * nc * nc, &values));
    for (i = xs; i < xs + nx; i++) {
      istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
      iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
      for (j = ys; j < ys + ny; j++) {
        jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
        jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
        slot   = i - gxs + gnx * (j - gys);

        /* Find block columns in block row */
        cnt = 0;
        for (ii = istart; ii < iend + 1; ii++) {
          for (jj = jstart; jj < jend + 1; jj++) {
            if (st == DMDA_STENCIL_BOX || !ii || !jj) cols[cnt++] = slot + ii + gnx * jj;
          }
        }
        PetscCall(L2GFilterUpperTriangular(ltog, &slot, &cnt, cols));
        PetscCall(MatSetValuesBlocked(J, 1, &slot, cnt, cols, values, INSERT_VALUES));
      }
    }
    PetscCall(PetscFree(values));
    /* do not copy values to GPU since they are all zero and not yet needed there */
    PetscCall(MatBindToCPU(J, PETSC_TRUE));
    PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatBindToCPU(J, PETSC_FALSE));
    PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  }
  PetscCall(PetscFree(cols));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode DMCreateMatrix_DA_3d_MPISBAIJ(DM da, Mat J)
{
  PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
  PetscInt               m, n, dim, s, *cols, k, nc, col, cnt, p, *dnz, *onz;
  PetscInt               istart, iend, jstart, jend, kstart, kend, zs, nz, gzs, gnz, ii, jj, kk;
  MPI_Comm               comm;
  PetscScalar           *values;
  DMBoundaryType         bx, by, bz;
  DMDAStencilType        st;
  ISLocalToGlobalMapping ltog;

  PetscFunctionBegin;
  /*
     nc  - number of components per grid point
     col - number of colors needed in one direction for single component problem
  */
  PetscCall(DMDAGetInfo(da, &dim, &m, &n, &p, NULL, NULL, NULL, &nc, &s, &bx, &by, &bz, &st));
  col = 2 * s + 1;

  PetscCall(DMDAGetCorners(da, &xs, &ys, &zs, &nx, &ny, &nz));
  PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, &gzs, &gnx, &gny, &gnz));
  PetscCall(PetscObjectGetComm((PetscObject)da, &comm));

  /* create the matrix */
  PetscCall(PetscMalloc1(col * col * col, &cols));

  PetscCall(DMGetLocalToGlobalMapping(da, &ltog));

  /* determine the matrix preallocation information */
  MatPreallocateBegin(comm, nx * ny * nz, nx * ny * nz, dnz, onz);
  for (i = xs; i < xs + nx; i++) {
    istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
    iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
    for (j = ys; j < ys + ny; j++) {
      jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
      jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
      for (k = zs; k < zs + nz; k++) {
        kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
        kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));

        slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);

        /* Find block columns in block row */
        cnt = 0;
        for (ii = istart; ii < iend + 1; ii++) {
          for (jj = jstart; jj < jend + 1; jj++) {
            for (kk = kstart; kk < kend + 1; kk++) {
              if ((st == DMDA_STENCIL_BOX) || (!ii && !jj) || (!jj && !kk) || (!ii && !kk)) cols[cnt++] = slot + ii + gnx * jj + gnx * gny * kk;
            }
          }
        }
        PetscCall(L2GFilterUpperTriangular(ltog, &slot, &cnt, cols));
        PetscCall(MatPreallocateSymmetricSetBlock(slot, cnt, cols, dnz, onz));
      }
    }
  }
  PetscCall(MatSeqSBAIJSetPreallocation(J, nc, 0, dnz));
  PetscCall(MatMPISBAIJSetPreallocation(J, nc, 0, dnz, 0, onz));
  MatPreallocateEnd(dnz, onz);

  PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));

  /*
     For each node in the grid we get the neighbors in the local (on-process) ordering
     that includes the ghost points; L2GFilterUpperTriangular() then maps those indices
     to the global PETSc ordering and keeps only the upper triangle before
     MatSetValuesBlocked() inserts them.
  */
  if (!da->prealloc_only) {
    PetscCall(PetscCalloc1(col * col * col * nc * nc, &values));
    for (i = xs; i < xs + nx; i++) {
      istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
      iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
      for (j = ys; j < ys + ny; j++) {
        jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
        jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
        for (k = zs; k < zs + nz; k++) {
          kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
          kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));

          slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);

          cnt = 0;
          for (ii = istart; ii < iend + 1; ii++) {
            for (jj = jstart; jj < jend + 1; jj++) {
              for (kk = kstart; kk < kend + 1; kk++) {
                if ((st == DMDA_STENCIL_BOX) || (!ii && !jj) || (!jj && !kk) || (!ii && !kk)) cols[cnt++] = slot + ii + gnx * jj + gnx * gny * kk;
              }
            }
          }
          PetscCall(L2GFilterUpperTriangular(ltog, &slot, &cnt, cols));
          PetscCall(MatSetValuesBlocked(J, 1, &slot, cnt, cols, values, INSERT_VALUES));
        }
      }
    }
    PetscCall(PetscFree(values));
    /* do not copy values to GPU since they are all zero and not yet needed there */
    PetscCall(MatBindToCPU(J, PETSC_TRUE));
    PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatBindToCPU(J, PETSC_FALSE));
    PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  }
  PetscCall(PetscFree(cols));
  PetscFunctionReturn(PETSC_SUCCESS);
}
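
/*
   Illustrative example (assumed sizes, not part of the build) for the
   removedups test in the routine below: with a single process in a periodic
   direction (M == 1) of only m = 2 points and stencil width s = 1, the offsets
   ii = -1 and ii = +1 wrap to the same global column, so the same entry would
   be counted twice during preallocation unless duplicates are removed.
*/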

PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ_Fill(DM da, Mat J)
{
  PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
  PetscInt               m, n, dim, s, *cols, k, nc, row, col, cnt, maxcnt = 0, l, p, *dnz, *onz;
  PetscInt               istart, iend, jstart, jend, kstart, kend, zs, nz, gzs, gnz, ii, jj, kk, M, N, P;
  DM_DA                 *dd = (DM_DA *)da->data;
  PetscInt               ifill_col, *dfill = dd->dfill, *ofill = dd->ofill;
  MPI_Comm               comm;
  PetscScalar           *values;
  DMBoundaryType         bx, by, bz;
  ISLocalToGlobalMapping ltog;
  DMDAStencilType        st;
  PetscBool              removedups = PETSC_FALSE;

  PetscFunctionBegin;
  /*
     nc  - number of components per grid point
     col - number of colors needed in one direction for single component problem
  */
  PetscCall(DMDAGetInfo(da, &dim, &m, &n, &p, &M, &N, &P, &nc, &s, &bx, &by, &bz, &st));
  col = 2 * s + 1;

  /*
     With one process in a skinny periodic dimension the code would label nonzero columns multiple times,
     because "wrapping" around the end of the domain hits an entry already counted from the other direction.
  */
  if (M == 1 && 2 * s >= m) removedups = PETSC_TRUE;
  if (N == 1 && 2 * s >= n) removedups = PETSC_TRUE;
  if (P == 1 && 2 * s >= p) removedups = PETSC_TRUE;

  PetscCall(DMDAGetCorners(da, &xs, &ys, &zs, &nx, &ny, &nz));
  PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, &gzs, &gnx, &gny, &gnz));
  PetscCall(PetscObjectGetComm((PetscObject)da, &comm));

  PetscCall(PetscMalloc1(col * col * col * nc, &cols));
  PetscCall(DMGetLocalToGlobalMapping(da, &ltog));

  /* determine the matrix preallocation information */
  MatPreallocateBegin(comm, nc * nx * ny * nz, nc * nx * ny * nz, dnz, onz);

  PetscCall(MatSetBlockSize(J, nc));
  for (i = xs; i < xs + nx; i++) {
    istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
    iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
    for (j = ys; j < ys + ny; j++) {
      jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
      jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
      for (k = zs; k < zs + nz; k++) {
        kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
        kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));

        slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);

        for (l = 0; l < nc; l++) {
          cnt = 0;
          for (ii = istart; ii < iend + 1; ii++) {
            for (jj = jstart; jj < jend + 1; jj++) {
              for (kk = kstart; kk < kend + 1; kk++) {
                if (ii || jj || kk) {
                  if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star */
                    for (ifill_col = ofill[l]; ifill_col < ofill[l + 1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc * (slot + ii + gnx * jj + gnx * gny * kk);
                  }
                } else {
                  if (dfill) {
                    for (ifill_col = dfill[l]; ifill_col < dfill[l + 1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc * (slot + ii + gnx * jj + gnx * gny * kk);
                  } else {
                    for (ifill_col = 0; ifill_col < nc; ifill_col++) cols[cnt++] = ifill_col + nc * (slot + ii + gnx * jj + gnx * gny * kk);
                  }
                }
              }
            }
          }
          row    = l + nc * (slot);
          maxcnt = PetscMax(maxcnt, cnt);
          if (removedups) PetscCall(MatPreallocateSetLocalRemoveDups(ltog, 1, &row, ltog, cnt, cols, dnz, onz));
          else PetscCall(MatPreallocateSetLocal(ltog, 1, &row, ltog, cnt, cols, dnz, onz));
        }
      }
    }
  }
  PetscCall(MatSeqAIJSetPreallocation(J, 0, dnz));
  PetscCall(MatMPIAIJSetPreallocation(J, 0, dnz, 0, onz));
  MatPreallocateEnd(dnz, onz);
  PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));

  /*
     For each node in the grid we get the neighbors in the local (on-process) ordering
     that includes the ghost points, then MatSetValuesLocal() maps those indices to the
     global PETSc ordering.
  */
  if (!da->prealloc_only) {
    PetscCall(PetscCalloc1(maxcnt, &values));
    for (i = xs; i < xs + nx; i++) {
      istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
      iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
      for (j = ys; j < ys + ny; j++) {
        jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
        jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
        for (k = zs; k < zs + nz; k++) {
          kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
          kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));

          slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);

          for (l = 0; l < nc; l++) {
            cnt = 0;
            for (ii = istart; ii < iend + 1; ii++) {
              for (jj = jstart; jj < jend + 1; jj++) {
                for (kk = kstart; kk < kend + 1; kk++) {
                  if (ii || jj || kk) {
                    if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star */
                      for (ifill_col = ofill[l]; ifill_col < ofill[l + 1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc * (slot + ii + gnx * jj + gnx * gny * kk);
                    }
                  } else {
                    if (dfill) {
                      for (ifill_col = dfill[l]; ifill_col < dfill[l + 1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc * (slot + ii + gnx * jj + gnx * gny * kk);
                    } else {
                      for (ifill_col = 0; ifill_col < nc; ifill_col++) cols[cnt++] = ifill_col + nc * (slot + ii + gnx * jj + gnx * gny * kk);
                    }
                  }
                }
              }
            }
            row = l + nc * (slot);
            PetscCall(MatSetValuesLocal(J, 1, &row, cnt, cols, values, INSERT_VALUES));
          }
        }
      }
    }
    PetscCall(PetscFree(values));
    /* do not copy values to GPU since they are all zero and not yet needed there */
    PetscCall(MatBindToCPU(J, PETSC_TRUE));
    PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatBindToCPU(J, PETSC_FALSE));
    PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  }
  PetscCall(PetscFree(cols));
  PetscFunctionReturn(PETSC_SUCCESS);
}