1 2 #include <petsc/private/dmdaimpl.h> /*I "petscdmda.h" I*/ 3 #include <petscmat.h> 4 #include <petscbt.h> 5 6 extern PetscErrorCode DMCreateColoring_DA_1d_MPIAIJ(DM, ISColoringType, ISColoring *); 7 extern PetscErrorCode DMCreateColoring_DA_2d_MPIAIJ(DM, ISColoringType, ISColoring *); 8 extern PetscErrorCode DMCreateColoring_DA_2d_5pt_MPIAIJ(DM, ISColoringType, ISColoring *); 9 extern PetscErrorCode DMCreateColoring_DA_3d_MPIAIJ(DM, ISColoringType, ISColoring *); 10 11 /* 12 For ghost i that may be negative or greater than the upper bound this 13 maps it into the 0:m-1 range using periodicity 14 */ 15 #define SetInRange(i, m) ((i < 0) ? m + i : ((i >= m) ? i - m : i)) 16 17 static PetscErrorCode DMDASetBlockFills_Private(const PetscInt *dfill, PetscInt w, PetscInt **rfill) 18 { 19 PetscInt i, j, nz, *fill; 20 21 PetscFunctionBegin; 22 if (!dfill) PetscFunctionReturn(PETSC_SUCCESS); 23 24 /* count number nonzeros */ 25 nz = 0; 26 for (i = 0; i < w; i++) { 27 for (j = 0; j < w; j++) { 28 if (dfill[w * i + j]) nz++; 29 } 30 } 31 PetscCall(PetscMalloc1(nz + w + 1, &fill)); 32 /* construct modified CSR storage of nonzero structure */ 33 /* fill[0 -- w] marks starts of each row of column indices (and end of last row) 34 so fill[1] - fill[0] gives number of nonzeros in first row etc */ 35 nz = w + 1; 36 for (i = 0; i < w; i++) { 37 fill[i] = nz; 38 for (j = 0; j < w; j++) { 39 if (dfill[w * i + j]) { 40 fill[nz] = j; 41 nz++; 42 } 43 } 44 } 45 fill[w] = nz; 46 47 *rfill = fill; 48 PetscFunctionReturn(PETSC_SUCCESS); 49 } 50 51 static PetscErrorCode DMDASetBlockFillsSparse_Private(const PetscInt *dfillsparse, PetscInt w, PetscInt **rfill) 52 { 53 PetscInt nz; 54 55 PetscFunctionBegin; 56 if (!dfillsparse) PetscFunctionReturn(PETSC_SUCCESS); 57 58 /* Determine number of non-zeros */ 59 nz = (dfillsparse[w] - w - 1); 60 61 /* Allocate space for our copy of the given sparse matrix representation. 
*/ 62 PetscCall(PetscMalloc1(nz + w + 1, rfill)); 63 PetscCall(PetscArraycpy(*rfill, dfillsparse, nz + w + 1)); 64 PetscFunctionReturn(PETSC_SUCCESS); 65 } 66 67 static PetscErrorCode DMDASetBlockFills_Private2(DM_DA *dd) 68 { 69 PetscInt i, k, cnt = 1; 70 71 PetscFunctionBegin; 72 73 /* ofillcount tracks the columns of ofill that have any nonzero in thems; the value in each location is the number of 74 columns to the left with any nonzeros in them plus 1 */ 75 PetscCall(PetscCalloc1(dd->w, &dd->ofillcols)); 76 for (i = 0; i < dd->w; i++) { 77 for (k = dd->ofill[i]; k < dd->ofill[i + 1]; k++) dd->ofillcols[dd->ofill[k]] = 1; 78 } 79 for (i = 0; i < dd->w; i++) { 80 if (dd->ofillcols[i]) dd->ofillcols[i] = cnt++; 81 } 82 PetscFunctionReturn(PETSC_SUCCESS); 83 } 84 85 /*@ 86 DMDASetBlockFills - Sets the fill pattern in each block for a multi-component problem 87 of the matrix returned by `DMCreateMatrix()`. 88 89 Logically Collective on da 90 91 Input Parameters: 92 + da - the distributed array 93 . dfill - the fill pattern in the diagonal block (may be NULL, means use dense block) 94 - ofill - the fill pattern in the off-diagonal blocks 95 96 Level: developer 97 98 Notes: 99 This only makes sense when you are doing multicomponent problems but using the 100 `MATMPIAIJ` matrix format 101 102 The format for dfill and ofill is a 2 dimensional dof by dof matrix with 1 entries 103 representing coupling and 0 entries for missing coupling. For example 104 .vb 105 dfill[9] = {1, 0, 0, 106 1, 1, 0, 107 0, 1, 1} 108 .ve 109 means that row 0 is coupled with only itself in the diagonal block, row 1 is coupled with 110 itself and row 0 (in the diagonal block) and row 2 is coupled with itself and row 1 (in the 111 diagonal block). 
112 113 `DMDASetGetMatrix()` allows you to provide general code for those more complicated nonzero patterns then 114 can be represented in the dfill, ofill format 115 116 Contributed by Glenn Hammond 117 118 .seealso: `DM`, `DMDA`, `DMCreateMatrix()`, `DMDASetGetMatrix()`, `DMSetMatrixPreallocateOnly()` 119 @*/ 120 PetscErrorCode DMDASetBlockFills(DM da, const PetscInt *dfill, const PetscInt *ofill) 121 { 122 DM_DA *dd = (DM_DA *)da->data; 123 124 PetscFunctionBegin; 125 /* save the given dfill and ofill information */ 126 PetscCall(DMDASetBlockFills_Private(dfill, dd->w, &dd->dfill)); 127 PetscCall(DMDASetBlockFills_Private(ofill, dd->w, &dd->ofill)); 128 129 /* count nonzeros in ofill columns */ 130 PetscCall(DMDASetBlockFills_Private2(dd)); 131 PetscFunctionReturn(PETSC_SUCCESS); 132 } 133 134 /*@ 135 DMDASetBlockFillsSparse - Sets the fill pattern in each block for a multi-component problem 136 of the matrix returned by `DMCreateMatrix()`, using sparse representations 137 of fill patterns. 138 139 Logically Collective on da 140 141 Input Parameters: 142 + da - the distributed array 143 . dfill - the sparse fill pattern in the diagonal block (may be NULL, means use dense block) 144 - ofill - the sparse fill pattern in the off-diagonal blocks 145 146 Level: developer 147 148 Notes: 149 This only makes sense when you are doing multicomponent problems but using the 150 `MATMPIAIJ` matrix format 151 152 The format for dfill and ofill is a sparse representation of a 153 dof-by-dof matrix with 1 entries representing coupling and 0 entries 154 for missing coupling. The sparse representation is a 1 dimensional 155 array of length nz + dof + 1, where nz is the number of non-zeros in 156 the matrix. The first dof entries in the array give the 157 starting array indices of each row's items in the rest of the array, 158 the dof+1st item contains the value nz + dof + 1 (i.e. 
the entire length of the array) 159 and the remaining nz items give the column indices of each of 160 the 1s within the logical 2D matrix. Each row's items within 161 the array are the column indices of the 1s within that row 162 of the 2D matrix. PETSc developers may recognize that this is the 163 same format as that computed by the `DMDASetBlockFills_Private()` 164 function from a dense 2D matrix representation. 165 166 `DMDASetGetMatrix()` allows you to provide general code for those more complicated nonzero patterns then 167 can be represented in the dfill, ofill format 168 169 Contributed by Philip C. Roth 170 171 .seealso: `DM`, `DMDA`, `DMDASetBlockFills()`, `DMCreateMatrix()`, `DMDASetGetMatrix()`, `DMSetMatrixPreallocateOnly()` 172 @*/ 173 PetscErrorCode DMDASetBlockFillsSparse(DM da, const PetscInt *dfillsparse, const PetscInt *ofillsparse) 174 { 175 DM_DA *dd = (DM_DA *)da->data; 176 177 PetscFunctionBegin; 178 /* save the given dfill and ofill information */ 179 PetscCall(DMDASetBlockFillsSparse_Private(dfillsparse, dd->w, &dd->dfill)); 180 PetscCall(DMDASetBlockFillsSparse_Private(ofillsparse, dd->w, &dd->ofill)); 181 182 /* count nonzeros in ofill columns */ 183 PetscCall(DMDASetBlockFills_Private2(dd)); 184 PetscFunctionReturn(PETSC_SUCCESS); 185 } 186 187 PetscErrorCode DMCreateColoring_DA(DM da, ISColoringType ctype, ISColoring *coloring) 188 { 189 PetscInt dim, m, n, p, nc; 190 DMBoundaryType bx, by, bz; 191 MPI_Comm comm; 192 PetscMPIInt size; 193 PetscBool isBAIJ; 194 DM_DA *dd = (DM_DA *)da->data; 195 196 PetscFunctionBegin; 197 /* 198 m 199 ------------------------------------------------------ 200 | | 201 | | 202 | ---------------------- | 203 | | | | 204 n | yn | | | 205 | | | | 206 | .--------------------- | 207 | (xs,ys) xn | 208 | . 
| 209 | (gxs,gys) | 210 | | 211 ----------------------------------------------------- 212 */ 213 214 /* 215 nc - number of components per grid point 216 col - number of colors needed in one direction for single component problem 217 218 */ 219 PetscCall(DMDAGetInfo(da, &dim, NULL, NULL, NULL, &m, &n, &p, &nc, NULL, &bx, &by, &bz, NULL)); 220 221 PetscCall(PetscObjectGetComm((PetscObject)da, &comm)); 222 PetscCallMPI(MPI_Comm_size(comm, &size)); 223 if (ctype == IS_COLORING_LOCAL) { 224 if (size == 1) { 225 ctype = IS_COLORING_GLOBAL; 226 } else { 227 PetscCheck((dim == 1) || !((m == 1 && bx == DM_BOUNDARY_PERIODIC) || (n == 1 && by == DM_BOUNDARY_PERIODIC) || (p == 1 && bz == DM_BOUNDARY_PERIODIC)), PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "IS_COLORING_LOCAL cannot be used for periodic boundary condition having both ends of the domain on the same process"); 228 } 229 } 230 231 /* Tell the DMDA it has 1 degree of freedom per grid point so that the coloring for BAIJ 232 matrices is for the blocks, not the individual matrix elements */ 233 PetscCall(PetscStrbeginswith(da->mattype, MATBAIJ, &isBAIJ)); 234 if (!isBAIJ) PetscCall(PetscStrbeginswith(da->mattype, MATMPIBAIJ, &isBAIJ)); 235 if (!isBAIJ) PetscCall(PetscStrbeginswith(da->mattype, MATSEQBAIJ, &isBAIJ)); 236 if (isBAIJ) { 237 dd->w = 1; 238 dd->xs = dd->xs / nc; 239 dd->xe = dd->xe / nc; 240 dd->Xs = dd->Xs / nc; 241 dd->Xe = dd->Xe / nc; 242 } 243 244 /* 245 We do not provide a getcoloring function in the DMDA operations because 246 the basic DMDA does not know about matrices. We think of DMDA as being 247 more low-level then matrices. 
248 */ 249 if (dim == 1) PetscCall(DMCreateColoring_DA_1d_MPIAIJ(da, ctype, coloring)); 250 else if (dim == 2) PetscCall(DMCreateColoring_DA_2d_MPIAIJ(da, ctype, coloring)); 251 else if (dim == 3) PetscCall(DMCreateColoring_DA_3d_MPIAIJ(da, ctype, coloring)); 252 else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "Not done for %" PetscInt_FMT " dimension, send us mail petsc-maint@mcs.anl.gov for code", dim); 253 if (isBAIJ) { 254 dd->w = nc; 255 dd->xs = dd->xs * nc; 256 dd->xe = dd->xe * nc; 257 dd->Xs = dd->Xs * nc; 258 dd->Xe = dd->Xe * nc; 259 } 260 PetscFunctionReturn(PETSC_SUCCESS); 261 } 262 263 /* ---------------------------------------------------------------------------------*/ 264 265 PetscErrorCode DMCreateColoring_DA_2d_MPIAIJ(DM da, ISColoringType ctype, ISColoring *coloring) 266 { 267 PetscInt xs, ys, nx, ny, i, j, ii, gxs, gys, gnx, gny, m, n, M, N, dim, s, k, nc, col; 268 PetscInt ncolors = 0; 269 MPI_Comm comm; 270 DMBoundaryType bx, by; 271 DMDAStencilType st; 272 ISColoringValue *colors; 273 DM_DA *dd = (DM_DA *)da->data; 274 275 PetscFunctionBegin; 276 /* 277 nc - number of components per grid point 278 col - number of colors needed in one direction for single component problem 279 280 */ 281 PetscCall(DMDAGetInfo(da, &dim, &m, &n, NULL, &M, &N, NULL, &nc, &s, &bx, &by, NULL, &st)); 282 col = 2 * s + 1; 283 PetscCall(DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL)); 284 PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL)); 285 PetscCall(PetscObjectGetComm((PetscObject)da, &comm)); 286 287 /* special case as taught to us by Paul Hovland */ 288 if (st == DMDA_STENCIL_STAR && s == 1) { 289 PetscCall(DMCreateColoring_DA_2d_5pt_MPIAIJ(da, ctype, coloring)); 290 } else { 291 if (ctype == IS_COLORING_GLOBAL) { 292 if (!dd->localcoloring) { 293 PetscCall(PetscMalloc1(nc * nx * ny, &colors)); 294 ii = 0; 295 for (j = ys; j < ys + ny; j++) { 296 for (i = xs; i < xs + nx; i++) { 297 for (k = 0; k < nc; k++) colors[ii++] = k 
+ nc * ((i % col) + col * (j % col)); 298 } 299 } 300 ncolors = nc + nc * (col - 1 + col * (col - 1)); 301 PetscCall(ISColoringCreate(comm, ncolors, nc * nx * ny, colors, PETSC_OWN_POINTER, &dd->localcoloring)); 302 } 303 *coloring = dd->localcoloring; 304 } else if (ctype == IS_COLORING_LOCAL) { 305 if (!dd->ghostedcoloring) { 306 PetscCall(PetscMalloc1(nc * gnx * gny, &colors)); 307 ii = 0; 308 for (j = gys; j < gys + gny; j++) { 309 for (i = gxs; i < gxs + gnx; i++) { 310 for (k = 0; k < nc; k++) { 311 /* the complicated stuff is to handle periodic boundaries */ 312 colors[ii++] = k + nc * ((SetInRange(i, m) % col) + col * (SetInRange(j, n) % col)); 313 } 314 } 315 } 316 ncolors = nc + nc * (col - 1 + col * (col - 1)); 317 PetscCall(ISColoringCreate(comm, ncolors, nc * gnx * gny, colors, PETSC_OWN_POINTER, &dd->ghostedcoloring)); 318 /* PetscIntView(ncolors,(PetscInt*)colors,0); */ 319 320 PetscCall(ISColoringSetType(dd->ghostedcoloring, IS_COLORING_LOCAL)); 321 } 322 *coloring = dd->ghostedcoloring; 323 } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_ARG_WRONG, "Unknown ISColoringType %d", (int)ctype); 324 } 325 PetscCall(ISColoringReference(*coloring)); 326 PetscFunctionReturn(PETSC_SUCCESS); 327 } 328 329 /* ---------------------------------------------------------------------------------*/ 330 331 PetscErrorCode DMCreateColoring_DA_3d_MPIAIJ(DM da, ISColoringType ctype, ISColoring *coloring) 332 { 333 PetscInt xs, ys, nx, ny, i, j, gxs, gys, gnx, gny, m, n, p, dim, s, k, nc, col, zs, gzs, ii, l, nz, gnz, M, N, P; 334 PetscInt ncolors; 335 MPI_Comm comm; 336 DMBoundaryType bx, by, bz; 337 DMDAStencilType st; 338 ISColoringValue *colors; 339 DM_DA *dd = (DM_DA *)da->data; 340 341 PetscFunctionBegin; 342 /* 343 nc - number of components per grid point 344 col - number of colors needed in one direction for single component problem 345 */ 346 PetscCall(DMDAGetInfo(da, &dim, &m, &n, &p, &M, &N, &P, &nc, &s, &bx, &by, &bz, &st)); 347 col = 2 * s + 1; 348 
PetscCall(DMDAGetCorners(da, &xs, &ys, &zs, &nx, &ny, &nz)); 349 PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, &gzs, &gnx, &gny, &gnz)); 350 PetscCall(PetscObjectGetComm((PetscObject)da, &comm)); 351 352 /* create the coloring */ 353 if (ctype == IS_COLORING_GLOBAL) { 354 if (!dd->localcoloring) { 355 PetscCall(PetscMalloc1(nc * nx * ny * nz, &colors)); 356 ii = 0; 357 for (k = zs; k < zs + nz; k++) { 358 for (j = ys; j < ys + ny; j++) { 359 for (i = xs; i < xs + nx; i++) { 360 for (l = 0; l < nc; l++) colors[ii++] = l + nc * ((i % col) + col * (j % col) + col * col * (k % col)); 361 } 362 } 363 } 364 ncolors = nc + nc * (col - 1 + col * (col - 1) + col * col * (col - 1)); 365 PetscCall(ISColoringCreate(comm, ncolors, nc * nx * ny * nz, colors, PETSC_OWN_POINTER, &dd->localcoloring)); 366 } 367 *coloring = dd->localcoloring; 368 } else if (ctype == IS_COLORING_LOCAL) { 369 if (!dd->ghostedcoloring) { 370 PetscCall(PetscMalloc1(nc * gnx * gny * gnz, &colors)); 371 ii = 0; 372 for (k = gzs; k < gzs + gnz; k++) { 373 for (j = gys; j < gys + gny; j++) { 374 for (i = gxs; i < gxs + gnx; i++) { 375 for (l = 0; l < nc; l++) { 376 /* the complicated stuff is to handle periodic boundaries */ 377 colors[ii++] = l + nc * ((SetInRange(i, m) % col) + col * (SetInRange(j, n) % col) + col * col * (SetInRange(k, p) % col)); 378 } 379 } 380 } 381 } 382 ncolors = nc + nc * (col - 1 + col * (col - 1) + col * col * (col - 1)); 383 PetscCall(ISColoringCreate(comm, ncolors, nc * gnx * gny * gnz, colors, PETSC_OWN_POINTER, &dd->ghostedcoloring)); 384 PetscCall(ISColoringSetType(dd->ghostedcoloring, IS_COLORING_LOCAL)); 385 } 386 *coloring = dd->ghostedcoloring; 387 } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_ARG_WRONG, "Unknown ISColoringType %d", (int)ctype); 388 PetscCall(ISColoringReference(*coloring)); 389 PetscFunctionReturn(PETSC_SUCCESS); 390 } 391 392 /* ---------------------------------------------------------------------------------*/ 393 394 PetscErrorCode 
DMCreateColoring_DA_1d_MPIAIJ(DM da, ISColoringType ctype, ISColoring *coloring) 395 { 396 PetscInt xs, nx, i, i1, gxs, gnx, l, m, M, dim, s, nc, col; 397 PetscInt ncolors; 398 MPI_Comm comm; 399 DMBoundaryType bx; 400 ISColoringValue *colors; 401 DM_DA *dd = (DM_DA *)da->data; 402 403 PetscFunctionBegin; 404 /* 405 nc - number of components per grid point 406 col - number of colors needed in one direction for single component problem 407 */ 408 PetscCall(DMDAGetInfo(da, &dim, &m, NULL, NULL, &M, NULL, NULL, &nc, &s, &bx, NULL, NULL, NULL)); 409 col = 2 * s + 1; 410 PetscCall(DMDAGetCorners(da, &xs, NULL, NULL, &nx, NULL, NULL)); 411 PetscCall(DMDAGetGhostCorners(da, &gxs, NULL, NULL, &gnx, NULL, NULL)); 412 PetscCall(PetscObjectGetComm((PetscObject)da, &comm)); 413 414 /* create the coloring */ 415 if (ctype == IS_COLORING_GLOBAL) { 416 if (!dd->localcoloring) { 417 PetscCall(PetscMalloc1(nc * nx, &colors)); 418 if (dd->ofillcols) { 419 PetscInt tc = 0; 420 for (i = 0; i < nc; i++) tc += (PetscInt)(dd->ofillcols[i] > 0); 421 i1 = 0; 422 for (i = xs; i < xs + nx; i++) { 423 for (l = 0; l < nc; l++) { 424 if (dd->ofillcols[l] && (i % col)) { 425 colors[i1++] = nc - 1 + tc * ((i % col) - 1) + dd->ofillcols[l]; 426 } else { 427 colors[i1++] = l; 428 } 429 } 430 } 431 ncolors = nc + 2 * s * tc; 432 } else { 433 i1 = 0; 434 for (i = xs; i < xs + nx; i++) { 435 for (l = 0; l < nc; l++) colors[i1++] = l + nc * (i % col); 436 } 437 ncolors = nc + nc * (col - 1); 438 } 439 PetscCall(ISColoringCreate(comm, ncolors, nc * nx, colors, PETSC_OWN_POINTER, &dd->localcoloring)); 440 } 441 *coloring = dd->localcoloring; 442 } else if (ctype == IS_COLORING_LOCAL) { 443 if (!dd->ghostedcoloring) { 444 PetscCall(PetscMalloc1(nc * gnx, &colors)); 445 i1 = 0; 446 for (i = gxs; i < gxs + gnx; i++) { 447 for (l = 0; l < nc; l++) { 448 /* the complicated stuff is to handle periodic boundaries */ 449 colors[i1++] = l + nc * (SetInRange(i, m) % col); 450 } 451 } 452 ncolors = nc + nc * (col - 
1); 453 PetscCall(ISColoringCreate(comm, ncolors, nc * gnx, colors, PETSC_OWN_POINTER, &dd->ghostedcoloring)); 454 PetscCall(ISColoringSetType(dd->ghostedcoloring, IS_COLORING_LOCAL)); 455 } 456 *coloring = dd->ghostedcoloring; 457 } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_ARG_WRONG, "Unknown ISColoringType %d", (int)ctype); 458 PetscCall(ISColoringReference(*coloring)); 459 PetscFunctionReturn(PETSC_SUCCESS); 460 } 461 462 PetscErrorCode DMCreateColoring_DA_2d_5pt_MPIAIJ(DM da, ISColoringType ctype, ISColoring *coloring) 463 { 464 PetscInt xs, ys, nx, ny, i, j, ii, gxs, gys, gnx, gny, m, n, dim, s, k, nc; 465 PetscInt ncolors; 466 MPI_Comm comm; 467 DMBoundaryType bx, by; 468 ISColoringValue *colors; 469 DM_DA *dd = (DM_DA *)da->data; 470 471 PetscFunctionBegin; 472 /* 473 nc - number of components per grid point 474 col - number of colors needed in one direction for single component problem 475 */ 476 PetscCall(DMDAGetInfo(da, &dim, &m, &n, NULL, NULL, NULL, NULL, &nc, &s, &bx, &by, NULL, NULL)); 477 PetscCall(DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL)); 478 PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL)); 479 PetscCall(PetscObjectGetComm((PetscObject)da, &comm)); 480 /* create the coloring */ 481 if (ctype == IS_COLORING_GLOBAL) { 482 if (!dd->localcoloring) { 483 PetscCall(PetscMalloc1(nc * nx * ny, &colors)); 484 ii = 0; 485 for (j = ys; j < ys + ny; j++) { 486 for (i = xs; i < xs + nx; i++) { 487 for (k = 0; k < nc; k++) colors[ii++] = k + nc * ((3 * j + i) % 5); 488 } 489 } 490 ncolors = 5 * nc; 491 PetscCall(ISColoringCreate(comm, ncolors, nc * nx * ny, colors, PETSC_OWN_POINTER, &dd->localcoloring)); 492 } 493 *coloring = dd->localcoloring; 494 } else if (ctype == IS_COLORING_LOCAL) { 495 if (!dd->ghostedcoloring) { 496 PetscCall(PetscMalloc1(nc * gnx * gny, &colors)); 497 ii = 0; 498 for (j = gys; j < gys + gny; j++) { 499 for (i = gxs; i < gxs + gnx; i++) { 500 for (k = 0; k < nc; k++) colors[ii++] = k + nc 
* ((3 * SetInRange(j, n) + SetInRange(i, m)) % 5); 501 } 502 } 503 ncolors = 5 * nc; 504 PetscCall(ISColoringCreate(comm, ncolors, nc * gnx * gny, colors, PETSC_OWN_POINTER, &dd->ghostedcoloring)); 505 PetscCall(ISColoringSetType(dd->ghostedcoloring, IS_COLORING_LOCAL)); 506 } 507 *coloring = dd->ghostedcoloring; 508 } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_ARG_WRONG, "Unknown ISColoringType %d", (int)ctype); 509 PetscFunctionReturn(PETSC_SUCCESS); 510 } 511 512 /* =========================================================================== */ 513 extern PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ(DM, Mat); 514 extern PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ_Fill(DM, Mat); 515 extern PetscErrorCode DMCreateMatrix_DA_1d_SeqAIJ_NoPreallocation(DM, Mat); 516 extern PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ(DM, Mat); 517 extern PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ_Fill(DM, Mat); 518 extern PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ(DM, Mat); 519 extern PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ_Fill(DM, Mat); 520 extern PetscErrorCode DMCreateMatrix_DA_2d_MPIBAIJ(DM, Mat); 521 extern PetscErrorCode DMCreateMatrix_DA_3d_MPIBAIJ(DM, Mat); 522 extern PetscErrorCode DMCreateMatrix_DA_2d_MPISBAIJ(DM, Mat); 523 extern PetscErrorCode DMCreateMatrix_DA_3d_MPISBAIJ(DM, Mat); 524 extern PetscErrorCode DMCreateMatrix_DA_2d_MPISELL(DM, Mat); 525 extern PetscErrorCode DMCreateMatrix_DA_3d_MPISELL(DM, Mat); 526 extern PetscErrorCode DMCreateMatrix_DA_IS(DM, Mat); 527 528 /*@C 529 MatSetupDM - Sets the `DMDA` that is to be used by the HYPRE_StructMatrix PETSc matrix 530 531 Logically Collective on mat 532 533 Input Parameters: 534 + mat - the matrix 535 - da - the da 536 537 Level: intermediate 538 539 .seealso: `Mat`, `MatSetUp()` 540 @*/ 541 PetscErrorCode MatSetupDM(Mat mat, DM da) 542 { 543 PetscFunctionBegin; 544 PetscValidHeaderSpecific(mat, MAT_CLASSID, 1); 545 PetscValidHeaderSpecificType(da, DM_CLASSID, 2, DMDA); 546 PetscTryMethod(mat, 
"MatSetupDM_C", (Mat, DM), (mat, da)); 547 PetscFunctionReturn(PETSC_SUCCESS); 548 } 549 550 PetscErrorCode MatView_MPI_DA(Mat A, PetscViewer viewer) 551 { 552 DM da; 553 const char *prefix; 554 Mat Anatural; 555 AO ao; 556 PetscInt rstart, rend, *petsc, i; 557 IS is; 558 MPI_Comm comm; 559 PetscViewerFormat format; 560 561 PetscFunctionBegin; 562 /* Check whether we are just printing info, in which case MatView() already viewed everything we wanted to view */ 563 PetscCall(PetscViewerGetFormat(viewer, &format)); 564 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) PetscFunctionReturn(PETSC_SUCCESS); 565 566 PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 567 PetscCall(MatGetDM(A, &da)); 568 PetscCheck(da, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Matrix not generated from a DMDA"); 569 570 PetscCall(DMDAGetAO(da, &ao)); 571 PetscCall(MatGetOwnershipRange(A, &rstart, &rend)); 572 PetscCall(PetscMalloc1(rend - rstart, &petsc)); 573 for (i = rstart; i < rend; i++) petsc[i - rstart] = i; 574 PetscCall(AOApplicationToPetsc(ao, rend - rstart, petsc)); 575 PetscCall(ISCreateGeneral(comm, rend - rstart, petsc, PETSC_OWN_POINTER, &is)); 576 577 /* call viewer on natural ordering */ 578 PetscCall(MatCreateSubMatrix(A, is, is, MAT_INITIAL_MATRIX, &Anatural)); 579 PetscCall(ISDestroy(&is)); 580 PetscCall(PetscObjectGetOptionsPrefix((PetscObject)A, &prefix)); 581 PetscCall(PetscObjectSetOptionsPrefix((PetscObject)Anatural, prefix)); 582 PetscCall(PetscObjectSetName((PetscObject)Anatural, ((PetscObject)A)->name)); 583 ((PetscObject)Anatural)->donotPetscObjectPrintClassNamePrefixType = PETSC_TRUE; 584 PetscCall(MatView(Anatural, viewer)); 585 ((PetscObject)Anatural)->donotPetscObjectPrintClassNamePrefixType = PETSC_FALSE; 586 PetscCall(MatDestroy(&Anatural)); 587 PetscFunctionReturn(PETSC_SUCCESS); 588 } 589 590 PetscErrorCode MatLoad_MPI_DA(Mat A, PetscViewer viewer) 591 { 592 DM da; 593 Mat Anatural, Aapp; 594 AO ao; 595 
PetscInt rstart, rend, *app, i, m, n, M, N; 596 IS is; 597 MPI_Comm comm; 598 599 PetscFunctionBegin; 600 PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 601 PetscCall(MatGetDM(A, &da)); 602 PetscCheck(da, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Matrix not generated from a DMDA"); 603 604 /* Load the matrix in natural ordering */ 605 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &Anatural)); 606 PetscCall(MatSetType(Anatural, ((PetscObject)A)->type_name)); 607 PetscCall(MatGetSize(A, &M, &N)); 608 PetscCall(MatGetLocalSize(A, &m, &n)); 609 PetscCall(MatSetSizes(Anatural, m, n, M, N)); 610 PetscCall(MatLoad(Anatural, viewer)); 611 612 /* Map natural ordering to application ordering and create IS */ 613 PetscCall(DMDAGetAO(da, &ao)); 614 PetscCall(MatGetOwnershipRange(Anatural, &rstart, &rend)); 615 PetscCall(PetscMalloc1(rend - rstart, &app)); 616 for (i = rstart; i < rend; i++) app[i - rstart] = i; 617 PetscCall(AOPetscToApplication(ao, rend - rstart, app)); 618 PetscCall(ISCreateGeneral(comm, rend - rstart, app, PETSC_OWN_POINTER, &is)); 619 620 /* Do permutation and replace header */ 621 PetscCall(MatCreateSubMatrix(Anatural, is, is, MAT_INITIAL_MATRIX, &Aapp)); 622 PetscCall(MatHeaderReplace(A, &Aapp)); 623 PetscCall(ISDestroy(&is)); 624 PetscCall(MatDestroy(&Anatural)); 625 PetscFunctionReturn(PETSC_SUCCESS); 626 } 627 628 PetscErrorCode DMCreateMatrix_DA(DM da, Mat *J) 629 { 630 PetscInt dim, dof, nx, ny, nz, dims[3], starts[3], M, N, P; 631 Mat A; 632 MPI_Comm comm; 633 MatType Atype; 634 MatType mtype; 635 PetscMPIInt size; 636 DM_DA *dd = (DM_DA *)da->data; 637 void (*aij)(void) = NULL, (*baij)(void) = NULL, (*sbaij)(void) = NULL, (*sell)(void) = NULL, (*is)(void) = NULL; 638 639 PetscFunctionBegin; 640 PetscCall(MatInitializePackage()); 641 mtype = da->mattype; 642 643 /* 644 m 645 ------------------------------------------------------ 646 | | 647 | | 648 | ---------------------- | 649 | | | | 650 n | ny | | | 651 | | | | 652 | 
.--------------------- | 653 | (xs,ys) nx | 654 | . | 655 | (gxs,gys) | 656 | | 657 ----------------------------------------------------- 658 */ 659 660 /* 661 nc - number of components per grid point 662 col - number of colors needed in one direction for single component problem 663 */ 664 M = dd->M; 665 N = dd->N; 666 P = dd->P; 667 dim = da->dim; 668 dof = dd->w; 669 /* PetscCall(DMDAGetInfo(da,&dim,&M,&N,&P,NULL,NULL,NULL,&dof,NULL,NULL,NULL,NULL,NULL)); */ 670 PetscCall(DMDAGetCorners(da, NULL, NULL, NULL, &nx, &ny, &nz)); 671 PetscCall(PetscObjectGetComm((PetscObject)da, &comm)); 672 PetscCall(MatCreate(comm, &A)); 673 PetscCall(MatSetSizes(A, dof * nx * ny * nz, dof * nx * ny * nz, dof * M * N * P, dof * M * N * P)); 674 PetscCall(MatSetType(A, mtype)); 675 PetscCall(MatSetFromOptions(A)); 676 if (dof * nx * ny * nz < da->bind_below) { 677 PetscCall(MatSetBindingPropagates(A, PETSC_TRUE)); 678 PetscCall(MatBindToCPU(A, PETSC_TRUE)); 679 } 680 PetscCall(MatSetDM(A, da)); 681 if (da->structure_only) PetscCall(MatSetOption(A, MAT_STRUCTURE_ONLY, PETSC_TRUE)); 682 PetscCall(MatGetType(A, &Atype)); 683 /* 684 We do not provide a getmatrix function in the DMDA operations because 685 the basic DMDA does not know about matrices. We think of DMDA as being more 686 more low-level than matrices. This is kind of cheating but, cause sometimes 687 we think of DMDA has higher level than matrices. 688 689 We could switch based on Atype (or mtype), but we do not since the 690 specialized setting routines depend only on the particular preallocation 691 details of the matrix, not the type itself. 
692 */ 693 PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatMPIAIJSetPreallocation_C", &aij)); 694 if (!aij) PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatSeqAIJSetPreallocation_C", &aij)); 695 if (!aij) { 696 PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatMPIBAIJSetPreallocation_C", &baij)); 697 if (!baij) PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatSeqBAIJSetPreallocation_C", &baij)); 698 if (!baij) { 699 PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatMPISBAIJSetPreallocation_C", &sbaij)); 700 if (!sbaij) PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatSeqSBAIJSetPreallocation_C", &sbaij)); 701 if (!sbaij) { 702 PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatMPISELLSetPreallocation_C", &sell)); 703 if (!sell) PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatSeqSELLSetPreallocation_C", &sell)); 704 } 705 if (!sell) PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatISSetPreallocation_C", &is)); 706 } 707 } 708 if (aij) { 709 if (dim == 1) { 710 if (dd->ofill) { 711 PetscCall(DMCreateMatrix_DA_1d_MPIAIJ_Fill(da, A)); 712 } else { 713 DMBoundaryType bx; 714 PetscMPIInt size; 715 PetscCall(DMDAGetInfo(da, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, &bx, NULL, NULL, NULL)); 716 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)da), &size)); 717 if (size == 1 && bx == DM_BOUNDARY_NONE) { 718 PetscCall(DMCreateMatrix_DA_1d_SeqAIJ_NoPreallocation(da, A)); 719 } else { 720 PetscCall(DMCreateMatrix_DA_1d_MPIAIJ(da, A)); 721 } 722 } 723 } else if (dim == 2) { 724 if (dd->ofill) { 725 PetscCall(DMCreateMatrix_DA_2d_MPIAIJ_Fill(da, A)); 726 } else { 727 PetscCall(DMCreateMatrix_DA_2d_MPIAIJ(da, A)); 728 } 729 } else if (dim == 3) { 730 if (dd->ofill) { 731 PetscCall(DMCreateMatrix_DA_3d_MPIAIJ_Fill(da, A)); 732 } else { 733 PetscCall(DMCreateMatrix_DA_3d_MPIAIJ(da, A)); 734 } 735 } 736 } else if (baij) { 737 if (dim == 2) { 738 PetscCall(DMCreateMatrix_DA_2d_MPIBAIJ(da, A)); 739 } else if (dim 
== 3) { 740 PetscCall(DMCreateMatrix_DA_3d_MPIBAIJ(da, A)); 741 } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "Not implemented for %" PetscInt_FMT " dimension and Matrix Type: %s in %" PetscInt_FMT " dimension! Send mail to petsc-maint@mcs.anl.gov for code", dim, Atype, dim); 742 } else if (sbaij) { 743 if (dim == 2) { 744 PetscCall(DMCreateMatrix_DA_2d_MPISBAIJ(da, A)); 745 } else if (dim == 3) { 746 PetscCall(DMCreateMatrix_DA_3d_MPISBAIJ(da, A)); 747 } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "Not implemented for %" PetscInt_FMT " dimension and Matrix Type: %s in %" PetscInt_FMT " dimension! Send mail to petsc-maint@mcs.anl.gov for code", dim, Atype, dim); 748 } else if (sell) { 749 if (dim == 2) { 750 PetscCall(DMCreateMatrix_DA_2d_MPISELL(da, A)); 751 } else if (dim == 3) { 752 PetscCall(DMCreateMatrix_DA_3d_MPISELL(da, A)); 753 } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "Not implemented for %" PetscInt_FMT " dimension and Matrix Type: %s in %" PetscInt_FMT " dimension! 
Send mail to petsc-maint@mcs.anl.gov for code", dim, Atype, dim); 754 } else if (is) { 755 PetscCall(DMCreateMatrix_DA_IS(da, A)); 756 } else { 757 ISLocalToGlobalMapping ltog; 758 759 PetscCall(MatSetBlockSize(A, dof)); 760 PetscCall(MatSetUp(A)); 761 PetscCall(DMGetLocalToGlobalMapping(da, <og)); 762 PetscCall(MatSetLocalToGlobalMapping(A, ltog, ltog)); 763 } 764 PetscCall(DMDAGetGhostCorners(da, &starts[0], &starts[1], &starts[2], &dims[0], &dims[1], &dims[2])); 765 PetscCall(MatSetStencil(A, dim, dims, starts, dof)); 766 PetscCall(MatSetDM(A, da)); 767 PetscCallMPI(MPI_Comm_size(comm, &size)); 768 if (size > 1) { 769 /* change viewer to display matrix in natural ordering */ 770 PetscCall(MatSetOperation(A, MATOP_VIEW, (void (*)(void))MatView_MPI_DA)); 771 PetscCall(MatSetOperation(A, MATOP_LOAD, (void (*)(void))MatLoad_MPI_DA)); 772 } 773 *J = A; 774 PetscFunctionReturn(PETSC_SUCCESS); 775 } 776 777 /* ---------------------------------------------------------------------------------*/ 778 PETSC_EXTERN PetscErrorCode MatISSetPreallocation_IS(Mat, PetscInt, const PetscInt[], PetscInt, const PetscInt[]); 779 780 PetscErrorCode DMCreateMatrix_DA_IS(DM dm, Mat J) 781 { 782 DM_DA *da = (DM_DA *)dm->data; 783 Mat lJ, P; 784 ISLocalToGlobalMapping ltog; 785 IS is; 786 PetscBT bt; 787 const PetscInt *e_loc, *idx; 788 PetscInt i, nel, nen, nv, dof, *gidx, n, N; 789 790 /* The l2g map of DMDA has all ghosted nodes, and e_loc is a subset of all the local nodes (including the ghosted) 791 We need to filter out the local indices that are not represented through the DMDAGetElements decomposition */ 792 PetscFunctionBegin; 793 dof = da->w; 794 PetscCall(MatSetBlockSize(J, dof)); 795 PetscCall(DMGetLocalToGlobalMapping(dm, <og)); 796 797 /* flag local elements indices in local DMDA numbering */ 798 PetscCall(ISLocalToGlobalMappingGetSize(ltog, &nv)); 799 PetscCall(PetscBTCreate(nv / dof, &bt)); 800 PetscCall(DMDAGetElements(dm, &nel, &nen, &e_loc)); /* this will throw an error 
if the stencil type is not DMDA_STENCIL_BOX */ 801 for (i = 0; i < nel * nen; i++) PetscCall(PetscBTSet(bt, e_loc[i])); 802 803 /* filter out (set to -1) the global indices not used by the local elements */ 804 PetscCall(PetscMalloc1(nv / dof, &gidx)); 805 PetscCall(ISLocalToGlobalMappingGetBlockIndices(ltog, &idx)); 806 PetscCall(PetscArraycpy(gidx, idx, nv / dof)); 807 PetscCall(ISLocalToGlobalMappingRestoreBlockIndices(ltog, &idx)); 808 for (i = 0; i < nv / dof; i++) 809 if (!PetscBTLookup(bt, i)) gidx[i] = -1; 810 PetscCall(PetscBTDestroy(&bt)); 811 PetscCall(ISCreateBlock(PetscObjectComm((PetscObject)dm), dof, nv / dof, gidx, PETSC_OWN_POINTER, &is)); 812 PetscCall(ISLocalToGlobalMappingCreateIS(is, <og)); 813 PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog)); 814 PetscCall(ISLocalToGlobalMappingDestroy(<og)); 815 PetscCall(ISDestroy(&is)); 816 817 /* Preallocation */ 818 if (dm->prealloc_skip) { 819 PetscCall(MatSetUp(J)); 820 } else { 821 PetscCall(MatISGetLocalMat(J, &lJ)); 822 PetscCall(MatGetLocalToGlobalMapping(lJ, <og, NULL)); 823 PetscCall(MatCreate(PetscObjectComm((PetscObject)lJ), &P)); 824 PetscCall(MatSetType(P, MATPREALLOCATOR)); 825 PetscCall(MatSetLocalToGlobalMapping(P, ltog, ltog)); 826 PetscCall(MatGetSize(lJ, &N, NULL)); 827 PetscCall(MatGetLocalSize(lJ, &n, NULL)); 828 PetscCall(MatSetSizes(P, n, n, N, N)); 829 PetscCall(MatSetBlockSize(P, dof)); 830 PetscCall(MatSetUp(P)); 831 for (i = 0; i < nel; i++) PetscCall(MatSetValuesBlockedLocal(P, nen, e_loc + i * nen, nen, e_loc + i * nen, NULL, INSERT_VALUES)); 832 PetscCall(MatPreallocatorPreallocate(P, (PetscBool)!da->prealloc_only, lJ)); 833 PetscCall(MatISRestoreLocalMat(J, &lJ)); 834 PetscCall(DMDARestoreElements(dm, &nel, &nen, &e_loc)); 835 PetscCall(MatDestroy(&P)); 836 837 PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY)); 838 PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY)); 839 } 840 PetscFunctionReturn(PETSC_SUCCESS); 841 } 842 843 PetscErrorCode DMCreateMatrix_DA_2d_MPISELL(DM 
da, Mat J)
{
  PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny, m, n, dim, s, *cols = NULL, k, nc, *rows = NULL, col, cnt, l, p;
  PetscInt               lstart, lend, pstart, pend, *dnz, *onz;
  MPI_Comm               comm;
  PetscScalar           *values;
  DMBoundaryType         bx, by;
  ISLocalToGlobalMapping ltog;
  DMDAStencilType        st;

  PetscFunctionBegin;
  /*
         nc - number of components per grid point
         col - number of colors needed in one direction for single component problem
  */
  PetscCall(DMDAGetInfo(da, &dim, &m, &n, NULL, NULL, NULL, NULL, &nc, &s, &bx, &by, NULL, &st));
  col = 2 * s + 1;
  PetscCall(DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL));
  PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL));
  PetscCall(PetscObjectGetComm((PetscObject)da, &comm));

  /* rows holds the nc row indices of one grid node; cols holds the worst-case stencil columns */
  PetscCall(PetscMalloc2(nc, &rows, col * col * nc * nc, &cols));
  PetscCall(DMGetLocalToGlobalMapping(da, &ltog));

  PetscCall(MatSetBlockSize(J, nc));
  /* determine the matrix preallocation information */
  MatPreallocateBegin(comm, nc * nx * ny, nc * nx * ny, dnz, onz);
  for (i = xs; i < xs + nx; i++) {
    /* periodic boundaries keep the full stencil; otherwise clip at the physical domain edge */
    pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
    pend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));

    for (j = ys; j < ys + ny; j++) {
      slot = i - gxs + gnx * (j - gys); /* node index in the ghosted local ordering */

      lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
      lend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));

      cnt = 0;
      for (k = 0; k < nc; k++) {
        for (l = lstart; l < lend + 1; l++) {
          for (p = pstart; p < pend + 1; p++) {
            if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star have either l = 0 or p = 0 */
              cols[cnt++] = k + nc * (slot + gnx * l + p);
            }
          }
        }
        rows[k] = k + nc * (slot);
      }
      PetscCall(MatPreallocateSetLocal(ltog, nc, rows, ltog, cnt, cols, dnz, onz));
    }
  }
  PetscCall(MatSetBlockSize(J, nc));
  PetscCall(MatSeqSELLSetPreallocation(J, 0, dnz));
  PetscCall(MatMPISELLSetPreallocation(J, 0, dnz, 0, onz));
  MatPreallocateEnd(dnz, onz);

  PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    /* insert explicit zeros so the nonzero pattern is fixed before first use */
    PetscCall(PetscCalloc1(col * col * nc * nc, &values));
    for (i = xs; i < xs + nx; i++) {
      pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
      pend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));

      for (j = ys; j < ys + ny; j++) {
        slot = i - gxs + gnx * (j - gys);

        lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
        lend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));

        cnt = 0;
        for (k = 0; k < nc; k++) {
          for (l = lstart; l < lend + 1; l++) {
            for (p = pstart; p < pend + 1; p++) {
              if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star have either l = 0 or p = 0 */
                cols[cnt++] = k + nc * (slot + gnx * l + p);
              }
            }
          }
          rows[k] = k + nc * (slot);
        }
        PetscCall(MatSetValuesLocal(J, nc, rows, cnt, cols, values, INSERT_VALUES));
      }
    }
    PetscCall(PetscFree(values));
    /* do not copy values to GPU since they are all zero and not yet needed there */
    PetscCall(MatBindToCPU(J, PETSC_TRUE));
    PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatBindToCPU(J, PETSC_FALSE));
    PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  }
  PetscCall(PetscFree2(rows, cols));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  3d analogue of DMCreateMatrix_DA_2d_MPISELL: preallocates (and optionally
  zero-fills) a SELL matrix from the 3d DMDA stencil pattern.
*/
PetscErrorCode DMCreateMatrix_DA_3d_MPISELL(DM da, Mat J)
{
  PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
  PetscInt               m, n, dim, s, *cols = NULL, k, nc, *rows = NULL, col, cnt, l, p, *dnz = NULL, *onz = NULL;
  PetscInt               istart, iend, jstart, jend, kstart, kend, zs, nz, gzs, gnz, ii, jj, kk, M, N, P;
  MPI_Comm               comm;
  PetscScalar           *values;
  DMBoundaryType         bx, by, bz;
  ISLocalToGlobalMapping ltog;
  DMDAStencilType        st;

  PetscFunctionBegin;
  /*
         nc - number of components per grid point
         col - number of colors needed in one direction for single component problem
  */
  PetscCall(DMDAGetInfo(da, &dim, &m, &n, &p, &M, &N, &P, &nc, &s, &bx, &by, &bz, &st));
  col = 2 * s + 1;
  PetscCall(DMDAGetCorners(da, &xs, &ys, &zs, &nx, &ny, &nz));
  PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, &gzs, &gnx, &gny, &gnz));
  PetscCall(PetscObjectGetComm((PetscObject)da, &comm));

  PetscCall(PetscMalloc2(nc, &rows, col * col * col * nc * nc, &cols));
  PetscCall(DMGetLocalToGlobalMapping(da, &ltog));

  PetscCall(MatSetBlockSize(J, nc));
  /* determine the matrix preallocation information */
  MatPreallocateBegin(comm, nc * nx * ny * nz, nc * nx * ny * nz, dnz, onz);
  for (i = xs; i < xs + nx; i++) {
    /* stencil clipped at non-periodic physical boundaries in each direction */
    istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
    iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
    for (j = ys; j < ys + ny; j++) {
      jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
      jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
      for (k = zs; k < zs + nz; k++) {
        kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
        kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));

        slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs); /* node index in ghosted local ordering */

        cnt = 0;
        for (l = 0; l < nc; l++) {
          for (ii = istart; ii < iend + 1; ii++) {
            for (jj = jstart; jj < jend + 1; jj++) {
              for (kk = kstart; kk < kend + 1; kk++) {
                if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/
                  cols[cnt++] = l + nc * (slot + ii + gnx * jj + gnx * gny * kk);
                }
              }
            }
          }
          rows[l] = l + nc * (slot);
        }
        PetscCall(MatPreallocateSetLocal(ltog, nc, rows, ltog, cnt, cols, dnz, onz));
      }
    }
  }
  PetscCall(MatSetBlockSize(J, nc));
  PetscCall(MatSeqSELLSetPreallocation(J, 0, dnz));
  PetscCall(MatMPISELLSetPreallocation(J, 0, dnz, 0, onz));
  MatPreallocateEnd(dnz, onz);
  PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    /* NOTE(review): nc*nc*nc here vs nc*nc in the 2d variant; at most col^3*nc^2 entries
       are consumed per MatSetValuesLocal() call, so this looks oversized — confirm */
    PetscCall(PetscCalloc1(col * col * col * nc * nc * nc, &values));
    for (i = xs; i < xs + nx; i++) {
      istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
      iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
      for (j = ys; j < ys + ny; j++) {
        jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
        jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
        for (k = zs; k < zs + nz; k++) {
          kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
          kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));

          slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);

          cnt = 0;
          for (l = 0; l < nc; l++) {
            for (ii = istart; ii < iend + 1; ii++) {
              for (jj = jstart; jj < jend + 1; jj++) {
                for (kk = kstart; kk < kend + 1; kk++) {
                  if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/
                    cols[cnt++] = l + nc * (slot + ii + gnx * jj + gnx * gny * kk);
                  }
                }
              }
            }
            rows[l] = l + nc * (slot);
          }
          PetscCall(MatSetValuesLocal(J, nc, rows, cnt, cols, values, INSERT_VALUES));
        }
      }
    }
    PetscCall(PetscFree(values));
    /* do not copy values to GPU since they are all zero and not yet needed there */
    PetscCall(MatBindToCPU(J, PETSC_TRUE));
    PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatBindToCPU(J, PETSC_FALSE));
    PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  }
  PetscCall(PetscFree2(rows, cols));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  Preallocates (and, unless prealloc_only, inserts a zero pattern into) an AIJ
  matrix for a 2d DMDA; handles periodic wrap-around duplicate counting on
  skinny single-process dimensions via removedups.
*/
PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ(DM da, Mat J)
{
  PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny, m, n, dim, s, *cols = NULL, k, nc, *rows = NULL, col, cnt, l, p, M, N;
  PetscInt               lstart, lend, pstart, pend,
*dnz, *onz;
  MPI_Comm               comm;
  DMBoundaryType         bx, by;
  ISLocalToGlobalMapping ltog, mltog;
  DMDAStencilType        st;
  PetscBool              removedups = PETSC_FALSE, alreadyboundtocpu = PETSC_TRUE;

  PetscFunctionBegin;
  /*
         nc - number of components per grid point
         col - number of colors needed in one direction for single component problem
  */
  PetscCall(DMDAGetInfo(da, &dim, &m, &n, NULL, &M, &N, NULL, &nc, &s, &bx, &by, NULL, &st));
  if (bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE) PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_TRUE));
  col = 2 * s + 1;
  /*
       With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times
       because of "wrapping" around the end of the domain hitting an entry already counted in the other direction.
  */
  if (M == 1 && 2 * s >= m) removedups = PETSC_TRUE;
  if (N == 1 && 2 * s >= n) removedups = PETSC_TRUE;
  PetscCall(DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL));
  PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL));
  PetscCall(PetscObjectGetComm((PetscObject)da, &comm));

  PetscCall(PetscMalloc2(nc, &rows, col * col * nc * nc, &cols));
  PetscCall(DMGetLocalToGlobalMapping(da, &ltog));

  PetscCall(MatSetBlockSize(J, nc));
  /* determine the matrix preallocation information */
  MatPreallocateBegin(comm, nc * nx * ny, nc * nx * ny, dnz, onz);
  for (i = xs; i < xs + nx; i++) {
    pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
    pend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));

    for (j = ys; j < ys + ny; j++) {
      slot = i - gxs + gnx * (j - gys);

      lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
      lend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));

      cnt = 0;
      for (k = 0; k < nc; k++) {
        for (l = lstart; l < lend + 1; l++) {
          for (p = pstart; p < pend + 1; p++) {
            if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star have either l = 0 or p = 0 */
              cols[cnt++] = k + nc * (slot + gnx * l + p);
            }
          }
        }
        rows[k] = k + nc * (slot);
      }
      if (removedups) PetscCall(MatPreallocateSetLocalRemoveDups(ltog, nc, rows, ltog, cnt, cols, dnz, onz));
      else PetscCall(MatPreallocateSetLocal(ltog, nc, rows, ltog, cnt, cols, dnz, onz));
    }
  }
  PetscCall(MatSetBlockSize(J, nc));
  PetscCall(MatSeqAIJSetPreallocation(J, 0, dnz));
  PetscCall(MatMPIAIJSetPreallocation(J, 0, dnz, 0, onz));
  MatPreallocateEnd(dnz, onz);
  /* only install the DMDA map if the matrix does not already carry one */
  PetscCall(MatGetLocalToGlobalMapping(J, &mltog, NULL));
  if (!mltog) PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    for (i = xs; i < xs + nx; i++) {
      pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
      pend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));

      for (j = ys; j < ys + ny; j++) {
        slot = i - gxs + gnx * (j - gys);

        lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
        lend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));

        cnt = 0;
        for (l = lstart; l < lend + 1; l++) {
          for (p = pstart; p < pend + 1; p++) {
            if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star have either l = 0 or p = 0 */
              /* all nc components of a stencil node are stored consecutively */
              cols[cnt++] = nc * (slot + gnx * l + p);
              for (k = 1; k < nc; k++) {
                cols[cnt] = 1 + cols[cnt - 1];
                cnt++;
              }
            }
          }
        }
        for (k = 0; k < nc; k++) rows[k] = k + nc * (slot);
        PetscCall(MatSetValuesLocal(J, nc, rows, cnt, cols, NULL, INSERT_VALUES));
      }
    }
    /* do not copy values to GPU since they are all zero and not yet needed there */
    PetscCall(MatBoundToCPU(J, &alreadyboundtocpu));
    PetscCall(MatBindToCPU(J, PETSC_TRUE));
    PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
    if (!alreadyboundtocpu) PetscCall(MatBindToCPU(J, PETSC_FALSE));
    PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
    if (bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE) PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_FALSE));
  }
  PetscCall(PetscFree2(rows, cols));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  Variant of DMCreateMatrix_DA_2d_MPIAIJ that honours the per-block fill
  patterns (dd->dfill for the diagonal block, dd->ofill for off-diagonal
  blocks) set with DMDASetBlockFills(), producing a sparser matrix.
*/
PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ_Fill(DM da, Mat J)
{
  PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
  PetscInt               m, n, dim, s, *cols, k, nc, row, col, cnt, maxcnt = 0, l, p, M, N;
  PetscInt               lstart, lend, pstart, pend, *dnz, *onz;
  DM_DA                 *dd = (DM_DA *)da->data;
  PetscInt               ifill_col, *ofill = dd->ofill, *dfill = dd->dfill;
  MPI_Comm               comm;
  DMBoundaryType         bx, by;
  ISLocalToGlobalMapping ltog;
  DMDAStencilType        st;
  PetscBool              removedups = PETSC_FALSE;

  PetscFunctionBegin;
  /*
         nc - number of components per grid point
         col - number of colors needed in one direction for single component problem
  */
  PetscCall(DMDAGetInfo(da, &dim, &m, &n, NULL, &M, &N, NULL, &nc, &s, &bx, &by, NULL, &st));
  col =
2 * s + 1;
  /*
       With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times
       because of "wrapping" around the end of the domain hitting an entry already counted in the other direction.
  */
  if (M == 1 && 2 * s >= m) removedups = PETSC_TRUE;
  if (N == 1 && 2 * s >= n) removedups = PETSC_TRUE;
  PetscCall(DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL));
  PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL));
  PetscCall(PetscObjectGetComm((PetscObject)da, &comm));

  PetscCall(PetscMalloc1(col * col * nc, &cols));
  PetscCall(DMGetLocalToGlobalMapping(da, &ltog));

  PetscCall(MatSetBlockSize(J, nc));
  /* determine the matrix preallocation information */
  MatPreallocateBegin(comm, nc * nx * ny, nc * nx * ny, dnz, onz);
  for (i = xs; i < xs + nx; i++) {
    pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
    pend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));

    for (j = ys; j < ys + ny; j++) {
      slot = i - gxs + gnx * (j - gys);

      lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
      lend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));

      /* rows are handled one component at a time since each component has its own fill row */
      for (k = 0; k < nc; k++) {
        cnt = 0;
        for (l = lstart; l < lend + 1; l++) {
          for (p = pstart; p < pend + 1; p++) {
            if (l || p) {
              /* off-diagonal block: couplings come from the sparse ofill pattern */
              if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star */
                for (ifill_col = ofill[k]; ifill_col < ofill[k + 1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc * (slot + gnx * l + p);
              }
            } else {
              /* diagonal block: use dfill if given, otherwise dense coupling */
              if (dfill) {
                for (ifill_col = dfill[k]; ifill_col < dfill[k + 1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc * (slot + gnx * l + p);
              } else {
                for (ifill_col = 0; ifill_col < nc; ifill_col++) cols[cnt++] = ifill_col + nc * (slot + gnx * l + p);
              }
            }
          }
        }
        row    = k + nc * (slot);
        maxcnt = PetscMax(maxcnt, cnt);
        if (removedups) PetscCall(MatPreallocateSetLocalRemoveDups(ltog, 1, &row, ltog, cnt, cols, dnz, onz));
        else PetscCall(MatPreallocateSetLocal(ltog, 1, &row, ltog, cnt, cols, dnz, onz));
      }
    }
  }
  PetscCall(MatSeqAIJSetPreallocation(J, 0, dnz));
  PetscCall(MatMPIAIJSetPreallocation(J, 0, dnz, 0, onz));
  MatPreallocateEnd(dnz, onz);
  PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    for (i = xs; i < xs + nx; i++) {
      pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
      pend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));

      for (j = ys; j < ys + ny; j++) {
        slot = i - gxs + gnx * (j - gys);

        lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
        lend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));

        for (k = 0; k < nc; k++) {
          cnt = 0;
          for (l = lstart; l < lend + 1; l++) {
            for (p = pstart; p < pend + 1; p++) {
              if (l || p) {
                if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star */
                  for (ifill_col = ofill[k]; ifill_col < ofill[k + 1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc * (slot + gnx * l + p);
                }
              } else {
                if (dfill) {
                  for (ifill_col = dfill[k]; ifill_col < dfill[k + 1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc * (slot + gnx * l + p);
                } else {
                  for (ifill_col = 0; ifill_col < nc; ifill_col++) cols[cnt++] = ifill_col + nc * (slot + gnx * l + p);
                }
              }
            }
          }
          row = k + nc * (slot);
          PetscCall(MatSetValuesLocal(J, 1, &row, cnt, cols, NULL, INSERT_VALUES));
        }
      }
    }
    /* do not copy values to GPU since they are all zero and not yet needed there */
    PetscCall(MatBindToCPU(J, PETSC_TRUE));
    PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatBindToCPU(J, PETSC_FALSE));
    PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  }
  PetscCall(PetscFree(cols));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* ---------------------------------------------------------------------------------*/

/*
  Preallocates (and, unless prealloc_only, inserts a zero pattern into) an AIJ
  matrix for a 3d DMDA; mirrors the 2d AIJ routine with a third grid direction.
*/
PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ(DM da, Mat J)
{
  PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
  PetscInt               m, n, dim, s, *cols = NULL, k, nc, *rows = NULL, col, cnt, l, p, *dnz = NULL, *onz = NULL;
  PetscInt               istart, iend, jstart, jend, kstart, kend, zs, nz, gzs, gnz, ii, jj, kk, M, N, P;
  MPI_Comm               comm;
  DMBoundaryType         bx, by, bz;
  ISLocalToGlobalMapping ltog, mltog;
  DMDAStencilType        st;
  PetscBool              removedups = PETSC_FALSE;

  PetscFunctionBegin;
  /*
         nc - number of components per grid point
         col - number of colors needed
in one direction for single component problem
  */
  PetscCall(DMDAGetInfo(da, &dim, &m, &n, &p, &M, &N, &P, &nc, &s, &bx, &by, &bz, &st));
  if (bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE && bz == DM_BOUNDARY_NONE) PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_TRUE));
  col = 2 * s + 1;

  /*
       With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times
       because of "wrapping" around the end of the domain hitting an entry already counted in the other direction.
  */
  if (M == 1 && 2 * s >= m) removedups = PETSC_TRUE;
  if (N == 1 && 2 * s >= n) removedups = PETSC_TRUE;
  if (P == 1 && 2 * s >= p) removedups = PETSC_TRUE;

  PetscCall(DMDAGetCorners(da, &xs, &ys, &zs, &nx, &ny, &nz));
  PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, &gzs, &gnx, &gny, &gnz));
  PetscCall(PetscObjectGetComm((PetscObject)da, &comm));

  PetscCall(PetscMalloc2(nc, &rows, col * col * col * nc * nc, &cols));
  PetscCall(DMGetLocalToGlobalMapping(da, &ltog));

  PetscCall(MatSetBlockSize(J, nc));
  /* determine the matrix preallocation information */
  MatPreallocateBegin(comm, nc * nx * ny * nz, nc * nx * ny * nz, dnz, onz);
  for (i = xs; i < xs + nx; i++) {
    istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
    iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
    for (j = ys; j < ys + ny; j++) {
      jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
      jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
      for (k = zs; k < zs + nz; k++) {
        kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
        kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));

        slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);

        cnt = 0;
        for (l = 0; l < nc; l++) {
          for (ii = istart; ii < iend + 1; ii++) {
            for (jj = jstart; jj < jend + 1; jj++) {
              for (kk = kstart; kk < kend + 1; kk++) {
                if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/
                  cols[cnt++] = l + nc * (slot + ii + gnx * jj + gnx * gny * kk);
                }
              }
            }
          }
          rows[l] = l + nc * (slot);
        }
        if (removedups) PetscCall(MatPreallocateSetLocalRemoveDups(ltog, nc, rows, ltog, cnt, cols, dnz, onz));
        else PetscCall(MatPreallocateSetLocal(ltog, nc, rows, ltog, cnt, cols, dnz, onz));
      }
    }
  }
  PetscCall(MatSetBlockSize(J, nc));
  PetscCall(MatSeqAIJSetPreallocation(J, 0, dnz));
  PetscCall(MatMPIAIJSetPreallocation(J, 0, dnz, 0, onz));
  MatPreallocateEnd(dnz, onz);
  PetscCall(MatGetLocalToGlobalMapping(J, &mltog, NULL));
  if (!mltog) PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    for (i = xs; i < xs + nx; i++) {
      istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
      iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
      for (j = ys; j < ys + ny; j++) {
        jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
        jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
        for (k = zs; k < zs + nz; k++) {
          kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
          kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));

          slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);

          cnt = 0;
          /* note: loop nest order is kk/jj/ii here (vs l/ii/jj/kk above) so column indices come out sorted */
          for (kk = kstart; kk < kend + 1; kk++) {
            for (jj = jstart; jj < jend + 1; jj++) {
              for (ii = istart; ii < iend + 1; ii++) {
                if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/
                  cols[cnt++] = nc * (slot + ii + gnx * jj + gnx * gny * kk);
                  for (l = 1; l < nc; l++) {
                    cols[cnt] = 1 + cols[cnt - 1];
                    cnt++;
                  }
                }
              }
            }
          }
          rows[0] = nc * (slot);
          for (l = 1; l < nc; l++) rows[l] = 1 + rows[l - 1];
          PetscCall(MatSetValuesLocal(J, nc, rows, cnt, cols, NULL, INSERT_VALUES));
        }
      }
    }
    /* do not copy values to GPU since they are all zero and not yet needed there */
    PetscCall(MatBindToCPU(J, PETSC_TRUE));
    PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
    if (bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE && bz == DM_BOUNDARY_NONE) PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_FALSE));
    PetscCall(MatBindToCPU(J, PETSC_FALSE));
    PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  }
  PetscCall(PetscFree2(rows, cols));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* ---------------------------------------------------------------------------------*/

/*
  1d AIJ matrix creation honouring the DMDASetBlockFills() patterns
  (dd->dfill/dd->ofill); preallocation is computed per row from the fill
  pattern sizes rather than via MatPreallocate.
*/
PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ_Fill(DM da, Mat J)
{
  DM_DA                 *dd = (DM_DA *)da->data;
  PetscInt               xs, nx, i, j, gxs, gnx, row, k, l;
  PetscInt               m, dim, s, *cols = NULL, nc, cnt, maxcnt = 0, *ocols;
  PetscInt              *ofill = dd->ofill, *dfill = dd->dfill;
  DMBoundaryType         bx;
  ISLocalToGlobalMapping ltog;
  PetscMPIInt            rank, size;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)da), &rank));
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)da), &size));

  /*
     nc - number of components per grid point
  */
  PetscCall(DMDAGetInfo(da, &dim, &m, NULL, NULL, NULL, NULL, NULL, &nc, &s, &bx, NULL, NULL, NULL));
  PetscCheck(s <= 1, PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "Matrix creation for 1d not implemented correctly for stencil width larger than 1");
  PetscCall(DMDAGetCorners(da, &xs, NULL, NULL, &nx, NULL, NULL));
  PetscCall(DMDAGetGhostCorners(da, &gxs, NULL, NULL, &gnx, NULL, NULL));

  PetscCall(MatSetBlockSize(J, nc));
  /* cols/ocols here are per-row diagonal/off-diagonal nonzero COUNTS, not column indices */
  PetscCall(PetscCalloc2(nx * nc, &cols, nx * nc, &ocols));

  /*
    note should be smaller for first and last process with no periodic
    does not handle dfill
  */
  cnt = 0;
  /* coupling with process to the left */
  for (i = 0; i < s; i++) {
    for (j = 0; j < nc; j++) {
      ocols[cnt] = ((rank == 0) ? 0 : (s - i) * (ofill[j + 1] - ofill[j]));
      /* NOTE(review): dfill is dereferenced unguarded here, while the fill phase below
         checks `if (dfill)` — presumably callers guarantee dfill != NULL; confirm */
      cols[cnt] = dfill[j + 1] - dfill[j] + (s + i) * (ofill[j + 1] - ofill[j]);
      if (rank == 0 && (dd->bx == DM_BOUNDARY_PERIODIC)) {
        if (size > 1) ocols[cnt] += (s - i) * (ofill[j + 1] - ofill[j]);
        else cols[cnt] += (s - i) * (ofill[j + 1] - ofill[j]);
      }
      maxcnt = PetscMax(maxcnt, ocols[cnt] + cols[cnt]);
      cnt++;
    }
  }
  /* interior rows: full stencil on both sides, all on-process */
  for (i = s; i < nx - s; i++) {
    for (j = 0; j < nc; j++) {
      cols[cnt] = dfill[j + 1] - dfill[j] + 2 * s * (ofill[j + 1] - ofill[j]);
      maxcnt    = PetscMax(maxcnt, ocols[cnt] + cols[cnt]);
      cnt++;
    }
  }
  /* coupling with process to the right */
  for (i = nx - s; i < nx; i++) {
    for (j = 0; j < nc; j++) {
      ocols[cnt] = ((rank == (size - 1)) ? 0 : (i - nx + s + 1) * (ofill[j + 1] - ofill[j]));
      cols[cnt]  = dfill[j + 1] - dfill[j] + (s + nx - i - 1) * (ofill[j + 1] - ofill[j]);
      if ((rank == size - 1) && (dd->bx == DM_BOUNDARY_PERIODIC)) {
        if (size > 1) ocols[cnt] += (i - nx + s + 1) * (ofill[j + 1] - ofill[j]);
        else cols[cnt] += (i - nx + s + 1) * (ofill[j + 1] - ofill[j]);
      }
      maxcnt = PetscMax(maxcnt, ocols[cnt] + cols[cnt]);
      cnt++;
    }
  }

  PetscCall(MatSeqAIJSetPreallocation(J, 0, cols));
  PetscCall(MatMPIAIJSetPreallocation(J, 0, cols, 0, ocols));
  PetscCall(PetscFree2(cols, ocols));

  PetscCall(DMGetLocalToGlobalMapping(da, &ltog));
  PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    /* reuse cols now as an index buffer sized by the widest row found above */
    PetscCall(PetscMalloc1(maxcnt, &cols));
    row = xs * nc;
    /* coupling with process to the left */
    for (i = xs; i < xs + s; i++) {
      for (j = 0; j < nc; j++) {
        cnt = 0;
        if (rank) {
          for (l = 0; l < s; l++) {
            for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i - s + l) * nc + ofill[k];
          }
        }
        if (rank == 0 && (dd->bx == DM_BOUNDARY_PERIODIC)) {
          /* wrap to the far end of the periodic domain */
          for (l = 0; l < s; l++) {
            for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (m + i - s - l) * nc + ofill[k];
          }
        }
        if (dfill) {
          for (k = dfill[j]; k < dfill[j + 1]; k++) cols[cnt++] = i * nc + dfill[k];
        } else {
          for (k = 0; k < nc; k++) cols[cnt++] = i * nc + k;
        }
        for (l = 0; l < s; l++) {
          for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i + s - l) * nc + ofill[k];
        }
        PetscCall(MatSetValues(J, 1, &row, cnt, cols, NULL, INSERT_VALUES));
        row++;
      }
    }
    /* interior rows */
    for (i = xs + s; i < xs + nx - s; i++) {
      for (j = 0; j < nc; j++) {
        cnt = 0;
        for (l = 0; l < s; l++) {
          for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i - s + l) * nc + ofill[k];
        }
        if (dfill) {
          for (k = dfill[j]; k < dfill[j + 1]; k++) cols[cnt++] = i * nc + dfill[k];
        } else {
          for (k = 0; k < nc; k++) cols[cnt++] = i * nc + k;
        }
        for (l = 0; l < s; l++) {
          for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i + s - l) * nc + ofill[k];
        }
        PetscCall(MatSetValues(J, 1, &row, cnt, cols, NULL, INSERT_VALUES));
        row++;
      }
    }
    /* coupling with process to the right */
    for (i = xs + nx - s; i < xs + nx; i++) {
      for (j = 0; j < nc; j++) {
        cnt = 0;
        for (l = 0; l < s; l++) {
          for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i - s + l) * nc + ofill[k];
        }
        if (dfill) {
          for (k = dfill[j]; k < dfill[j + 1]; k++) cols[cnt++] = i * nc + dfill[k];
        } else {
          for (k = 0; k < nc; k++) cols[cnt++] = i * nc + k;
        }
        if (rank < size - 1) {
          for (l = 0; l < s; l++) {
            for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i + s - l) * nc + ofill[k];
          }
        }
        if ((rank == size - 1) && (dd->bx == DM_BOUNDARY_PERIODIC)) {
          for (l = 0; l < s; l++) {
            for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i - s - l - m + 2) * nc + ofill[k];
          }
        }
        PetscCall(MatSetValues(J, 1, &row, cnt, cols, NULL, INSERT_VALUES));
        row++;
      }
    }
    PetscCall(PetscFree(cols));
    /* do not copy values to GPU since they are all zero and not yet needed there */
    PetscCall(MatBindToCPU(J, PETSC_TRUE));
    PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatBindToCPU(J, PETSC_FALSE));
    PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
---------------------------------------------------------------------------------*/

/*
  Preallocates (uniform col*nc nonzeros per row) and, unless prealloc_only,
  inserts the zero stencil pattern into an AIJ matrix for a 1d DMDA with a
  dense coupling between all components at each node.
*/
PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ(DM da, Mat J)
{
  PetscInt               xs, nx, i, i1, slot, gxs, gnx;
  PetscInt               m, dim, s, *cols = NULL, nc, *rows = NULL, col, cnt, l;
  PetscInt               istart, iend;
  DMBoundaryType         bx;
  ISLocalToGlobalMapping ltog, mltog;

  PetscFunctionBegin;
  /*
         nc - number of components per grid point
         col - number of colors needed in one direction for single component problem
  */
  PetscCall(DMDAGetInfo(da, &dim, &m, NULL, NULL, NULL, NULL, NULL, &nc, &s, &bx, NULL, NULL, NULL));
  if (bx == DM_BOUNDARY_NONE) PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_TRUE));
  col = 2 * s + 1;

  PetscCall(DMDAGetCorners(da, &xs, NULL, NULL, &nx, NULL, NULL));
  PetscCall(DMDAGetGhostCorners(da, &gxs, NULL, NULL, &gnx, NULL, NULL));

  PetscCall(MatSetBlockSize(J, nc));
  PetscCall(MatSeqAIJSetPreallocation(J, col * nc, NULL));
  PetscCall(MatMPIAIJSetPreallocation(J, col * nc, NULL, col * nc, NULL));

  PetscCall(DMGetLocalToGlobalMapping(da, &ltog));
  PetscCall(MatGetLocalToGlobalMapping(J, &mltog, NULL));
  if (!mltog) PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    PetscCall(PetscMalloc2(nc, &rows, col * nc * nc, &cols));
    for (i = xs; i < xs + nx; i++) {
      /* clip the stencil to the ghosted local extent */
      istart = PetscMax(-s, gxs - i);
      iend   = PetscMin(s, gxs + gnx - i - 1);
      slot   = i - gxs;

      cnt = 0;
      for (i1 = istart; i1 < iend + 1; i1++) {
        /* all nc components of each neighbor node, consecutively */
        cols[cnt++] = nc * (slot + i1);
        for (l = 1; l < nc; l++) {
          cols[cnt] = 1 + cols[cnt - 1];
          cnt++;
        }
      }
      rows[0] = nc * (slot);
      for (l = 1; l < nc; l++) rows[l] = 1 + rows[l - 1];
      PetscCall(MatSetValuesLocal(J, nc, rows, cnt, cols, NULL, INSERT_VALUES));
    }
    /* do not copy values to GPU since they are all zero and not yet needed there */
    PetscCall(MatBindToCPU(J, PETSC_TRUE));
    PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
    if (bx == DM_BOUNDARY_NONE) PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_FALSE));
    PetscCall(MatBindToCPU(J, PETSC_FALSE));
    PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
    PetscCall(PetscFree2(rows, cols));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* ---------------------------------------------------------------------------------*/

/*
  Same stencil pattern as DMCreateMatrix_DA_1d_MPIAIJ but for a sequential AIJ
  matrix using MatSeqAIJSetTotalPreallocation() (a single aggregate count)
  instead of per-row preallocation.
*/
PetscErrorCode DMCreateMatrix_DA_1d_SeqAIJ_NoPreallocation(DM da, Mat J)
{
  PetscInt               xs, nx, i, i1, slot, gxs, gnx;
  PetscInt               m, dim, s, *cols = NULL, nc, *rows = NULL, col, cnt, l;
  PetscInt               istart, iend;
  DMBoundaryType         bx;
  ISLocalToGlobalMapping ltog, mltog;

  PetscFunctionBegin;
  /*
         nc - number of components per grid point
         col - number of colors needed in one direction for single component problem
  */
  PetscCall(DMDAGetInfo(da, &dim, &m, NULL, NULL, NULL, NULL, NULL, &nc, &s, &bx, NULL, NULL, NULL));
  col = 2 * s + 1;

  PetscCall(DMDAGetCorners(da, &xs, NULL, NULL, &nx, NULL, NULL));
  PetscCall(DMDAGetGhostCorners(da, &gxs, NULL, NULL, &gnx, NULL, NULL));

  PetscCall(MatSetBlockSize(J, nc));
  PetscCall(MatSeqAIJSetTotalPreallocation(J, nx * nc * col * nc));

  PetscCall(DMGetLocalToGlobalMapping(da, &ltog));
  PetscCall(MatGetLocalToGlobalMapping(J, &mltog, NULL));
  if (!mltog) PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    PetscCall(PetscMalloc2(nc, &rows, col * nc * nc, &cols));
    for (i = xs; i < xs + nx; i++) {
      istart = PetscMax(-s, gxs - i);
      iend   = PetscMin(s, gxs + gnx - i - 1);
      slot   = i - gxs;

      cnt = 0;
      for (i1 = istart; i1 < iend + 1; i1++) {
        cols[cnt++] = nc * (slot + i1);
        for (l = 1; l < nc; l++) {
          cols[cnt] = 1 + cols[cnt - 1];
          cnt++;
        }
      }
      rows[0] = nc * (slot);
      for (l = 1; l < nc; l++) rows[l] = 1 + rows[l - 1];
      PetscCall(MatSetValuesLocal(J, nc, rows, cnt, cols, NULL, INSERT_VALUES));
    }
    /* do not copy values to GPU since they are all zero and not yet needed there */
    PetscCall(MatBindToCPU(J, PETSC_TRUE));
    PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
    /* NOTE(review): MAT_SORTED_FULL is cleared below but never set in this function
       (unlike DMCreateMatrix_DA_1d_MPIAIJ) — presumably set by the caller; confirm */
    if (bx == DM_BOUNDARY_NONE) PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_FALSE));
    PetscCall(MatBindToCPU(J, PETSC_FALSE));
    PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
    PetscCall(PetscFree2(rows, cols));
  }
  PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_FALSE));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode DMCreateMatrix_DA_2d_MPIBAIJ(DM da, Mat J)
{
  PetscInt xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
  PetscInt m, n, dim, s, *cols, nc, col, cnt, *dnz, *onz;
  PetscInt istart, iend, jstart, jend, ii,
jj;
  MPI_Comm               comm;
  PetscScalar           *values;
  DMBoundaryType         bx, by;
  DMDAStencilType        st;
  ISLocalToGlobalMapping ltog;

  PetscFunctionBegin;
  /*
     nc - number of components per grid point
     col - number of colors needed in one direction for single component problem
  */
  PetscCall(DMDAGetInfo(da, &dim, &m, &n, NULL, NULL, NULL, NULL, &nc, &s, &bx, &by, NULL, &st));
  col = 2 * s + 1;

  PetscCall(DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL));
  PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL));
  PetscCall(PetscObjectGetComm((PetscObject)da, &comm));

  /* scratch array of block-column indices; at most col*col block columns per block row
     (NOTE(review): the nc*nc factor appears to over-allocate here — verify) */
  PetscCall(PetscMalloc1(col * col * nc * nc, &cols));

  PetscCall(DMGetLocalToGlobalMapping(da, &ltog));

  /* determine the matrix preallocation information */
  MatPreallocateBegin(comm, nx * ny, nx * ny, dnz, onz);
  for (i = xs; i < xs + nx; i++) {
    /* periodic boundaries keep the full stencil; otherwise clip to the physical domain */
    istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
    iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
    for (j = ys; j < ys + ny; j++) {
      jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
      jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
      slot   = i - gxs + gnx * (j - gys); /* local (ghosted) index of grid point (i,j) */

      /* Find block columns in block row */
      cnt = 0;
      for (ii = istart; ii < iend + 1; ii++) {
        for (jj = jstart; jj < jend + 1; jj++) {
          if (st == DMDA_STENCIL_BOX || !ii || !jj) { /* BOX or on the STAR */
            cols[cnt++] = slot + ii + gnx * jj;
          }
        }
      }
      PetscCall(MatPreallocateSetLocalBlock(ltog, 1, &slot, ltog, cnt, cols, dnz, onz));
    }
  }
  PetscCall(MatSeqBAIJSetPreallocation(J, nc, 0, dnz));
  PetscCall(MatMPIBAIJSetPreallocation(J, nc, 0, dnz, 0, onz));
  MatPreallocateEnd(dnz, onz);

  PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    /* insert explicit zeros so the nonzero pattern is materialized */
    PetscCall(PetscCalloc1(col * col * nc * nc, &values));
    for (i = xs; i < xs + nx; i++) {
      istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
      iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
      for (j = ys; j < ys + ny; j++) {
        jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
        jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
        slot   = i - gxs + gnx * (j - gys);
        cnt    = 0;
        for (ii = istart; ii < iend + 1; ii++) {
          for (jj = jstart; jj < jend + 1; jj++) {
            if (st == DMDA_STENCIL_BOX || !ii || !jj) { /* BOX or on the STAR */
              cols[cnt++] = slot + ii + gnx * jj;
            }
          }
        }
        PetscCall(MatSetValuesBlockedLocal(J, 1, &slot, cnt, cols, values, INSERT_VALUES));
      }
    }
    PetscCall(PetscFree(values));
    /* do not copy values to GPU since they are all zero and not yet needed there */
    PetscCall(MatBindToCPU(J, PETSC_TRUE));
    PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatBindToCPU(J, PETSC_FALSE));
    PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  }
  PetscCall(PetscFree(cols));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  DMCreateMatrix_DA_3d_MPIBAIJ - Preallocates and fills the nonzero block structure of J
  (BAIJ format, block size nc) for a 3d DMDA, honoring the stencil type (box/star),
  stencil width, and periodic boundaries.

  Input Parameters:
+ da - the 3d distributed array
- J  - the (already created) matrix whose block structure is preallocated and filled with zeros
*/
PetscErrorCode DMCreateMatrix_DA_3d_MPIBAIJ(DM da, Mat J)
{
  PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
  PetscInt               m, n, dim, s, *cols, k, nc, col, cnt, p, *dnz, *onz;
  PetscInt               istart, iend, jstart, jend, kstart, kend, zs, nz, gzs, gnz, ii, jj, kk;
  MPI_Comm               comm;
  PetscScalar           *values;
  DMBoundaryType         bx, by, bz;
  DMDAStencilType        st;
  ISLocalToGlobalMapping ltog;

  PetscFunctionBegin;
  /*
     nc - number of components per grid point
     col - number of colors needed in one direction for single component problem
  */
  PetscCall(DMDAGetInfo(da, &dim, &m, &n, &p, NULL, NULL, NULL, &nc, &s, &bx, &by, &bz, &st));
  col = 2 * s + 1;

  PetscCall(DMDAGetCorners(da, &xs, &ys, &zs, &nx, &ny, &nz));
  PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, &gzs, &gnx, &gny, &gnz));
  PetscCall(PetscObjectGetComm((PetscObject)da, &comm));

  /* scratch array of block-column indices; at most col^3 block columns per block row */
  PetscCall(PetscMalloc1(col * col * col, &cols));

  PetscCall(DMGetLocalToGlobalMapping(da, &ltog));

  /* determine the matrix preallocation
information */
  MatPreallocateBegin(comm, nx * ny * nz, nx * ny * nz, dnz, onz);
  for (i = xs; i < xs + nx; i++) {
    /* periodic boundaries keep the full stencil; otherwise clip to the physical domain */
    istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
    iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
    for (j = ys; j < ys + ny; j++) {
      jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
      jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
      for (k = zs; k < zs + nz; k++) {
        kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
        kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));

        slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs); /* local (ghosted) index of (i,j,k) */

        /* Find block columns in block row */
        cnt = 0;
        for (ii = istart; ii < iend + 1; ii++) {
          for (jj = jstart; jj < jend + 1; jj++) {
            for (kk = kstart; kk < kend + 1; kk++) {
              if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/
                cols[cnt++] = slot + ii + gnx * jj + gnx * gny * kk;
              }
            }
          }
        }
        PetscCall(MatPreallocateSetLocalBlock(ltog, 1, &slot, ltog, cnt, cols, dnz, onz));
      }
    }
  }
  PetscCall(MatSeqBAIJSetPreallocation(J, nc, 0, dnz));
  PetscCall(MatMPIBAIJSetPreallocation(J, nc, 0, dnz, 0, onz));
  MatPreallocateEnd(dnz, onz);

  PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    /* insert explicit zeros so the nonzero pattern is materialized */
    PetscCall(PetscCalloc1(col * col * col * nc * nc, &values));
    for (i = xs; i < xs + nx; i++) {
      istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
      iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
      for (j = ys; j < ys + ny; j++) {
        jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
        jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
        for (k = zs; k < zs + nz; k++) {
          kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
          kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));

          slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);

          cnt = 0;
          for (ii = istart; ii < iend + 1; ii++) {
            for (jj = jstart; jj < jend + 1; jj++) {
              for (kk = kstart; kk < kend + 1; kk++) {
                if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/
                  cols[cnt++] = slot + ii + gnx * jj + gnx * gny * kk;
                }
              }
            }
          }
          PetscCall(MatSetValuesBlockedLocal(J, 1, &slot, cnt, cols, values, INSERT_VALUES));
        }
      }
    }
    PetscCall(PetscFree(values));
    /* do not copy values to GPU since they are all zero and not yet needed there */
    PetscCall(MatBindToCPU(J, PETSC_TRUE));
    PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatBindToCPU(J, PETSC_FALSE));
    PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  }
  PetscCall(PetscFree(cols));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  This helper is for SBAIJ preallocation, to discard the lower-triangular values which are difficult to
  identify in the local ordering with periodic domain.
*/
static PetscErrorCode L2GFilterUpperTriangular(ISLocalToGlobalMapping ltog, PetscInt *row, PetscInt *cnt, PetscInt col[])
{
  PetscInt i, n;

  PetscFunctionBegin;
  /* map the block row and its block columns to the global ordering, then keep only
     columns on or above the diagonal (col >= row); *row and col[] are updated in place
     and *cnt is reduced to the number of retained columns */
  PetscCall(ISLocalToGlobalMappingApplyBlock(ltog, 1, row, row));
  PetscCall(ISLocalToGlobalMappingApplyBlock(ltog, *cnt, col, col));
  for (i = 0, n = 0; i < *cnt; i++) {
    if (col[i] >= *row) col[n++] = col[i];
  }
  *cnt = n;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  DMCreateMatrix_DA_2d_MPISBAIJ - Preallocates and fills the nonzero block structure of J
  (SBAIJ format, block size nc; only the upper triangle is stored) for a 2d DMDA.

  Input Parameters:
+ da - the 2d distributed array
- J  - the (already created) matrix whose block structure is preallocated and filled with zeros
*/
PetscErrorCode DMCreateMatrix_DA_2d_MPISBAIJ(DM da, Mat J)
{
  PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
  PetscInt               m, n, dim, s, *cols, nc, col, cnt, *dnz, *onz;
  PetscInt               istart, iend, jstart, jend, ii, jj;
  MPI_Comm               comm;
  PetscScalar           *values;
  DMBoundaryType         bx, by;
  DMDAStencilType        st;
  ISLocalToGlobalMapping ltog;

  PetscFunctionBegin;
  /*
     nc - number of components per grid point
     col - number of colors needed in one direction for single component problem
  */
  PetscCall(DMDAGetInfo(da, &dim, &m, &n, NULL, NULL, NULL, NULL, &nc, &s, &bx, &by, NULL, &st));
  col = 2 * s + 1;

  PetscCall(DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL));
  PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL));
  PetscCall(PetscObjectGetComm((PetscObject)da, &comm));

  PetscCall(PetscMalloc1(col * col * nc * nc, &cols));

  PetscCall(DMGetLocalToGlobalMapping(da, &ltog));

  /* determine the matrix preallocation information */
  MatPreallocateBegin(comm, nx * ny, nx * ny, dnz, onz);
  for (i = xs; i < xs + nx; i++) {
    /* periodic boundaries keep the full stencil; otherwise clip to the physical domain */
    istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
    iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
    for (j = ys; j < ys + ny; j++) {
      jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
      jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
      slot   = i - gxs + gnx * (j - gys);

      /* Find block columns in block row */
      cnt = 0;
      for (ii = istart; ii < iend + 1; ii++) {
        for (jj = jstart; jj < jend + 1; jj++) {
          if (st == DMDA_STENCIL_BOX || !ii || !jj) cols[cnt++] = slot + ii + gnx * jj;
        }
      }
      /* drop lower-triangular columns, switching row/cols to the global ordering */
      PetscCall(L2GFilterUpperTriangular(ltog, &slot, &cnt, cols));
      PetscCall(MatPreallocateSymmetricSetBlock(slot, cnt, cols, dnz, onz));
    }
  }
  PetscCall(MatSeqSBAIJSetPreallocation(J, nc, 0, dnz));
  PetscCall(MatMPISBAIJSetPreallocation(J, nc, 0, dnz, 0, onz));
  MatPreallocateEnd(dnz, onz);

  PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    /* insert explicit zeros so the nonzero pattern is materialized */
    PetscCall(PetscCalloc1(col * col * nc * nc, &values));
    for (i = xs; i < xs + nx; i++) {
      istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
      iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
      for (j = ys; j < ys + ny; j++) {
        jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
        jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
        slot   = i - gxs + gnx * (j - gys);

        /* Find block columns in block row */
        cnt = 0;
        for (ii = istart; ii < iend + 1; ii++) {
          for (jj = jstart; jj < jend + 1; jj++) {
            if (st == DMDA_STENCIL_BOX || !ii || !jj) cols[cnt++] = slot + ii + gnx * jj;
          }
        }
        PetscCall(L2GFilterUpperTriangular(ltog, &slot, &cnt, cols));
        /* indices are already global after the filter, hence MatSetValuesBlocked (not ...Local) */
        PetscCall(MatSetValuesBlocked(J, 1, &slot, cnt, cols, values, INSERT_VALUES));
      }
    }
    PetscCall(PetscFree(values));
    /* do not copy values to GPU since they are all zero and not yet needed there */
    PetscCall(MatBindToCPU(J, PETSC_TRUE));
    PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatBindToCPU(J, PETSC_FALSE));
    PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  }
  PetscCall(PetscFree(cols));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  DMCreateMatrix_DA_3d_MPISBAIJ - Preallocates and fills the nonzero block structure of J
  (SBAIJ format, block size nc; only the upper triangle is stored) for a 3d DMDA.

  Input Parameters:
+ da - the 3d distributed array
- J  - the (already created) matrix whose block structure is preallocated and filled with zeros
*/
PetscErrorCode DMCreateMatrix_DA_3d_MPISBAIJ(DM da, Mat J)
{
  PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
  PetscInt               m, n, dim, s, *cols, k, nc, col, cnt, p, *dnz, *onz;
  PetscInt               istart, iend, jstart, jend, kstart, kend, zs, nz, gzs, gnz, ii, jj, kk;
  MPI_Comm               comm;
  PetscScalar           *values;
  DMBoundaryType         bx, by, bz;
  DMDAStencilType        st;
  ISLocalToGlobalMapping ltog;

  PetscFunctionBegin;
  /*
     nc - number of components per grid point
     col - number of colors needed in one direction for single component problem
  */
  PetscCall(DMDAGetInfo(da, &dim, &m, &n, &p, NULL, NULL, NULL, &nc, &s, &bx, &by, &bz, &st));
  col = 2 * s + 1;

  PetscCall(DMDAGetCorners(da, &xs, &ys, &zs, &nx, &ny, &nz));
  PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, &gzs, &gnx, &gny, &gnz));
  PetscCall(PetscObjectGetComm((PetscObject)da, &comm));

  /* create the matrix */
  PetscCall(PetscMalloc1(col * col * col, &cols));

  PetscCall(DMGetLocalToGlobalMapping(da, &ltog));

  /* determine the matrix preallocation information */
  MatPreallocateBegin(comm, nx * ny * nz, nx * ny * nz, dnz, onz);
  for (i = xs; i < xs + nx; i++) {
    /* periodic boundaries keep the full stencil; otherwise clip to the physical domain */
    istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
    iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
    for (j = ys; j < ys + ny; j++) {
      jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
      jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
      for (k = zs; k < zs + nz; k++) {
        kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
        kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));

        slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs); /* local (ghosted) index of (i,j,k) */

        /* Find block columns in block row */
        cnt = 0;
        for (ii = istart; ii < iend + 1; ii++) {
          for (jj = jstart; jj < jend + 1; jj++) {
            for (kk = kstart; kk < kend + 1; kk++) {
              if ((st == DMDA_STENCIL_BOX) || (!ii && !jj) || (!jj && !kk) || (!ii && !kk)) cols[cnt++] = slot + ii + gnx * jj + gnx * gny * kk;
            }
          }
        }
        /* drop lower-triangular columns, switching row/cols to the global ordering */
        PetscCall(L2GFilterUpperTriangular(ltog, &slot, &cnt, cols));
        PetscCall(MatPreallocateSymmetricSetBlock(slot, cnt, cols, dnz, onz));
      }
    }
  }
  PetscCall(MatSeqSBAIJSetPreallocation(J, nc, 0, dnz));
  PetscCall(MatMPISBAIJSetPreallocation(J, nc, 0, dnz, 0, onz));
  MatPreallocateEnd(dnz, onz);

  PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    /* insert explicit zeros so the nonzero pattern is materialized */
    PetscCall(PetscCalloc1(col * col * col * nc * nc, &values));
    for (i = xs; i < xs + nx; i++) {
      istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
      iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
      for (j = ys; j < ys + ny; j++) {
        jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
        jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
        for (k = zs; k < zs + nz; k++) {
          kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
          kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));

          slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);

          cnt = 0;
          for (ii = istart; ii < iend + 1; ii++) {
            for (jj = jstart; jj < jend + 1; jj++) {
              for (kk = kstart; kk < kend + 1; kk++) {
                if ((st == DMDA_STENCIL_BOX) || (!ii && !jj) || (!jj && !kk) || (!ii && !kk)) cols[cnt++] = slot + ii + gnx * jj + gnx * gny * kk;
              }
            }
          }
          PetscCall(L2GFilterUpperTriangular(ltog, &slot, &cnt, cols));
          /* indices are already global after the filter, hence MatSetValuesBlocked (not ...Local) */
          PetscCall(MatSetValuesBlocked(J, 1, &slot, cnt, cols, values, INSERT_VALUES));
        }
      }
    }
    PetscCall(PetscFree(values));
    /* do not copy values to GPU since they are all zero and not yet needed there */
    PetscCall(MatBindToCPU(J, PETSC_TRUE));
    PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatBindToCPU(J, PETSC_FALSE));
    PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  }
  PetscCall(PetscFree(cols));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* ---------------------------------------------------------------------------------*/

/*
  DMCreateMatrix_DA_3d_MPIAIJ_Fill - Preallocates and fills the nonzero structure of J
  (AIJ format) for a 3d DMDA, using the per-block fill patterns dfill/ofill previously
  provided via DMDASetBlockFills() to couple only selected components.

  Input Parameters:
+ da - the 3d distributed array
- J  - the (already created) matrix whose structure is preallocated and filled with zeros
*/
PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ_Fill(DM da, Mat J)
{
  PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
  PetscInt               m, n, dim, s, *cols, k, nc, row, col, cnt, maxcnt = 0, l, p, *dnz, *onz;
  PetscInt               istart, iend, jstart, jend, kstart, kend, zs, nz, gzs, gnz, ii, jj, kk, M, N, P;
  DM_DA                 *dd = (DM_DA *)da->data;
  PetscInt               ifill_col, *dfill =
dd->dfill, *ofill = dd->ofill;
  MPI_Comm               comm;
  PetscScalar           *values;
  DMBoundaryType         bx, by, bz;
  ISLocalToGlobalMapping ltog;
  DMDAStencilType        st;
  PetscBool              removedups = PETSC_FALSE;

  PetscFunctionBegin;
  /*
     nc - number of components per grid point
     col - number of colors needed in one direction for single component problem
  */
  PetscCall(DMDAGetInfo(da, &dim, &m, &n, &p, &M, &N, &P, &nc, &s, &bx, &by, &bz, &st));
  col = 2 * s + 1;
  PetscCheck(bx != DM_BOUNDARY_PERIODIC || (m % col) == 0, PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "For coloring efficiency ensure number of grid points in X is divisible\n\
                 by 2*stencil_width + 1\n");
  PetscCheck(by != DM_BOUNDARY_PERIODIC || (n % col) == 0, PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "For coloring efficiency ensure number of grid points in Y is divisible\n\
                 by 2*stencil_width + 1\n");
  PetscCheck(bz != DM_BOUNDARY_PERIODIC || (p % col) == 0, PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "For coloring efficiency ensure number of grid points in Z is divisible\n\
                 by 2*stencil_width + 1\n");

  /*
       With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times
       because of "wrapping" around the end of the domain hitting an entry already counted in the other direction.
  */
  if (M == 1 && 2 * s >= m) removedups = PETSC_TRUE;
  if (N == 1 && 2 * s >= n) removedups = PETSC_TRUE;
  if (P == 1 && 2 * s >= p) removedups = PETSC_TRUE;

  PetscCall(DMDAGetCorners(da, &xs, &ys, &zs, &nx, &ny, &nz));
  PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, &gzs, &gnx, &gny, &gnz));
  PetscCall(PetscObjectGetComm((PetscObject)da, &comm));

  /* scratch array of scalar column indices; at most col^3 stencil points with up to nc columns each */
  PetscCall(PetscMalloc1(col * col * col * nc, &cols));
  PetscCall(DMGetLocalToGlobalMapping(da, &ltog));

  /* determine the matrix preallocation information */
  MatPreallocateBegin(comm, nc * nx * ny * nz, nc * nx * ny * nz, dnz, onz);

  PetscCall(MatSetBlockSize(J, nc));
  for (i = xs; i < xs + nx; i++) {
    /* periodic boundaries keep the full stencil; otherwise clip to the physical domain */
    istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
    iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
    for (j = ys; j < ys + ny; j++) {
      jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
      jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
      for (k = zs; k < zs + nz; k++) {
        kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
        kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));

        slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs); /* local (ghosted) index of (i,j,k) */

        for (l = 0; l < nc; l++) { /* one scalar row per component */
          cnt = 0;
          for (ii = istart; ii < iend + 1; ii++) {
            for (jj = jstart; jj < jend + 1; jj++) {
              for (kk = kstart; kk < kend + 1; kk++) {
                if (ii || jj || kk) {
                  /* off-diagonal stencil point: couple only the components listed in ofill's row l */
                  if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/
                    for (ifill_col = ofill[l]; ifill_col < ofill[l + 1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc * (slot + ii + gnx * jj + gnx * gny * kk);
                  }
                } else {
                  /* diagonal stencil point: use dfill's row l, or all nc components when dfill is NULL (dense block) */
                  if (dfill) {
                    for (ifill_col = dfill[l]; ifill_col < dfill[l + 1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc * (slot + ii + gnx * jj + gnx * gny * kk);
                  } else {
                    for (ifill_col = 0; ifill_col < nc; ifill_col++) cols[cnt++] = ifill_col + nc * (slot + ii + gnx * jj + gnx * gny * kk);
                  }
                }
              }
            }
          }
          row    = l + nc * (slot);
          maxcnt = PetscMax(maxcnt, cnt); /* widest row seen; sizes the zero-values buffer below */
          if (removedups) PetscCall(MatPreallocateSetLocalRemoveDups(ltog, 1, &row, ltog, cnt, cols, dnz, onz));
          else PetscCall(MatPreallocateSetLocal(ltog, 1, &row, ltog, cnt, cols, dnz, onz));
        }
      }
    }
  }
  PetscCall(MatSeqAIJSetPreallocation(J, 0, dnz));
  PetscCall(MatMPIAIJSetPreallocation(J, 0, dnz, 0, onz));
  MatPreallocateEnd(dnz, onz);
  PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    /* insert explicit zeros so the nonzero pattern is materialized */
    PetscCall(PetscCalloc1(maxcnt, &values));
    for (i = xs; i < xs + nx; i++) {
      istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
      iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
      for (j = ys; j < ys + ny; j++) {
        jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
        jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
        for (k = zs; k < zs + nz; k++) {
          kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
          kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));

          slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);

          for (l = 0; l < nc; l++) {
            cnt = 0;
            for (ii = istart; ii < iend + 1; ii++) {
              for (jj = jstart; jj < jend + 1; jj++) {
                for (kk = kstart; kk < kend + 1; kk++) {
                  if (ii || jj || kk) {
                    if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/
                      for (ifill_col = ofill[l]; ifill_col < ofill[l + 1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc * (slot + ii + gnx * jj + gnx * gny * kk);
                    }
                  } else {
                    if (dfill) {
                      for (ifill_col = dfill[l]; ifill_col < dfill[l + 1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc * (slot + ii + gnx * jj + gnx * gny * kk);
                    } else {
                      for (ifill_col = 0; ifill_col < nc; ifill_col++) cols[cnt++] = ifill_col + nc * (slot + ii + gnx * jj + gnx * gny * kk);
                    }
                  }
                }
              }
            }
            row = l + nc * (slot);
            PetscCall(MatSetValuesLocal(J, 1, &row, cnt, cols, values, INSERT_VALUES));
          }
        }
      }
    }
    PetscCall(PetscFree(values));
    /* do not copy values to GPU since they are all zero and not yet needed there */
    PetscCall(MatBindToCPU(J, PETSC_TRUE));
    PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatBindToCPU(J, PETSC_FALSE));
    PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  }
  PetscCall(PetscFree(cols));
  PetscFunctionReturn(PETSC_SUCCESS);
}