1 2 #include <petsc/private/dmdaimpl.h> /*I "petscdmda.h" I*/ 3 #include <petscmat.h> 4 #include <petscbt.h> 5 6 extern PetscErrorCode DMCreateColoring_DA_1d_MPIAIJ(DM, ISColoringType, ISColoring *); 7 extern PetscErrorCode DMCreateColoring_DA_2d_MPIAIJ(DM, ISColoringType, ISColoring *); 8 extern PetscErrorCode DMCreateColoring_DA_2d_5pt_MPIAIJ(DM, ISColoringType, ISColoring *); 9 extern PetscErrorCode DMCreateColoring_DA_3d_MPIAIJ(DM, ISColoringType, ISColoring *); 10 11 /* 12 For ghost i that may be negative or greater than the upper bound this 13 maps it into the 0:m-1 range using periodicity 14 */ 15 #define SetInRange(i, m) ((i < 0) ? m + i : ((i >= m) ? i - m : i)) 16 17 static PetscErrorCode DMDASetBlockFills_Private(const PetscInt *dfill, PetscInt w, PetscInt **rfill) 18 { 19 PetscInt i, j, nz, *fill; 20 21 PetscFunctionBegin; 22 if (!dfill) PetscFunctionReturn(PETSC_SUCCESS); 23 24 /* count number nonzeros */ 25 nz = 0; 26 for (i = 0; i < w; i++) { 27 for (j = 0; j < w; j++) { 28 if (dfill[w * i + j]) nz++; 29 } 30 } 31 PetscCall(PetscMalloc1(nz + w + 1, &fill)); 32 /* construct modified CSR storage of nonzero structure */ 33 /* fill[0 -- w] marks starts of each row of column indices (and end of last row) 34 so fill[1] - fill[0] gives number of nonzeros in first row etc */ 35 nz = w + 1; 36 for (i = 0; i < w; i++) { 37 fill[i] = nz; 38 for (j = 0; j < w; j++) { 39 if (dfill[w * i + j]) { 40 fill[nz] = j; 41 nz++; 42 } 43 } 44 } 45 fill[w] = nz; 46 47 *rfill = fill; 48 PetscFunctionReturn(PETSC_SUCCESS); 49 } 50 51 static PetscErrorCode DMDASetBlockFillsSparse_Private(const PetscInt *dfillsparse, PetscInt w, PetscInt **rfill) 52 { 53 PetscInt nz; 54 55 PetscFunctionBegin; 56 if (!dfillsparse) PetscFunctionReturn(PETSC_SUCCESS); 57 58 /* Determine number of non-zeros */ 59 nz = (dfillsparse[w] - w - 1); 60 61 /* Allocate space for our copy of the given sparse matrix representation. */ 62 PetscCall(PetscMalloc1(nz + w + 1, rfill)); 63 PetscCall(PetscArraycpy(*rfill, dfillsparse, nz + w + 1)); 64 PetscFunctionReturn(PETSC_SUCCESS); 65 } 66 67 static PetscErrorCode DMDASetBlockFills_Private2(DM_DA *dd) 68 { 69 PetscInt i, k, cnt = 1; 70 71 PetscFunctionBegin; 72 73 /* ofillcount tracks the columns of ofill that have any nonzero in thems; the value in each location is the number of 74 columns to the left with any nonzeros in them plus 1 */ 75 PetscCall(PetscCalloc1(dd->w, &dd->ofillcols)); 76 for (i = 0; i < dd->w; i++) { 77 for (k = dd->ofill[i]; k < dd->ofill[i + 1]; k++) dd->ofillcols[dd->ofill[k]] = 1; 78 } 79 for (i = 0; i < dd->w; i++) { 80 if (dd->ofillcols[i]) dd->ofillcols[i] = cnt++; 81 } 82 PetscFunctionReturn(PETSC_SUCCESS); 83 } 84 85 /*@ 86 DMDASetBlockFills - Sets the fill pattern in each block for a multi-component problem 87 of the matrix returned by `DMCreateMatrix()`. 88 89 Logically Collective on da 90 91 Input Parameters: 92 + da - the distributed array 93 . dfill - the fill pattern in the diagonal block (may be NULL, means use dense block) 94 - ofill - the fill pattern in the off-diagonal blocks 95 96 Level: developer 97 98 Notes: 99 This only makes sense when you are doing multicomponent problems but using the 100 `MATMPIAIJ` matrix format 101 102 The format for dfill and ofill is a 2 dimensional dof by dof matrix with 1 entries 103 representing coupling and 0 entries for missing coupling. For example 104 .vb 105 dfill[9] = {1, 0, 0, 106 1, 1, 0, 107 0, 1, 1} 108 .ve 109 means that row 0 is coupled with only itself in the diagonal block, row 1 is coupled with 110 itself and row 0 (in the diagonal block) and row 2 is coupled with itself and row 1 (in the 111 diagonal block). 112 113 `DMDASetGetMatrix()` allows you to provide general code for those more complicated nonzero patterns then 114 can be represented in the dfill, ofill format 115 116 Contributed by Glenn Hammond 117 118 .seealso: `DM`, `DMDA`, `DMCreateMatrix()`, `DMDASetGetMatrix()`, `DMSetMatrixPreallocateOnly()` 119 @*/ 120 PetscErrorCode DMDASetBlockFills(DM da, const PetscInt *dfill, const PetscInt *ofill) 121 { 122 DM_DA *dd = (DM_DA *)da->data; 123 124 PetscFunctionBegin; 125 /* save the given dfill and ofill information */ 126 PetscCall(DMDASetBlockFills_Private(dfill, dd->w, &dd->dfill)); 127 PetscCall(DMDASetBlockFills_Private(ofill, dd->w, &dd->ofill)); 128 129 /* count nonzeros in ofill columns */ 130 PetscCall(DMDASetBlockFills_Private2(dd)); 131 PetscFunctionReturn(PETSC_SUCCESS); 132 } 133 134 /*@ 135 DMDASetBlockFillsSparse - Sets the fill pattern in each block for a multi-component problem 136 of the matrix returned by `DMCreateMatrix()`, using sparse representations 137 of fill patterns. 138 139 Logically Collective on da 140 141 Input Parameters: 142 + da - the distributed array 143 . dfill - the sparse fill pattern in the diagonal block (may be NULL, means use dense block) 144 - ofill - the sparse fill pattern in the off-diagonal blocks 145 146 Level: developer 147 148 Notes: 149 This only makes sense when you are doing multicomponent problems but using the 150 `MATMPIAIJ` matrix format 151 152 The format for dfill and ofill is a sparse representation of a 153 dof-by-dof matrix with 1 entries representing coupling and 0 entries 154 for missing coupling. The sparse representation is a 1 dimensional 155 array of length nz + dof + 1, where nz is the number of non-zeros in 156 the matrix. The first dof entries in the array give the 157 starting array indices of each row's items in the rest of the array, 158 the dof+1st item contains the value nz + dof + 1 (i.e. the entire length of the array) 159 and the remaining nz items give the column indices of each of 160 the 1s within the logical 2D matrix. Each row's items within 161 the array are the column indices of the 1s within that row 162 of the 2D matrix. PETSc developers may recognize that this is the 163 same format as that computed by the `DMDASetBlockFills_Private()` 164 function from a dense 2D matrix representation. 165 166 `DMDASetGetMatrix()` allows you to provide general code for those more complicated nonzero patterns then 167 can be represented in the dfill, ofill format 168 169 Contributed by Philip C. Roth 170 171 .seealso: `DM`, `DMDA`, `DMDASetBlockFills()`, `DMCreateMatrix()`, `DMDASetGetMatrix()`, `DMSetMatrixPreallocateOnly()` 172 @*/ 173 PetscErrorCode DMDASetBlockFillsSparse(DM da, const PetscInt *dfillsparse, const PetscInt *ofillsparse) 174 { 175 DM_DA *dd = (DM_DA *)da->data; 176 177 PetscFunctionBegin; 178 /* save the given dfill and ofill information */ 179 PetscCall(DMDASetBlockFillsSparse_Private(dfillsparse, dd->w, &dd->dfill)); 180 PetscCall(DMDASetBlockFillsSparse_Private(ofillsparse, dd->w, &dd->ofill)); 181 182 /* count nonzeros in ofill columns */ 183 PetscCall(DMDASetBlockFills_Private2(dd)); 184 PetscFunctionReturn(PETSC_SUCCESS); 185 } 186 187 PetscErrorCode DMCreateColoring_DA(DM da, ISColoringType ctype, ISColoring *coloring) 188 { 189 PetscInt dim, m, n, p, nc; 190 DMBoundaryType bx, by, bz; 191 MPI_Comm comm; 192 PetscMPIInt size; 193 PetscBool isBAIJ; 194 DM_DA *dd = (DM_DA *)da->data; 195 196 PetscFunctionBegin; 197 /* 198 m 199 ------------------------------------------------------ 200 | | 201 | | 202 | ---------------------- | 203 | | | | 204 n | yn | | | 205 | | | | 206 | .--------------------- | 207 | (xs,ys) xn | 208 | . | 209 | (gxs,gys) | 210 | | 211 ----------------------------------------------------- 212 */ 213 214 /* 215 nc - number of components per grid point 216 col - number of colors needed in one direction for single component problem 217 218 */ 219 PetscCall(DMDAGetInfo(da, &dim, NULL, NULL, NULL, &m, &n, &p, &nc, NULL, &bx, &by, &bz, NULL)); 220 221 PetscCall(PetscObjectGetComm((PetscObject)da, &comm)); 222 PetscCallMPI(MPI_Comm_size(comm, &size)); 223 if (ctype == IS_COLORING_LOCAL) { 224 if (size == 1) { 225 ctype = IS_COLORING_GLOBAL; 226 } else { 227 PetscCheck((dim == 1) || !((m == 1 && bx == DM_BOUNDARY_PERIODIC) || (n == 1 && by == DM_BOUNDARY_PERIODIC) || (p == 1 && bz == DM_BOUNDARY_PERIODIC)), PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "IS_COLORING_LOCAL cannot be used for periodic boundary condition having both ends of the domain on the same process"); 228 } 229 } 230 231 /* Tell the DMDA it has 1 degree of freedom per grid point so that the coloring for BAIJ 232 matrices is for the blocks, not the individual matrix elements */ 233 PetscCall(PetscStrbeginswith(da->mattype, MATBAIJ, &isBAIJ)); 234 if (!isBAIJ) PetscCall(PetscStrbeginswith(da->mattype, MATMPIBAIJ, &isBAIJ)); 235 if (!isBAIJ) PetscCall(PetscStrbeginswith(da->mattype, MATSEQBAIJ, &isBAIJ)); 236 if (isBAIJ) { 237 dd->w = 1; 238 dd->xs = dd->xs / nc; 239 dd->xe = dd->xe / nc; 240 dd->Xs = dd->Xs / nc; 241 dd->Xe = dd->Xe / nc; 242 } 243 244 /* 245 We do not provide a getcoloring function in the DMDA operations because 246 the basic DMDA does not know about matrices. We think of DMDA as being 247 more low-level then matrices. 248 */ 249 if (dim == 1) PetscCall(DMCreateColoring_DA_1d_MPIAIJ(da, ctype, coloring)); 250 else if (dim == 2) PetscCall(DMCreateColoring_DA_2d_MPIAIJ(da, ctype, coloring)); 251 else if (dim == 3) PetscCall(DMCreateColoring_DA_3d_MPIAIJ(da, ctype, coloring)); 252 else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "Not done for %" PetscInt_FMT " dimension, send us mail petsc-maint@mcs.anl.gov for code", dim); 253 if (isBAIJ) { 254 dd->w = nc; 255 dd->xs = dd->xs * nc; 256 dd->xe = dd->xe * nc; 257 dd->Xs = dd->Xs * nc; 258 dd->Xe = dd->Xe * nc; 259 } 260 PetscFunctionReturn(PETSC_SUCCESS); 261 } 262 263 /* ---------------------------------------------------------------------------------*/ 264 265 PetscErrorCode DMCreateColoring_DA_2d_MPIAIJ(DM da, ISColoringType ctype, ISColoring *coloring) 266 { 267 PetscInt xs, ys, nx, ny, i, j, ii, gxs, gys, gnx, gny, m, n, M, N, dim, s, k, nc, col; 268 PetscInt ncolors = 0; 269 MPI_Comm comm; 270 DMBoundaryType bx, by; 271 DMDAStencilType st; 272 ISColoringValue *colors; 273 DM_DA *dd = (DM_DA *)da->data; 274 275 PetscFunctionBegin; 276 /* 277 nc - number of components per grid point 278 col - number of colors needed in one direction for single component problem 279 280 */ 281 PetscCall(DMDAGetInfo(da, &dim, &m, &n, NULL, &M, &N, NULL, &nc, &s, &bx, &by, NULL, &st)); 282 col = 2 * s + 1; 283 PetscCall(DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL)); 284 PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL)); 285 PetscCall(PetscObjectGetComm((PetscObject)da, &comm)); 286 287 /* special case as taught to us by Paul Hovland */ 288 if (st == DMDA_STENCIL_STAR && s == 1) { 289 PetscCall(DMCreateColoring_DA_2d_5pt_MPIAIJ(da, ctype, coloring)); 290 } else { 291 if (ctype == IS_COLORING_GLOBAL) { 292 if (!dd->localcoloring) { 293 PetscCall(PetscMalloc1(nc * nx * ny, &colors)); 294 ii = 0; 295 for (j = ys; j < ys + ny; j++) { 296 for (i = xs; i < xs + nx; i++) { 297 for (k = 0; k < nc; k++) colors[ii++] = k + nc * ((i % col) + col * (j % col)); 298 } 299 } 300 ncolors = nc + nc * (col - 1 + col * (col - 1)); 301 PetscCall(ISColoringCreate(comm, ncolors, nc * nx * ny, colors, PETSC_OWN_POINTER, &dd->localcoloring)); 302 } 303 *coloring = dd->localcoloring; 304 } else if (ctype == IS_COLORING_LOCAL) { 305 if (!dd->ghostedcoloring) { 306 PetscCall(PetscMalloc1(nc * gnx * gny, &colors)); 307 ii = 0; 308 for (j = gys; j < gys + gny; j++) { 309 for (i = gxs; i < gxs + gnx; i++) { 310 for (k = 0; k < nc; k++) { 311 /* the complicated stuff is to handle periodic boundaries */ 312 colors[ii++] = k + nc * ((SetInRange(i, m) % col) + col * (SetInRange(j, n) % col)); 313 } 314 } 315 } 316 ncolors = nc + nc * (col - 1 + col * (col - 1)); 317 PetscCall(ISColoringCreate(comm, ncolors, nc * gnx * gny, colors, PETSC_OWN_POINTER, &dd->ghostedcoloring)); 318 /* PetscIntView(ncolors,(PetscInt*)colors,0); */ 319 320 PetscCall(ISColoringSetType(dd->ghostedcoloring, IS_COLORING_LOCAL)); 321 } 322 *coloring = dd->ghostedcoloring; 323 } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_ARG_WRONG, "Unknown ISColoringType %d", (int)ctype); 324 } 325 PetscCall(ISColoringReference(*coloring)); 326 PetscFunctionReturn(PETSC_SUCCESS); 327 } 328 329 /* ---------------------------------------------------------------------------------*/ 330 331 PetscErrorCode DMCreateColoring_DA_3d_MPIAIJ(DM da, ISColoringType ctype, ISColoring *coloring) 332 { 333 PetscInt xs, ys, nx, ny, i, j, gxs, gys, gnx, gny, m, n, p, dim, s, k, nc, col, zs, gzs, ii, l, nz, gnz, M, N, P; 334 PetscInt ncolors; 335 MPI_Comm comm; 336 DMBoundaryType bx, by, bz; 337 DMDAStencilType st; 338 ISColoringValue *colors; 339 DM_DA *dd = (DM_DA *)da->data; 340 341 PetscFunctionBegin; 342 /* 343 nc - number of components per grid point 344 col - number of colors needed in one direction for single component problem 345 */ 346 PetscCall(DMDAGetInfo(da, &dim, &m, &n, &p, &M, &N, &P, &nc, &s, &bx, &by, &bz, &st)); 347 col = 2 * s + 1; 348 PetscCheck(bx != DM_BOUNDARY_PERIODIC || (m % col) == 0, PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "For coloring efficiency ensure number of grid points in X is divisible\n\ 349 by 2*stencil_width + 1\n"); 350 PetscCheck(by != DM_BOUNDARY_PERIODIC || (n % col) == 0, PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "For coloring efficiency ensure number of grid points in Y is divisible\n\ 351 by 2*stencil_width + 1\n"); 352 PetscCheck(bz != DM_BOUNDARY_PERIODIC || (p % col) == 0, PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "For coloring efficiency ensure number of grid points in Z is divisible\n\ 353 by 2*stencil_width + 1\n"); 354 355 PetscCall(DMDAGetCorners(da, &xs, &ys, &zs, &nx, &ny, &nz)); 356 PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, &gzs, &gnx, &gny, &gnz)); 357 PetscCall(PetscObjectGetComm((PetscObject)da, &comm)); 358 359 /* create the coloring */ 360 if (ctype == IS_COLORING_GLOBAL) { 361 if (!dd->localcoloring) { 362 PetscCall(PetscMalloc1(nc * nx * ny * nz, &colors)); 363 ii = 0; 364 for (k = zs; k < zs + nz; k++) { 365 for (j = ys; j < ys + ny; j++) { 366 for (i = xs; i < xs + nx; i++) { 367 for (l = 0; l < nc; l++) colors[ii++] = l + nc * ((i % col) + col * (j % col) + col * col * (k % col)); 368 } 369 } 370 } 371 ncolors = nc + nc * (col - 1 + col * (col - 1) + col * col * (col - 1)); 372 PetscCall(ISColoringCreate(comm, ncolors, nc * nx * ny * nz, colors, PETSC_OWN_POINTER, &dd->localcoloring)); 373 } 374 *coloring = dd->localcoloring; 375 } else if (ctype == IS_COLORING_LOCAL) { 376 if (!dd->ghostedcoloring) { 377 PetscCall(PetscMalloc1(nc * gnx * gny * gnz, &colors)); 378 ii = 0; 379 for (k = gzs; k < gzs + gnz; k++) { 380 for (j = gys; j < gys + gny; j++) { 381 for (i = gxs; i < gxs + gnx; i++) { 382 for (l = 0; l < nc; l++) { 383 /* the complicated stuff is to handle periodic boundaries */ 384 colors[ii++] = l + nc * ((SetInRange(i, m) % col) + col * (SetInRange(j, n) % col) + col * col * (SetInRange(k, p) % col)); 385 } 386 } 387 } 388 } 389 ncolors = nc + nc * (col - 1 + col * (col - 1) + col * col * (col - 1)); 390 PetscCall(ISColoringCreate(comm, ncolors, nc * gnx * gny * gnz, colors, PETSC_OWN_POINTER, &dd->ghostedcoloring)); 391 PetscCall(ISColoringSetType(dd->ghostedcoloring, IS_COLORING_LOCAL)); 392 } 393 *coloring = dd->ghostedcoloring; 394 } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_ARG_WRONG, "Unknown ISColoringType %d", (int)ctype); 395 PetscCall(ISColoringReference(*coloring)); 396 PetscFunctionReturn(PETSC_SUCCESS); 397 } 398 399 /* ---------------------------------------------------------------------------------*/ 400 401 PetscErrorCode DMCreateColoring_DA_1d_MPIAIJ(DM da, ISColoringType ctype, ISColoring *coloring) 402 { 403 PetscInt xs, nx, i, i1, gxs, gnx, l, m, M, dim, s, nc, col; 404 PetscInt ncolors; 405 MPI_Comm comm; 406 DMBoundaryType bx; 407 ISColoringValue *colors; 408 DM_DA *dd = (DM_DA *)da->data; 409 410 PetscFunctionBegin; 411 /* 412 nc - number of components per grid point 413 col - number of colors needed in one direction for single component problem 414 */ 415 PetscCall(DMDAGetInfo(da, &dim, &m, NULL, NULL, &M, NULL, NULL, &nc, &s, &bx, NULL, NULL, NULL)); 416 col = 2 * s + 1; 417 PetscCall(DMDAGetCorners(da, &xs, NULL, NULL, &nx, NULL, NULL)); 418 PetscCall(DMDAGetGhostCorners(da, &gxs, NULL, NULL, &gnx, NULL, NULL)); 419 PetscCall(PetscObjectGetComm((PetscObject)da, &comm)); 420 421 /* create the coloring */ 422 if (ctype == IS_COLORING_GLOBAL) { 423 if (!dd->localcoloring) { 424 PetscCall(PetscMalloc1(nc * nx, &colors)); 425 if (dd->ofillcols) { 426 PetscInt tc = 0; 427 for (i = 0; i < nc; i++) tc += (PetscInt)(dd->ofillcols[i] > 0); 428 i1 = 0; 429 for (i = xs; i < xs + nx; i++) { 430 for (l = 0; l < nc; l++) { 431 if (dd->ofillcols[l] && (i % col)) { 432 colors[i1++] = nc - 1 + tc * ((i % col) - 1) + dd->ofillcols[l]; 433 } else { 434 colors[i1++] = l; 435 } 436 } 437 } 438 ncolors = nc + 2 * s * tc; 439 } else { 440 i1 = 0; 441 for (i = xs; i < xs + nx; i++) { 442 for (l = 0; l < nc; l++) colors[i1++] = l + nc * (i % col); 443 } 444 ncolors = nc + nc * (col - 1); 445 } 446 PetscCall(ISColoringCreate(comm, ncolors, nc * nx, colors, PETSC_OWN_POINTER, &dd->localcoloring)); 447 } 448 *coloring = dd->localcoloring; 449 } else if (ctype == IS_COLORING_LOCAL) { 450 if (!dd->ghostedcoloring) { 451 PetscCall(PetscMalloc1(nc * gnx, &colors)); 452 i1 = 0; 453 for (i = gxs; i < gxs + gnx; i++) { 454 for (l = 0; l < nc; l++) { 455 /* the complicated stuff is to handle periodic boundaries */ 456 colors[i1++] = l + nc * (SetInRange(i, m) % col); 457 } 458 } 459 ncolors = nc + nc * (col - 1); 460 PetscCall(ISColoringCreate(comm, ncolors, nc * gnx, colors, PETSC_OWN_POINTER, &dd->ghostedcoloring)); 461 PetscCall(ISColoringSetType(dd->ghostedcoloring, IS_COLORING_LOCAL)); 462 } 463 *coloring = dd->ghostedcoloring; 464 } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_ARG_WRONG, "Unknown ISColoringType %d", (int)ctype); 465 PetscCall(ISColoringReference(*coloring)); 466 PetscFunctionReturn(PETSC_SUCCESS); 467 } 468 469 PetscErrorCode DMCreateColoring_DA_2d_5pt_MPIAIJ(DM da, ISColoringType ctype, ISColoring *coloring) 470 { 471 PetscInt xs, ys, nx, ny, i, j, ii, gxs, gys, gnx, gny, m, n, dim, s, k, nc; 472 PetscInt ncolors; 473 MPI_Comm comm; 474 DMBoundaryType bx, by; 475 ISColoringValue *colors; 476 DM_DA *dd = (DM_DA *)da->data; 477 478 PetscFunctionBegin; 479 /* 480 nc - number of components per grid point 481 col - number of colors needed in one direction for single component problem 482 */ 483 PetscCall(DMDAGetInfo(da, &dim, &m, &n, NULL, NULL, NULL, NULL, &nc, &s, &bx, &by, NULL, NULL)); 484 PetscCall(DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL)); 485 PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL)); 486 PetscCall(PetscObjectGetComm((PetscObject)da, &comm)); 487 /* create the coloring */ 488 if (ctype == IS_COLORING_GLOBAL) { 489 if (!dd->localcoloring) { 490 PetscCall(PetscMalloc1(nc * nx * ny, &colors)); 491 ii = 0; 492 for (j = ys; j < ys + ny; j++) { 493 for (i = xs; i < xs + nx; i++) { 494 for (k = 0; k < nc; k++) colors[ii++] = k + nc * ((3 * j + i) % 5); 495 } 496 } 497 ncolors = 5 * nc; 498 PetscCall(ISColoringCreate(comm, ncolors, nc * nx * ny, colors, PETSC_OWN_POINTER, &dd->localcoloring)); 499 } 500 *coloring = dd->localcoloring; 501 } else if (ctype == IS_COLORING_LOCAL) { 502 if (!dd->ghostedcoloring) { 503 PetscCall(PetscMalloc1(nc * gnx * gny, &colors)); 504 ii = 0; 505 for (j = gys; j < gys + gny; j++) { 506 for (i = gxs; i < gxs + gnx; i++) { 507 for (k = 0; k < nc; k++) colors[ii++] = k + nc * ((3 * SetInRange(j, n) + SetInRange(i, m)) % 5); 508 } 509 } 510 ncolors = 5 * nc; 511 PetscCall(ISColoringCreate(comm, ncolors, nc * gnx * gny, colors, PETSC_OWN_POINTER, &dd->ghostedcoloring)); 512 PetscCall(ISColoringSetType(dd->ghostedcoloring, IS_COLORING_LOCAL)); 513 } 514 *coloring = dd->ghostedcoloring; 515 } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_ARG_WRONG, "Unknown ISColoringType %d", (int)ctype); 516 PetscFunctionReturn(PETSC_SUCCESS); 517 } 518 519 /* =========================================================================== */ 520 extern PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ(DM, Mat); 521 extern PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ_Fill(DM, Mat); 522 extern PetscErrorCode DMCreateMatrix_DA_1d_SeqAIJ_NoPreallocation(DM, Mat); 523 extern PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ(DM, Mat); 524 extern PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ_Fill(DM, Mat); 525 extern PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ(DM, Mat); 526 extern PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ_Fill(DM, Mat); 527 extern PetscErrorCode DMCreateMatrix_DA_2d_MPIBAIJ(DM, Mat); 528 extern PetscErrorCode DMCreateMatrix_DA_3d_MPIBAIJ(DM, Mat); 529 extern PetscErrorCode DMCreateMatrix_DA_2d_MPISBAIJ(DM, Mat); 530 extern PetscErrorCode DMCreateMatrix_DA_3d_MPISBAIJ(DM, Mat); 531 extern PetscErrorCode DMCreateMatrix_DA_2d_MPISELL(DM, Mat); 532 extern PetscErrorCode DMCreateMatrix_DA_3d_MPISELL(DM, Mat); 533 extern PetscErrorCode DMCreateMatrix_DA_IS(DM, Mat); 534 535 /*@C 536 MatSetupDM - Sets the `DMDA` that is to be used by the HYPRE_StructMatrix PETSc matrix 537 538 Logically Collective on mat 539 540 Input Parameters: 541 + mat - the matrix 542 - da - the da 543 544 Level: intermediate 545 546 .seealso: `Mat`, `MatSetUp()` 547 @*/ 548 PetscErrorCode MatSetupDM(Mat mat, DM da) 549 { 550 PetscFunctionBegin; 551 PetscValidHeaderSpecific(mat, MAT_CLASSID, 1); 552 PetscValidHeaderSpecificType(da, DM_CLASSID, 2, DMDA); 553 PetscTryMethod(mat, "MatSetupDM_C", (Mat, DM), (mat, da)); 554 PetscFunctionReturn(PETSC_SUCCESS); 555 } 556 557 PetscErrorCode MatView_MPI_DA(Mat A, PetscViewer viewer) 558 { 559 DM da; 560 const char *prefix; 561 Mat Anatural; 562 AO ao; 563 PetscInt rstart, rend, *petsc, i; 564 IS is; 565 MPI_Comm comm; 566 PetscViewerFormat format; 567 568 PetscFunctionBegin; 569 /* Check whether we are just printing info, in which case MatView() already viewed everything we wanted to view */ 570 PetscCall(PetscViewerGetFormat(viewer, &format)); 571 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) PetscFunctionReturn(PETSC_SUCCESS); 572 573 PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 574 PetscCall(MatGetDM(A, &da)); 575 PetscCheck(da, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Matrix not generated from a DMDA"); 576 577 PetscCall(DMDAGetAO(da, &ao)); 578 PetscCall(MatGetOwnershipRange(A, &rstart, &rend)); 579 PetscCall(PetscMalloc1(rend - rstart, &petsc)); 580 for (i = rstart; i < rend; i++) petsc[i - rstart] = i; 581 PetscCall(AOApplicationToPetsc(ao, rend - rstart, petsc)); 582 PetscCall(ISCreateGeneral(comm, rend - rstart, petsc, PETSC_OWN_POINTER, &is)); 583 584 /* call viewer on natural ordering */ 585 PetscCall(MatCreateSubMatrix(A, is, is, MAT_INITIAL_MATRIX, &Anatural)); 586 PetscCall(ISDestroy(&is)); 587 PetscCall(PetscObjectGetOptionsPrefix((PetscObject)A, &prefix)); 588 PetscCall(PetscObjectSetOptionsPrefix((PetscObject)Anatural, prefix)); 589 PetscCall(PetscObjectSetName((PetscObject)Anatural, ((PetscObject)A)->name)); 590 ((PetscObject)Anatural)->donotPetscObjectPrintClassNamePrefixType = PETSC_TRUE; 591 PetscCall(MatView(Anatural, viewer)); 592 ((PetscObject)Anatural)->donotPetscObjectPrintClassNamePrefixType = PETSC_FALSE; 593 PetscCall(MatDestroy(&Anatural)); 594 PetscFunctionReturn(PETSC_SUCCESS); 595 } 596 597 PetscErrorCode MatLoad_MPI_DA(Mat A, PetscViewer viewer) 598 { 599 DM da; 600 Mat Anatural, Aapp; 601 AO ao; 602 PetscInt rstart, rend, *app, i, m, n, M, N; 603 IS is; 604 MPI_Comm comm; 605 606 PetscFunctionBegin; 607 PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 608 PetscCall(MatGetDM(A, &da)); 609 PetscCheck(da, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Matrix not generated from a DMDA"); 610 611 /* Load the matrix in natural ordering */ 612 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &Anatural)); 613 PetscCall(MatSetType(Anatural, ((PetscObject)A)->type_name)); 614 PetscCall(MatGetSize(A, &M, &N)); 615 PetscCall(MatGetLocalSize(A, &m, &n)); 616 PetscCall(MatSetSizes(Anatural, m, n, M, N)); 617 PetscCall(MatLoad(Anatural, viewer)); 618 619 /* Map natural ordering to application ordering and create IS */ 620 PetscCall(DMDAGetAO(da, &ao)); 621 PetscCall(MatGetOwnershipRange(Anatural, &rstart, &rend)); 622 PetscCall(PetscMalloc1(rend - rstart, &app)); 623 for (i = rstart; i < rend; i++) app[i - rstart] = i; 624 PetscCall(AOPetscToApplication(ao, rend - rstart, app)); 625 PetscCall(ISCreateGeneral(comm, rend - rstart, app, PETSC_OWN_POINTER, &is)); 626 627 /* Do permutation and replace header */ 628 PetscCall(MatCreateSubMatrix(Anatural, is, is, MAT_INITIAL_MATRIX, &Aapp)); 629 PetscCall(MatHeaderReplace(A, &Aapp)); 630 PetscCall(ISDestroy(&is)); 631 PetscCall(MatDestroy(&Anatural)); 632 PetscFunctionReturn(PETSC_SUCCESS); 633 } 634 635 PetscErrorCode DMCreateMatrix_DA(DM da, Mat *J) 636 { 637 PetscInt dim, dof, nx, ny, nz, dims[3], starts[3], M, N, P; 638 Mat A; 639 MPI_Comm comm; 640 MatType Atype; 641 MatType mtype; 642 PetscMPIInt size; 643 DM_DA *dd = (DM_DA *)da->data; 644 void (*aij)(void) = NULL, (*baij)(void) = NULL, (*sbaij)(void) = NULL, (*sell)(void) = NULL, (*is)(void) = NULL; 645 646 PetscFunctionBegin; 647 PetscCall(MatInitializePackage()); 648 mtype = da->mattype; 649 650 /* 651 m 652 ------------------------------------------------------ 653 | | 654 | | 655 | ---------------------- | 656 | | | | 657 n | ny | | | 658 | | | | 659 | .--------------------- | 660 | (xs,ys) nx | 661 | . | 662 | (gxs,gys) | 663 | | 664 ----------------------------------------------------- 665 */ 666 667 /* 668 nc - number of components per grid point 669 col - number of colors needed in one direction for single component problem 670 */ 671 M = dd->M; 672 N = dd->N; 673 P = dd->P; 674 dim = da->dim; 675 dof = dd->w; 676 /* PetscCall(DMDAGetInfo(da,&dim,&M,&N,&P,NULL,NULL,NULL,&dof,NULL,NULL,NULL,NULL,NULL)); */ 677 PetscCall(DMDAGetCorners(da, NULL, NULL, NULL, &nx, &ny, &nz)); 678 PetscCall(PetscObjectGetComm((PetscObject)da, &comm)); 679 PetscCall(MatCreate(comm, &A)); 680 PetscCall(MatSetSizes(A, dof * nx * ny * nz, dof * nx * ny * nz, dof * M * N * P, dof * M * N * P)); 681 PetscCall(MatSetType(A, mtype)); 682 PetscCall(MatSetFromOptions(A)); 683 if (dof * nx * ny * nz < da->bind_below) { 684 PetscCall(MatSetBindingPropagates(A, PETSC_TRUE)); 685 PetscCall(MatBindToCPU(A, PETSC_TRUE)); 686 } 687 PetscCall(MatSetDM(A, da)); 688 if (da->structure_only) PetscCall(MatSetOption(A, MAT_STRUCTURE_ONLY, PETSC_TRUE)); 689 PetscCall(MatGetType(A, &Atype)); 690 /* 691 We do not provide a getmatrix function in the DMDA operations because 692 the basic DMDA does not know about matrices. We think of DMDA as being more 693 more low-level than matrices. This is kind of cheating but, cause sometimes 694 we think of DMDA has higher level than matrices. 695 696 We could switch based on Atype (or mtype), but we do not since the 697 specialized setting routines depend only on the particular preallocation 698 details of the matrix, not the type itself. 699 */ 700 PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatMPIAIJSetPreallocation_C", &aij)); 701 if (!aij) PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatSeqAIJSetPreallocation_C", &aij)); 702 if (!aij) { 703 PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatMPIBAIJSetPreallocation_C", &baij)); 704 if (!baij) PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatSeqBAIJSetPreallocation_C", &baij)); 705 if (!baij) { 706 PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatMPISBAIJSetPreallocation_C", &sbaij)); 707 if (!sbaij) PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatSeqSBAIJSetPreallocation_C", &sbaij)); 708 if (!sbaij) { 709 PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatMPISELLSetPreallocation_C", &sell)); 710 if (!sell) PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatSeqSELLSetPreallocation_C", &sell)); 711 } 712 if (!sell) PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatISSetPreallocation_C", &is)); 713 } 714 } 715 if (aij) { 716 if (dim == 1) { 717 if (dd->ofill) { 718 PetscCall(DMCreateMatrix_DA_1d_MPIAIJ_Fill(da, A)); 719 } else { 720 DMBoundaryType bx; 721 PetscMPIInt size; 722 PetscCall(DMDAGetInfo(da, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, &bx, NULL, NULL, NULL)); 723 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)da), &size)); 724 if (size == 1 && bx == DM_BOUNDARY_NONE) { 725 PetscCall(DMCreateMatrix_DA_1d_SeqAIJ_NoPreallocation(da, A)); 726 } else { 727 PetscCall(DMCreateMatrix_DA_1d_MPIAIJ(da, A)); 728 } 729 } 730 } else if (dim == 2) { 731 if (dd->ofill) { 732 PetscCall(DMCreateMatrix_DA_2d_MPIAIJ_Fill(da, A)); 733 } else { 734 PetscCall(DMCreateMatrix_DA_2d_MPIAIJ(da, A)); 735 } 736 } else if (dim == 3) { 737 if (dd->ofill) { 738 PetscCall(DMCreateMatrix_DA_3d_MPIAIJ_Fill(da, A)); 739 } else { 740 PetscCall(DMCreateMatrix_DA_3d_MPIAIJ(da, A)); 741 } 742 } 743 } else if (baij) { 744 if (dim == 2) { 745 PetscCall(DMCreateMatrix_DA_2d_MPIBAIJ(da, A)); 746 } else if (dim == 3) { 747 PetscCall(DMCreateMatrix_DA_3d_MPIBAIJ(da, A)); 748 } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "Not implemented for %" PetscInt_FMT " dimension and Matrix Type: %s in %" PetscInt_FMT " dimension! Send mail to petsc-maint@mcs.anl.gov for code", dim, Atype, dim); 749 } else if (sbaij) { 750 if (dim == 2) { 751 PetscCall(DMCreateMatrix_DA_2d_MPISBAIJ(da, A)); 752 } else if (dim == 3) { 753 PetscCall(DMCreateMatrix_DA_3d_MPISBAIJ(da, A)); 754 } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "Not implemented for %" PetscInt_FMT " dimension and Matrix Type: %s in %" PetscInt_FMT " dimension! Send mail to petsc-maint@mcs.anl.gov for code", dim, Atype, dim); 755 } else if (sell) { 756 if (dim == 2) { 757 PetscCall(DMCreateMatrix_DA_2d_MPISELL(da, A)); 758 } else if (dim == 3) { 759 PetscCall(DMCreateMatrix_DA_3d_MPISELL(da, A)); 760 } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "Not implemented for %" PetscInt_FMT " dimension and Matrix Type: %s in %" PetscInt_FMT " dimension! Send mail to petsc-maint@mcs.anl.gov for code", dim, Atype, dim); 761 } else if (is) { 762 PetscCall(DMCreateMatrix_DA_IS(da, A)); 763 } else { 764 ISLocalToGlobalMapping ltog; 765 766 PetscCall(MatSetBlockSize(A, dof)); 767 PetscCall(MatSetUp(A)); 768 PetscCall(DMGetLocalToGlobalMapping(da, <og)); 769 PetscCall(MatSetLocalToGlobalMapping(A, ltog, ltog)); 770 } 771 PetscCall(DMDAGetGhostCorners(da, &starts[0], &starts[1], &starts[2], &dims[0], &dims[1], &dims[2])); 772 PetscCall(MatSetStencil(A, dim, dims, starts, dof)); 773 PetscCall(MatSetDM(A, da)); 774 PetscCallMPI(MPI_Comm_size(comm, &size)); 775 if (size > 1) { 776 /* change viewer to display matrix in natural ordering */ 777 PetscCall(MatSetOperation(A, MATOP_VIEW, (void (*)(void))MatView_MPI_DA)); 778 PetscCall(MatSetOperation(A, MATOP_LOAD, (void (*)(void))MatLoad_MPI_DA)); 779 } 780 *J = A; 781 PetscFunctionReturn(PETSC_SUCCESS); 782 } 783 784 /* ---------------------------------------------------------------------------------*/ 785 PETSC_EXTERN PetscErrorCode MatISSetPreallocation_IS(Mat, PetscInt, const PetscInt[], PetscInt, const PetscInt[]); 786 787 PetscErrorCode DMCreateMatrix_DA_IS(DM dm, Mat J) 788 { 789 DM_DA *da = (DM_DA *)dm->data; 790 Mat lJ, P; 791 ISLocalToGlobalMapping ltog; 792 IS is; 793 PetscBT bt; 794 const PetscInt *e_loc, *idx; 795 PetscInt i, nel, nen, nv, dof, *gidx, n, N; 796 797 /* The l2g map of DMDA has all ghosted nodes, and e_loc is a subset of all the local nodes (including the ghosted) 798 We need to filter out the local indices that are not represented through the DMDAGetElements decomposition */ 799 PetscFunctionBegin; 800 dof = da->w; 801 PetscCall(MatSetBlockSize(J, dof)); 802 PetscCall(DMGetLocalToGlobalMapping(dm, <og)); 803 804 /* flag local elements indices in local DMDA numbering */ 805 PetscCall(ISLocalToGlobalMappingGetSize(ltog, &nv)); 806 PetscCall(PetscBTCreate(nv / dof, &bt)); 807 PetscCall(DMDAGetElements(dm, &nel, &nen, &e_loc)); /* this will throw an error if the stencil type is not DMDA_STENCIL_BOX */ 808 for (i = 0; i < nel * nen; i++) PetscCall(PetscBTSet(bt, e_loc[i])); 809 810 /* filter out (set to -1) the global indices not used by the local elements */ 811 PetscCall(PetscMalloc1(nv / dof, &gidx)); 812 PetscCall(ISLocalToGlobalMappingGetBlockIndices(ltog, &idx)); 813 PetscCall(PetscArraycpy(gidx, idx, nv / dof)); 814 PetscCall(ISLocalToGlobalMappingRestoreBlockIndices(ltog, &idx)); 815 for (i = 0; i < nv / dof; i++) 816 if (!PetscBTLookup(bt, i)) gidx[i] = -1; 817 PetscCall(PetscBTDestroy(&bt)); 818 PetscCall(ISCreateBlock(PetscObjectComm((PetscObject)dm), dof, nv / dof, gidx, PETSC_OWN_POINTER, &is)); 819 PetscCall(ISLocalToGlobalMappingCreateIS(is, <og)); 820 PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog)); 821 PetscCall(ISLocalToGlobalMappingDestroy(<og)); 822 PetscCall(ISDestroy(&is)); 823 824 /* Preallocation */ 825 if (dm->prealloc_skip) { 826 PetscCall(MatSetUp(J)); 827 } else { 828 PetscCall(MatISGetLocalMat(J, &lJ)); 829 PetscCall(MatGetLocalToGlobalMapping(lJ, <og, NULL)); 830 PetscCall(MatCreate(PetscObjectComm((PetscObject)lJ), &P)); 831 PetscCall(MatSetType(P, MATPREALLOCATOR)); 832 PetscCall(MatSetLocalToGlobalMapping(P, ltog, ltog)); 833 PetscCall(MatGetSize(lJ, &N, NULL)); 834 PetscCall(MatGetLocalSize(lJ, &n, NULL)); 835 PetscCall(MatSetSizes(P, n, n, N, N)); 836 PetscCall(MatSetBlockSize(P, dof)); 837 PetscCall(MatSetUp(P)); 838 for (i = 0; i < nel; i++) PetscCall(MatSetValuesBlockedLocal(P, nen, e_loc + i * nen, nen, e_loc + i * nen, NULL, INSERT_VALUES)); 839 PetscCall(MatPreallocatorPreallocate(P, (PetscBool)!da->prealloc_only, lJ)); 840 PetscCall(MatISRestoreLocalMat(J, &lJ)); 841 PetscCall(DMDARestoreElements(dm, &nel, &nen, &e_loc)); 842 PetscCall(MatDestroy(&P)); 843 844 PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY)); 845 PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY)); 846 } 847 PetscFunctionReturn(PETSC_SUCCESS); 848 } 849 850 PetscErrorCode DMCreateMatrix_DA_2d_MPISELL(DM da, Mat J) 851 { 852 PetscInt xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny, m, n, dim, s, *cols = NULL, k, nc, *rows = NULL, col, cnt, l, p; 853 PetscInt lstart, lend, pstart, pend, *dnz, *onz; 854 MPI_Comm comm; 855 PetscScalar *values; 856 DMBoundaryType bx, by; 857 ISLocalToGlobalMapping ltog; 858 DMDAStencilType st; 859 860 PetscFunctionBegin; 861 /* 862 nc - number of components per grid point 863 col - number of colors needed in one direction for single component problem 864 */ 865 PetscCall(DMDAGetInfo(da, &dim, &m, &n, NULL, NULL, NULL, NULL, &nc, &s, &bx, &by, NULL, &st)); 866 col = 2 * s + 1; 867 PetscCall(DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL)); 868 PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL)); 869 PetscCall(PetscObjectGetComm((PetscObject)da, &comm)); 870 871 PetscCall(PetscMalloc2(nc, &rows, col * col * nc * nc, &cols)); 872 PetscCall(DMGetLocalToGlobalMapping(da, <og)); 873 874 PetscCall(MatSetBlockSize(J, nc)); 875 /* determine the matrix preallocation information */ 876 MatPreallocateBegin(comm, nc * nx * ny, nc * nx * ny, dnz, onz); 877 for (i = xs; i < xs + nx; i++) { 878 pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i)); 879 pend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1)); 880 881 for (j = ys; j < ys + ny; j++) { 882 slot = i - gxs + gnx * (j - gys); 883 884 lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j)); 885 lend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1)); 886 887 cnt = 0; 888 for (k = 0; k < nc; k++) { 889 for (l = lstart; l < lend + 1; l++) { 890 for (p = pstart; p < pend + 1; p++) { 891 if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star have either l = 0 or p = 0 */ 892 cols[cnt++] = k + nc * (slot + gnx * l + p); 893 } 894 } 895 } 896 rows[k] = k + nc * (slot); 897 } 898 PetscCall(MatPreallocateSetLocal(ltog, nc, rows, ltog, cnt, cols, dnz, onz)); 899 } 900 } 901 PetscCall(MatSetBlockSize(J, nc)); 902 PetscCall(MatSeqSELLSetPreallocation(J, 0, dnz)); 903 PetscCall(MatMPISELLSetPreallocation(J, 0, dnz, 0, onz)); 904 MatPreallocateEnd(dnz, onz); 905 906 PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog)); 907 908 /* 909 For each node in the grid: we get the neighbors in the local (on processor ordering 910 that includes the ghost points) then MatSetValuesLocal() maps those indices to the global 911 PETSc ordering. 912 */ 913 if (!da->prealloc_only) { 914 PetscCall(PetscCalloc1(col * col * nc * nc, &values)); 915 for (i = xs; i < xs + nx; i++) { 916 pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i)); 917 pend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1)); 918 919 for (j = ys; j < ys + ny; j++) { 920 slot = i - gxs + gnx * (j - gys); 921 922 lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j)); 923 lend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1)); 924 925 cnt = 0; 926 for (k = 0; k < nc; k++) { 927 for (l = lstart; l < lend + 1; l++) { 928 for (p = pstart; p < pend + 1; p++) { 929 if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star have either l = 0 or p = 0 */ 930 cols[cnt++] = k + nc * (slot + gnx * l + p); 931 } 932 } 933 } 934 rows[k] = k + nc * (slot); 935 } 936 PetscCall(MatSetValuesLocal(J, nc, rows, cnt, cols, values, INSERT_VALUES)); 937 } 938 } 939 PetscCall(PetscFree(values)); 940 /* do not copy values to GPU since they are all zero and not yet needed there */ 941 PetscCall(MatBindToCPU(J, PETSC_TRUE)); 942 PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY)); 943 PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY)); 944 PetscCall(MatBindToCPU(J, PETSC_FALSE)); 945 PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE)); 946 } 947 PetscCall(PetscFree2(rows, cols)); 948 PetscFunctionReturn(PETSC_SUCCESS); 949 } 950 951 PetscErrorCode DMCreateMatrix_DA_3d_MPISELL(DM da, Mat J) 952 { 953 PetscInt xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny; 954 PetscInt m, n, dim, s, *cols = NULL, k, nc, *rows = NULL, col, cnt, l, p, *dnz = NULL, *onz = NULL; 955 PetscInt istart, iend, jstart, jend, kstart, kend, zs, nz, gzs, gnz, ii, jj, kk, M, N, P; 956 MPI_Comm comm; 957 PetscScalar *values; 958 DMBoundaryType bx, by, bz; 959 ISLocalToGlobalMapping ltog; 960 DMDAStencilType st; 961 962 PetscFunctionBegin; 963 /* 964 nc - number of components per grid point 965 col - number of colors needed in one direction for single component problem 966 */ 967 PetscCall(DMDAGetInfo(da, &dim, &m, &n, &p, &M, &N, &P, &nc, &s, &bx, &by, &bz, &st)); 968 col = 2 * s + 1; 969 PetscCall(DMDAGetCorners(da, &xs, &ys, &zs, &nx, &ny, &nz)); 970 PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, &gzs, &gnx, &gny, &gnz)); 971 PetscCall(PetscObjectGetComm((PetscObject)da, &comm)); 972 973 PetscCall(PetscMalloc2(nc, &rows, col * col * col * nc * nc, &cols)); 974 PetscCall(DMGetLocalToGlobalMapping(da, <og)); 975 976 PetscCall(MatSetBlockSize(J, nc)); 977 /* determine the matrix preallocation information */ 978 MatPreallocateBegin(comm, nc * nx * ny * nz, nc * nx * ny * nz, dnz, onz); 979 for (i = xs; i < xs + nx; i++) { 980 istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i)); 981 iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1)); 982 for (j = ys; j < ys + ny; j++) { 983 jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j)); 984 jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1)); 985 for (k = zs; k < zs + nz; k++) { 986 kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k)); 987 kend = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1)); 988 989 slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs); 990 991 cnt = 0; 992 for (l = 0; l < nc; l++) { 993 for (ii = istart; ii < iend + 1; ii++) { 994 for (jj = jstart; jj < jend + 1; jj++) { 995 for (kk = kstart; kk < kend + 1; kk++) { 996 if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/ 997 cols[cnt++] = l + nc * (slot + ii + gnx * jj + gnx * gny * kk); 998 } 999 } 1000 } 1001 } 1002 rows[l] = l + nc * (slot); 1003 } 1004 PetscCall(MatPreallocateSetLocal(ltog, nc, rows, ltog, cnt, cols, dnz, onz)); 1005 } 1006 } 1007 } 1008 PetscCall(MatSetBlockSize(J, nc)); 1009 PetscCall(MatSeqSELLSetPreallocation(J, 0, dnz)); 1010 PetscCall(MatMPISELLSetPreallocation(J, 0, dnz, 0, onz)); 1011 MatPreallocateEnd(dnz, onz); 1012 PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog)); 1013 1014 /* 1015 For each node in the grid: we get the neighbors in the local (on processor ordering 1016 that includes the ghost points) then MatSetValuesLocal() maps those indices to the global 1017 PETSc ordering. 1018 */ 1019 if (!da->prealloc_only) { 1020 PetscCall(PetscCalloc1(col * col * col * nc * nc * nc, &values)); 1021 for (i = xs; i < xs + nx; i++) { 1022 istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i)); 1023 iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1)); 1024 for (j = ys; j < ys + ny; j++) { 1025 jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j)); 1026 jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1)); 1027 for (k = zs; k < zs + nz; k++) { 1028 kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k)); 1029 kend = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1)); 1030 1031 slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs); 1032 1033 cnt = 0; 1034 for (l = 0; l < nc; l++) { 1035 for (ii = istart; ii < iend + 1; ii++) { 1036 for (jj = jstart; jj < jend + 1; jj++) { 1037 for (kk = kstart; kk < kend + 1; kk++) { 1038 if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/ 1039 cols[cnt++] = l + nc * (slot + ii + gnx * jj + gnx * gny * kk); 1040 } 1041 } 1042 } 1043 } 1044 rows[l] = l + nc * (slot); 1045 } 1046 PetscCall(MatSetValuesLocal(J, nc, rows, cnt, cols, values, INSERT_VALUES)); 1047 } 1048 } 1049 } 1050 PetscCall(PetscFree(values)); 1051 /* do not copy values to GPU since they are all zero and not yet needed there */ 1052 PetscCall(MatBindToCPU(J, PETSC_TRUE)); 1053 PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY)); 1054 PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY)); 1055 PetscCall(MatBindToCPU(J, PETSC_FALSE)); 1056 PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE)); 1057 } 1058 PetscCall(PetscFree2(rows, cols)); 1059 PetscFunctionReturn(PETSC_SUCCESS); 1060 } 1061 1062 PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ(DM da, Mat J) 1063 { 1064 PetscInt xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny, m, n, dim, s, *cols = NULL, k, nc, *rows = NULL, col, cnt, l, p, M, N; 1065 PetscInt lstart, lend, pstart, pend, *dnz, *onz; 1066 MPI_Comm comm; 1067 DMBoundaryType bx, by; 1068 ISLocalToGlobalMapping ltog, mltog; 1069 DMDAStencilType st; 1070 PetscBool removedups = PETSC_FALSE, alreadyboundtocpu = PETSC_TRUE; 1071 1072 PetscFunctionBegin; 1073 /* 1074 nc - number of components per grid point 1075 col - number of colors needed in one direction for single component problem 1076 */ 1077 PetscCall(DMDAGetInfo(da, &dim, &m, &n, NULL, &M, &N, NULL, &nc, &s, &bx, &by, NULL, &st)); 1078 if (bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE) PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_TRUE)); 1079 col = 2 * s + 1; 1080 /* 1081 With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times 1082 because of "wrapping" around the end of the domain hitting an entry already counted in the other direction. 1083 */ 1084 if (M == 1 && 2 * s >= m) removedups = PETSC_TRUE; 1085 if (N == 1 && 2 * s >= n) removedups = PETSC_TRUE; 1086 PetscCall(DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL)); 1087 PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL)); 1088 PetscCall(PetscObjectGetComm((PetscObject)da, &comm)); 1089 1090 PetscCall(PetscMalloc2(nc, &rows, col * col * nc * nc, &cols)); 1091 PetscCall(DMGetLocalToGlobalMapping(da, <og)); 1092 1093 PetscCall(MatSetBlockSize(J, nc)); 1094 /* determine the matrix preallocation information */ 1095 MatPreallocateBegin(comm, nc * nx * ny, nc * nx * ny, dnz, onz); 1096 for (i = xs; i < xs + nx; i++) { 1097 pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i)); 1098 pend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1)); 1099 1100 for (j = ys; j < ys + ny; j++) { 1101 slot = i - gxs + gnx * (j - gys); 1102 1103 lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j)); 1104 lend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1)); 1105 1106 cnt = 0; 1107 for (k = 0; k < nc; k++) { 1108 for (l = lstart; l < lend + 1; l++) { 1109 for (p = pstart; p < pend + 1; p++) { 1110 if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star have either l = 0 or p = 0 */ 1111 cols[cnt++] = k + nc * (slot + gnx * l + p); 1112 } 1113 } 1114 } 1115 rows[k] = k + nc * (slot); 1116 } 1117 if (removedups) PetscCall(MatPreallocateSetLocalRemoveDups(ltog, nc, rows, ltog, cnt, cols, dnz, onz)); 1118 else PetscCall(MatPreallocateSetLocal(ltog, nc, rows, ltog, cnt, cols, dnz, onz)); 1119 } 1120 } 1121 PetscCall(MatSetBlockSize(J, nc)); 1122 PetscCall(MatSeqAIJSetPreallocation(J, 0, dnz)); 1123 PetscCall(MatMPIAIJSetPreallocation(J, 0, dnz, 0, onz)); 1124 MatPreallocateEnd(dnz, onz); 1125 PetscCall(MatGetLocalToGlobalMapping(J, &mltog, NULL)); 1126 if (!mltog) PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog)); 1127 1128 /* 1129 For each node in the grid: we get the neighbors in the local (on processor ordering 1130 that includes the ghost points) then MatSetValuesLocal() maps those indices to the global 1131 PETSc ordering. 1132 */ 1133 if (!da->prealloc_only) { 1134 for (i = xs; i < xs + nx; i++) { 1135 pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i)); 1136 pend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1)); 1137 1138 for (j = ys; j < ys + ny; j++) { 1139 slot = i - gxs + gnx * (j - gys); 1140 1141 lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j)); 1142 lend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1)); 1143 1144 cnt = 0; 1145 for (l = lstart; l < lend + 1; l++) { 1146 for (p = pstart; p < pend + 1; p++) { 1147 if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star have either l = 0 or p = 0 */ 1148 cols[cnt++] = nc * (slot + gnx * l + p); 1149 for (k = 1; k < nc; k++) { 1150 cols[cnt] = 1 + cols[cnt - 1]; 1151 cnt++; 1152 } 1153 } 1154 } 1155 } 1156 for (k = 0; k < nc; k++) rows[k] = k + nc * (slot); 1157 PetscCall(MatSetValuesLocal(J, nc, rows, cnt, cols, NULL, INSERT_VALUES)); 1158 } 1159 } 1160 /* do not copy values to GPU since they are all zero and not yet needed there */ 1161 PetscCall(MatBoundToCPU(J, &alreadyboundtocpu)); 1162 PetscCall(MatBindToCPU(J, PETSC_TRUE)); 1163 PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY)); 1164 PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY)); 1165 if (!alreadyboundtocpu) PetscCall(MatBindToCPU(J, PETSC_FALSE)); 1166 PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE)); 1167 if (bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE) PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_FALSE)); 1168 } 1169 PetscCall(PetscFree2(rows, cols)); 1170 PetscFunctionReturn(PETSC_SUCCESS); 1171 } 1172 1173 PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ_Fill(DM da, Mat J) 1174 { 1175 PetscInt xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny; 1176 PetscInt m, n, dim, s, *cols, k, nc, row, col, cnt, maxcnt = 0, l, p, M, N; 1177 PetscInt lstart, lend, pstart, pend, *dnz, *onz; 1178 DM_DA *dd = (DM_DA *)da->data; 1179 PetscInt ifill_col, *ofill = dd->ofill, *dfill = dd->dfill; 1180 MPI_Comm comm; 1181 DMBoundaryType bx, by; 1182 ISLocalToGlobalMapping ltog; 1183 DMDAStencilType st; 1184 PetscBool removedups = PETSC_FALSE; 1185 1186 PetscFunctionBegin; 1187 /* 1188 nc - number of components per grid point 1189 col - number of colors needed in one direction for single component problem 1190 */ 1191 PetscCall(DMDAGetInfo(da, &dim, &m, &n, NULL, &M, &N, NULL, &nc, &s, &bx, &by, NULL, &st)); 1192 col = 2 * s + 1; 1193 /* 1194 With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times 1195 because of "wrapping" around the end of the domain hitting an entry already counted in the other direction. 1196 */ 1197 if (M == 1 && 2 * s >= m) removedups = PETSC_TRUE; 1198 if (N == 1 && 2 * s >= n) removedups = PETSC_TRUE; 1199 PetscCall(DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL)); 1200 PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL)); 1201 PetscCall(PetscObjectGetComm((PetscObject)da, &comm)); 1202 1203 PetscCall(PetscMalloc1(col * col * nc, &cols)); 1204 PetscCall(DMGetLocalToGlobalMapping(da, <og)); 1205 1206 PetscCall(MatSetBlockSize(J, nc)); 1207 /* determine the matrix preallocation information */ 1208 MatPreallocateBegin(comm, nc * nx * ny, nc * nx * ny, dnz, onz); 1209 for (i = xs; i < xs + nx; i++) { 1210 pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i)); 1211 pend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1)); 1212 1213 for (j = ys; j < ys + ny; j++) { 1214 slot = i - gxs + gnx * (j - gys); 1215 1216 lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j)); 1217 lend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1)); 1218 1219 for (k = 0; k < nc; k++) { 1220 cnt = 0; 1221 for (l = lstart; l < lend + 1; l++) { 1222 for (p = pstart; p < pend + 1; p++) { 1223 if (l || p) { 1224 if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star */ 1225 for (ifill_col = ofill[k]; ifill_col < ofill[k + 1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc * (slot + gnx * l + p); 1226 } 1227 } else { 1228 if (dfill) { 1229 for (ifill_col = dfill[k]; ifill_col < dfill[k + 1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc * (slot + gnx * l + p); 1230 } else { 1231 for (ifill_col = 0; ifill_col < nc; ifill_col++) cols[cnt++] = ifill_col + nc * (slot + gnx * l + p); 1232 } 1233 } 1234 } 1235 } 1236 row = k + nc * (slot); 1237 maxcnt = PetscMax(maxcnt, cnt); 1238 if (removedups) PetscCall(MatPreallocateSetLocalRemoveDups(ltog, 1, &row, ltog, cnt, cols, dnz, onz)); 1239 else PetscCall(MatPreallocateSetLocal(ltog, 1, &row, ltog, cnt, cols, dnz, onz)); 1240 } 1241 } 1242 } 1243 PetscCall(MatSeqAIJSetPreallocation(J, 0, dnz)); 1244 PetscCall(MatMPIAIJSetPreallocation(J, 0, dnz, 0, onz)); 1245 MatPreallocateEnd(dnz, onz); 1246 PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog)); 1247 1248 /* 1249 For each node in the grid: we get the neighbors in the local (on processor ordering 1250 that includes the ghost points) then MatSetValuesLocal() maps those indices to the global 1251 PETSc ordering. 1252 */ 1253 if (!da->prealloc_only) { 1254 for (i = xs; i < xs + nx; i++) { 1255 pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i)); 1256 pend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1)); 1257 1258 for (j = ys; j < ys + ny; j++) { 1259 slot = i - gxs + gnx * (j - gys); 1260 1261 lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j)); 1262 lend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1)); 1263 1264 for (k = 0; k < nc; k++) { 1265 cnt = 0; 1266 for (l = lstart; l < lend + 1; l++) { 1267 for (p = pstart; p < pend + 1; p++) { 1268 if (l || p) { 1269 if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star */ 1270 for (ifill_col = ofill[k]; ifill_col < ofill[k + 1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc * (slot + gnx * l + p); 1271 } 1272 } else { 1273 if (dfill) { 1274 for (ifill_col = dfill[k]; ifill_col < dfill[k + 1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc * (slot + gnx * l + p); 1275 } else { 1276 for (ifill_col = 0; ifill_col < nc; ifill_col++) cols[cnt++] = ifill_col + nc * (slot + gnx * l + p); 1277 } 1278 } 1279 } 1280 } 1281 row = k + nc * (slot); 1282 PetscCall(MatSetValuesLocal(J, 1, &row, cnt, cols, NULL, INSERT_VALUES)); 1283 } 1284 } 1285 } 1286 /* do not copy values to GPU since they are all zero and not yet needed there */ 1287 PetscCall(MatBindToCPU(J, PETSC_TRUE)); 1288 PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY)); 1289 PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY)); 1290 PetscCall(MatBindToCPU(J, PETSC_FALSE)); 1291 PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE)); 1292 } 1293 PetscCall(PetscFree(cols)); 1294 PetscFunctionReturn(PETSC_SUCCESS); 1295 } 1296 1297 /* ---------------------------------------------------------------------------------*/ 1298 1299 PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ(DM da, Mat J) 1300 { 1301 PetscInt xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny; 1302 PetscInt m, n, dim, s, *cols = NULL, k, nc, *rows = NULL, col, cnt, l, p, *dnz = NULL, *onz = NULL; 1303 PetscInt istart, iend, jstart, jend, kstart, kend, zs, nz, gzs, gnz, ii, jj, kk, M, N, P; 1304 MPI_Comm comm; 1305 DMBoundaryType bx, by, bz; 1306 ISLocalToGlobalMapping ltog, mltog; 1307 DMDAStencilType st; 1308 PetscBool removedups = PETSC_FALSE; 1309 1310 PetscFunctionBegin; 1311 /* 1312 nc - number of components per grid point 1313 col - number of colors needed in one direction for single component problem 1314 */ 1315 PetscCall(DMDAGetInfo(da, &dim, &m, &n, &p, &M, &N, &P, &nc, &s, &bx, &by, &bz, &st)); 1316 if (bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE && bz == DM_BOUNDARY_NONE) PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_TRUE)); 1317 col = 2 * s + 1; 1318 1319 /* 1320 With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times 1321 because of "wrapping" around the end of the domain hitting an entry already counted in the other direction. 1322 */ 1323 if (M == 1 && 2 * s >= m) removedups = PETSC_TRUE; 1324 if (N == 1 && 2 * s >= n) removedups = PETSC_TRUE; 1325 if (P == 1 && 2 * s >= p) removedups = PETSC_TRUE; 1326 1327 PetscCall(DMDAGetCorners(da, &xs, &ys, &zs, &nx, &ny, &nz)); 1328 PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, &gzs, &gnx, &gny, &gnz)); 1329 PetscCall(PetscObjectGetComm((PetscObject)da, &comm)); 1330 1331 PetscCall(PetscMalloc2(nc, &rows, col * col * col * nc * nc, &cols)); 1332 PetscCall(DMGetLocalToGlobalMapping(da, <og)); 1333 1334 PetscCall(MatSetBlockSize(J, nc)); 1335 /* determine the matrix preallocation information */ 1336 MatPreallocateBegin(comm, nc * nx * ny * nz, nc * nx * ny * nz, dnz, onz); 1337 for (i = xs; i < xs + nx; i++) { 1338 istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i)); 1339 iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1)); 1340 for (j = ys; j < ys + ny; j++) { 1341 jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j)); 1342 jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1)); 1343 for (k = zs; k < zs + nz; k++) { 1344 kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k)); 1345 kend = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1)); 1346 1347 slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs); 1348 1349 cnt = 0; 1350 for (l = 0; l < nc; l++) { 1351 for (ii = istart; ii < iend + 1; ii++) { 1352 for (jj = jstart; jj < jend + 1; jj++) { 1353 for (kk = kstart; kk < kend + 1; kk++) { 1354 if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/ 1355 cols[cnt++] = l + nc * (slot + ii + gnx * jj + gnx * gny * kk); 1356 } 1357 } 1358 } 1359 } 1360 rows[l] = l + nc * (slot); 1361 } 1362 if (removedups) PetscCall(MatPreallocateSetLocalRemoveDups(ltog, nc, rows, ltog, cnt, cols, dnz, onz)); 1363 else PetscCall(MatPreallocateSetLocal(ltog, nc, rows, ltog, cnt, cols, dnz, onz)); 1364 } 1365 } 1366 } 1367 PetscCall(MatSetBlockSize(J, nc)); 1368 PetscCall(MatSeqAIJSetPreallocation(J, 0, dnz)); 1369 PetscCall(MatMPIAIJSetPreallocation(J, 0, dnz, 0, onz)); 1370 MatPreallocateEnd(dnz, onz); 1371 PetscCall(MatGetLocalToGlobalMapping(J, &mltog, NULL)); 1372 if (!mltog) PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog)); 1373 1374 /* 1375 For each node in the grid: we get the neighbors in the local (on processor ordering 1376 that includes the ghost points) then MatSetValuesLocal() maps those indices to the global 1377 PETSc ordering. 1378 */ 1379 if (!da->prealloc_only) { 1380 for (i = xs; i < xs + nx; i++) { 1381 istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i)); 1382 iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1)); 1383 for (j = ys; j < ys + ny; j++) { 1384 jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j)); 1385 jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1)); 1386 for (k = zs; k < zs + nz; k++) { 1387 kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k)); 1388 kend = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1)); 1389 1390 slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs); 1391 1392 cnt = 0; 1393 for (kk = kstart; kk < kend + 1; kk++) { 1394 for (jj = jstart; jj < jend + 1; jj++) { 1395 for (ii = istart; ii < iend + 1; ii++) { 1396 if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/ 1397 cols[cnt++] = nc * (slot + ii + gnx * jj + gnx * gny * kk); 1398 for (l = 1; l < nc; l++) { 1399 cols[cnt] = 1 + cols[cnt - 1]; 1400 cnt++; 1401 } 1402 } 1403 } 1404 } 1405 } 1406 rows[0] = nc * (slot); 1407 for (l = 1; l < nc; l++) rows[l] = 1 + rows[l - 1]; 1408 PetscCall(MatSetValuesLocal(J, nc, rows, cnt, cols, NULL, INSERT_VALUES)); 1409 } 1410 } 1411 } 1412 /* do not copy values to GPU since they are all zero and not yet needed there */ 1413 PetscCall(MatBindToCPU(J, PETSC_TRUE)); 1414 PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY)); 1415 PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY)); 1416 if (bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE && bz == DM_BOUNDARY_NONE) PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_FALSE)); 1417 PetscCall(MatBindToCPU(J, PETSC_FALSE)); 1418 PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE)); 1419 } 1420 PetscCall(PetscFree2(rows, cols)); 1421 PetscFunctionReturn(PETSC_SUCCESS); 1422 } 1423 1424 /* ---------------------------------------------------------------------------------*/ 1425 1426 PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ_Fill(DM da, Mat J) 1427 { 1428 DM_DA *dd = (DM_DA *)da->data; 1429 PetscInt xs, nx, i, j, gxs, gnx, row, k, l; 1430 PetscInt m, dim, s, *cols = NULL, nc, cnt, maxcnt = 0, *ocols; 1431 PetscInt *ofill = dd->ofill, *dfill = dd->dfill; 1432 DMBoundaryType bx; 1433 ISLocalToGlobalMapping ltog; 1434 PetscMPIInt rank, size; 1435 1436 PetscFunctionBegin; 1437 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)da), &rank)); 1438 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)da), &size)); 1439 1440 /* 1441 nc - number of components per grid point 1442 */ 1443 PetscCall(DMDAGetInfo(da, &dim, &m, NULL, NULL, NULL, NULL, NULL, &nc, &s, &bx, NULL, NULL, NULL)); 1444 PetscCheck(s <= 1, PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "Matrix creation for 1d not implemented correctly for stencil width larger than 1"); 1445 PetscCall(DMDAGetCorners(da, &xs, NULL, NULL, &nx, NULL, NULL)); 1446 PetscCall(DMDAGetGhostCorners(da, &gxs, NULL, NULL, &gnx, NULL, NULL)); 1447 1448 PetscCall(MatSetBlockSize(J, nc)); 1449 PetscCall(PetscCalloc2(nx * nc, &cols, nx * nc, &ocols)); 1450 1451 /* 1452 note should be smaller for first and last process with no periodic 1453 does not handle dfill 1454 */ 1455 cnt = 0; 1456 /* coupling with process to the left */ 1457 for (i = 0; i < s; i++) { 1458 for (j = 0; j < nc; j++) { 1459 ocols[cnt] = ((rank == 0) ? 0 : (s - i) * (ofill[j + 1] - ofill[j])); 1460 cols[cnt] = dfill[j + 1] - dfill[j] + (s + i) * (ofill[j + 1] - ofill[j]); 1461 if (rank == 0 && (dd->bx == DM_BOUNDARY_PERIODIC)) { 1462 if (size > 1) ocols[cnt] += (s - i) * (ofill[j + 1] - ofill[j]); 1463 else cols[cnt] += (s - i) * (ofill[j + 1] - ofill[j]); 1464 } 1465 maxcnt = PetscMax(maxcnt, ocols[cnt] + cols[cnt]); 1466 cnt++; 1467 } 1468 } 1469 for (i = s; i < nx - s; i++) { 1470 for (j = 0; j < nc; j++) { 1471 cols[cnt] = dfill[j + 1] - dfill[j] + 2 * s * (ofill[j + 1] - ofill[j]); 1472 maxcnt = PetscMax(maxcnt, ocols[cnt] + cols[cnt]); 1473 cnt++; 1474 } 1475 } 1476 /* coupling with process to the right */ 1477 for (i = nx - s; i < nx; i++) { 1478 for (j = 0; j < nc; j++) { 1479 ocols[cnt] = ((rank == (size - 1)) ? 0 : (i - nx + s + 1) * (ofill[j + 1] - ofill[j])); 1480 cols[cnt] = dfill[j + 1] - dfill[j] + (s + nx - i - 1) * (ofill[j + 1] - ofill[j]); 1481 if ((rank == size - 1) && (dd->bx == DM_BOUNDARY_PERIODIC)) { 1482 if (size > 1) ocols[cnt] += (i - nx + s + 1) * (ofill[j + 1] - ofill[j]); 1483 else cols[cnt] += (i - nx + s + 1) * (ofill[j + 1] - ofill[j]); 1484 } 1485 maxcnt = PetscMax(maxcnt, ocols[cnt] + cols[cnt]); 1486 cnt++; 1487 } 1488 } 1489 1490 PetscCall(MatSeqAIJSetPreallocation(J, 0, cols)); 1491 PetscCall(MatMPIAIJSetPreallocation(J, 0, cols, 0, ocols)); 1492 PetscCall(PetscFree2(cols, ocols)); 1493 1494 PetscCall(DMGetLocalToGlobalMapping(da, <og)); 1495 PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog)); 1496 1497 /* 1498 For each node in the grid: we get the neighbors in the local (on processor ordering 1499 that includes the ghost points) then MatSetValuesLocal() maps those indices to the global 1500 PETSc ordering. 1501 */ 1502 if (!da->prealloc_only) { 1503 PetscCall(PetscMalloc1(maxcnt, &cols)); 1504 row = xs * nc; 1505 /* coupling with process to the left */ 1506 for (i = xs; i < xs + s; i++) { 1507 for (j = 0; j < nc; j++) { 1508 cnt = 0; 1509 if (rank) { 1510 for (l = 0; l < s; l++) { 1511 for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i - s + l) * nc + ofill[k]; 1512 } 1513 } 1514 if (rank == 0 && (dd->bx == DM_BOUNDARY_PERIODIC)) { 1515 for (l = 0; l < s; l++) { 1516 for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (m + i - s - l) * nc + ofill[k]; 1517 } 1518 } 1519 if (dfill) { 1520 for (k = dfill[j]; k < dfill[j + 1]; k++) cols[cnt++] = i * nc + dfill[k]; 1521 } else { 1522 for (k = 0; k < nc; k++) cols[cnt++] = i * nc + k; 1523 } 1524 for (l = 0; l < s; l++) { 1525 for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i + s - l) * nc + ofill[k]; 1526 } 1527 PetscCall(MatSetValues(J, 1, &row, cnt, cols, NULL, INSERT_VALUES)); 1528 row++; 1529 } 1530 } 1531 for (i = xs + s; i < xs + nx - s; i++) { 1532 for (j = 0; j < nc; j++) { 1533 cnt = 0; 1534 for (l = 0; l < s; l++) { 1535 for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i - s + l) * nc + ofill[k]; 1536 } 1537 if (dfill) { 1538 for (k = dfill[j]; k < dfill[j + 1]; k++) cols[cnt++] = i * nc + dfill[k]; 1539 } else { 1540 for (k = 0; k < nc; k++) cols[cnt++] = i * nc + k; 1541 } 1542 for (l = 0; l < s; l++) { 1543 for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i + s - l) * nc + ofill[k]; 1544 } 1545 PetscCall(MatSetValues(J, 1, &row, cnt, cols, NULL, INSERT_VALUES)); 1546 row++; 1547 } 1548 } 1549 /* coupling with process to the right */ 1550 for (i = xs + nx - s; i < xs + nx; i++) { 1551 for (j = 0; j < nc; j++) { 1552 cnt = 0; 1553 for (l = 0; l < s; l++) { 1554 for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i - s + l) * nc + ofill[k]; 1555 } 1556 if (dfill) { 1557 for (k = dfill[j]; k < dfill[j + 1]; k++) cols[cnt++] = i * nc + dfill[k]; 1558 } else { 1559 for (k = 0; k < nc; k++) cols[cnt++] = i * nc + k; 1560 } 1561 if (rank < size - 1) { 1562 for (l = 0; l < s; l++) { 1563 for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i + s - l) * nc + ofill[k]; 1564 } 1565 } 1566 if ((rank == size - 1) && (dd->bx == DM_BOUNDARY_PERIODIC)) { 1567 for (l = 0; l < s; l++) { 1568 for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i - s - l - m + 2) * nc + ofill[k]; 1569 } 1570 } 1571 PetscCall(MatSetValues(J, 1, &row, cnt, cols, NULL, INSERT_VALUES)); 1572 row++; 1573 } 1574 } 1575 PetscCall(PetscFree(cols)); 1576 /* do not copy values to GPU since they are all zero and not yet needed there */ 1577 PetscCall(MatBindToCPU(J, PETSC_TRUE)); 1578 PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY)); 1579 PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY)); 1580 PetscCall(MatBindToCPU(J, PETSC_FALSE)); 1581 PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE)); 1582 } 1583 PetscFunctionReturn(PETSC_SUCCESS); 1584 } 1585 1586 /* ---------------------------------------------------------------------------------*/ 1587 1588 PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ(DM da, Mat J) 1589 { 1590 PetscInt xs, nx, i, i1, slot, gxs, gnx; 1591 PetscInt m, dim, s, *cols = NULL, nc, *rows = NULL, col, cnt, l; 1592 PetscInt istart, iend; 1593 DMBoundaryType bx; 1594 ISLocalToGlobalMapping ltog, mltog; 1595 1596 PetscFunctionBegin; 1597 /* 1598 nc - number of components per grid point 1599 col - number of colors needed in one direction for single component problem 1600 */ 1601 PetscCall(DMDAGetInfo(da, &dim, &m, NULL, NULL, NULL, NULL, NULL, &nc, &s, &bx, NULL, NULL, NULL)); 1602 if (bx == DM_BOUNDARY_NONE) PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_TRUE)); 1603 col = 2 * s + 1; 1604 1605 PetscCall(DMDAGetCorners(da, &xs, NULL, NULL, &nx, NULL, NULL)); 1606 PetscCall(DMDAGetGhostCorners(da, &gxs, NULL, NULL, &gnx, NULL, NULL)); 1607 1608 PetscCall(MatSetBlockSize(J, nc)); 1609 PetscCall(MatSeqAIJSetPreallocation(J, col * nc, NULL)); 1610 PetscCall(MatMPIAIJSetPreallocation(J, col * nc, NULL, col * nc, NULL)); 1611 1612 PetscCall(DMGetLocalToGlobalMapping(da, <og)); 1613 PetscCall(MatGetLocalToGlobalMapping(J, &mltog, NULL)); 1614 if (!mltog) PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog)); 1615 1616 /* 1617 For each node in the grid: we get the neighbors in the local (on processor ordering 1618 that includes the ghost points) then MatSetValuesLocal() maps those indices to the global 1619 PETSc ordering. 1620 */ 1621 if (!da->prealloc_only) { 1622 PetscCall(PetscMalloc2(nc, &rows, col * nc * nc, &cols)); 1623 for (i = xs; i < xs + nx; i++) { 1624 istart = PetscMax(-s, gxs - i); 1625 iend = PetscMin(s, gxs + gnx - i - 1); 1626 slot = i - gxs; 1627 1628 cnt = 0; 1629 for (i1 = istart; i1 < iend + 1; i1++) { 1630 cols[cnt++] = nc * (slot + i1); 1631 for (l = 1; l < nc; l++) { 1632 cols[cnt] = 1 + cols[cnt - 1]; 1633 cnt++; 1634 } 1635 } 1636 rows[0] = nc * (slot); 1637 for (l = 1; l < nc; l++) rows[l] = 1 + rows[l - 1]; 1638 PetscCall(MatSetValuesLocal(J, nc, rows, cnt, cols, NULL, INSERT_VALUES)); 1639 } 1640 /* do not copy values to GPU since they are all zero and not yet needed there */ 1641 PetscCall(MatBindToCPU(J, PETSC_TRUE)); 1642 PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY)); 1643 PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY)); 1644 if (bx == DM_BOUNDARY_NONE) PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_FALSE)); 1645 PetscCall(MatBindToCPU(J, PETSC_FALSE)); 1646 PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE)); 1647 PetscCall(PetscFree2(rows, cols)); 1648 } 1649 PetscFunctionReturn(PETSC_SUCCESS); 1650 } 1651 1652 /* ---------------------------------------------------------------------------------*/ 1653 1654 PetscErrorCode DMCreateMatrix_DA_1d_SeqAIJ_NoPreallocation(DM da, Mat J) 1655 { 1656 PetscInt xs, nx, i, i1, slot, gxs, gnx; 1657 PetscInt m, dim, s, *cols = NULL, nc, *rows = NULL, col, cnt, l; 1658 PetscInt istart, iend; 1659 DMBoundaryType bx; 1660 ISLocalToGlobalMapping ltog, mltog; 1661 1662 PetscFunctionBegin; 1663 /* 1664 nc - number of components per grid point 1665 col - number of colors needed in one direction for single component problem 1666 */ 1667 PetscCall(DMDAGetInfo(da, &dim, &m, NULL, NULL, NULL, NULL, NULL, &nc, &s, &bx, NULL, NULL, NULL)); 1668 col = 2 * s + 1; 1669 1670 PetscCall(DMDAGetCorners(da, &xs, NULL, NULL, &nx, NULL, NULL)); 1671 PetscCall(DMDAGetGhostCorners(da, &gxs, NULL, NULL, &gnx, NULL, NULL)); 1672 1673 PetscCall(MatSetBlockSize(J, nc)); 1674 PetscCall(MatSeqAIJSetTotalPreallocation(J, nx * nc * col * nc)); 1675 1676 PetscCall(DMGetLocalToGlobalMapping(da, <og)); 1677 PetscCall(MatGetLocalToGlobalMapping(J, &mltog, NULL)); 1678 if (!mltog) PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog)); 1679 1680 /* 1681 For each node in the grid: we get the neighbors in the local (on processor ordering 1682 that includes the ghost points) then MatSetValuesLocal() maps those indices to the global 1683 PETSc ordering. 1684 */ 1685 if (!da->prealloc_only) { 1686 PetscCall(PetscMalloc2(nc, &rows, col * nc * nc, &cols)); 1687 for (i = xs; i < xs + nx; i++) { 1688 istart = PetscMax(-s, gxs - i); 1689 iend = PetscMin(s, gxs + gnx - i - 1); 1690 slot = i - gxs; 1691 1692 cnt = 0; 1693 for (i1 = istart; i1 < iend + 1; i1++) { 1694 cols[cnt++] = nc * (slot + i1); 1695 for (l = 1; l < nc; l++) { 1696 cols[cnt] = 1 + cols[cnt - 1]; 1697 cnt++; 1698 } 1699 } 1700 rows[0] = nc * (slot); 1701 for (l = 1; l < nc; l++) rows[l] = 1 + rows[l - 1]; 1702 PetscCall(MatSetValuesLocal(J, nc, rows, cnt, cols, NULL, INSERT_VALUES)); 1703 } 1704 /* do not copy values to GPU since they are all zero and not yet needed there */ 1705 PetscCall(MatBindToCPU(J, PETSC_TRUE)); 1706 PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY)); 1707 PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY)); 1708 if (bx == DM_BOUNDARY_NONE) PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_FALSE)); 1709 PetscCall(MatBindToCPU(J, PETSC_FALSE)); 1710 PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE)); 1711 PetscCall(PetscFree2(rows, cols)); 1712 } 1713 PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_FALSE)); 1714 PetscFunctionReturn(PETSC_SUCCESS); 1715 } 1716 1717 PetscErrorCode DMCreateMatrix_DA_2d_MPIBAIJ(DM da, Mat J) 1718 { 1719 PetscInt xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny; 1720 PetscInt m, n, dim, s, *cols, nc, col, cnt, *dnz, *onz; 1721 PetscInt istart, iend, jstart, jend, ii, jj; 1722 MPI_Comm comm; 1723 PetscScalar *values; 1724 DMBoundaryType bx, by; 1725 DMDAStencilType st; 1726 ISLocalToGlobalMapping ltog; 1727 1728 PetscFunctionBegin; 1729 /* 1730 nc - number of components per grid point 1731 col - number of colors needed in one direction for single component problem 1732 */ 1733 PetscCall(DMDAGetInfo(da, &dim, &m, &n, NULL, NULL, NULL, NULL, &nc, &s, &bx, &by, NULL, &st)); 1734 col = 2 * s + 1; 1735 1736 PetscCall(DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL)); 1737 PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL)); 1738 PetscCall(PetscObjectGetComm((PetscObject)da, &comm)); 1739 1740 PetscCall(PetscMalloc1(col * col * nc * nc, &cols)); 1741 1742 PetscCall(DMGetLocalToGlobalMapping(da, <og)); 1743 1744 /* determine the matrix preallocation information */ 1745 MatPreallocateBegin(comm, nx * ny, nx * ny, dnz, onz); 1746 for (i = xs; i < xs + nx; i++) { 1747 istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i)); 1748 iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1)); 1749 for (j = ys; j < ys + ny; j++) { 1750 jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j)); 1751 jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1)); 1752 slot = i - gxs + gnx * (j - gys); 1753 1754 /* Find block columns in block row */ 1755 cnt = 0; 1756 for (ii = istart; ii < iend + 1; ii++) { 1757 for (jj = jstart; jj < jend + 1; jj++) { 1758 if (st == DMDA_STENCIL_BOX || !ii || !jj) { /* BOX or on the STAR */ 1759 cols[cnt++] = slot + ii + gnx * jj; 1760 } 1761 } 1762 } 1763 PetscCall(MatPreallocateSetLocalBlock(ltog, 1, &slot, ltog, cnt, cols, dnz, onz)); 1764 } 1765 } 1766 PetscCall(MatSeqBAIJSetPreallocation(J, nc, 0, dnz)); 1767 PetscCall(MatMPIBAIJSetPreallocation(J, nc, 0, dnz, 0, onz)); 1768 MatPreallocateEnd(dnz, onz); 1769 1770 PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog)); 1771 1772 /* 1773 For each node in the grid: we get the neighbors in the local (on processor ordering 1774 that includes the ghost points) then MatSetValuesLocal() maps those indices to the global 1775 PETSc ordering. 1776 */ 1777 if (!da->prealloc_only) { 1778 PetscCall(PetscCalloc1(col * col * nc * nc, &values)); 1779 for (i = xs; i < xs + nx; i++) { 1780 istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i)); 1781 iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1)); 1782 for (j = ys; j < ys + ny; j++) { 1783 jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j)); 1784 jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1)); 1785 slot = i - gxs + gnx * (j - gys); 1786 cnt = 0; 1787 for (ii = istart; ii < iend + 1; ii++) { 1788 for (jj = jstart; jj < jend + 1; jj++) { 1789 if (st == DMDA_STENCIL_BOX || !ii || !jj) { /* BOX or on the STAR */ 1790 cols[cnt++] = slot + ii + gnx * jj; 1791 } 1792 } 1793 } 1794 PetscCall(MatSetValuesBlockedLocal(J, 1, &slot, cnt, cols, values, INSERT_VALUES)); 1795 } 1796 } 1797 PetscCall(PetscFree(values)); 1798 /* do not copy values to GPU since they are all zero and not yet needed there */ 1799 PetscCall(MatBindToCPU(J, PETSC_TRUE)); 1800 PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY)); 1801 PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY)); 1802 PetscCall(MatBindToCPU(J, PETSC_FALSE)); 1803 PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE)); 1804 } 1805 PetscCall(PetscFree(cols)); 1806 PetscFunctionReturn(PETSC_SUCCESS); 1807 } 1808 1809 PetscErrorCode DMCreateMatrix_DA_3d_MPIBAIJ(DM da, Mat J) 1810 { 1811 PetscInt xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny; 1812 PetscInt m, n, dim, s, *cols, k, nc, col, cnt, p, *dnz, *onz; 1813 PetscInt istart, iend, jstart, jend, kstart, kend, zs, nz, gzs, gnz, ii, jj, kk; 1814 MPI_Comm comm; 1815 PetscScalar *values; 1816 DMBoundaryType bx, by, bz; 1817 DMDAStencilType st; 1818 ISLocalToGlobalMapping ltog; 1819 1820 PetscFunctionBegin; 1821 /* 1822 nc - number of components per grid point 1823 col - number of colors needed in one direction for single component problem 1824 */ 1825 PetscCall(DMDAGetInfo(da, &dim, &m, &n, &p, NULL, NULL, NULL, &nc, &s, &bx, &by, &bz, &st)); 1826 col = 2 * s + 1; 1827 1828 PetscCall(DMDAGetCorners(da, &xs, &ys, &zs, &nx, &ny, &nz)); 1829 PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, &gzs, &gnx, &gny, &gnz)); 1830 PetscCall(PetscObjectGetComm((PetscObject)da, &comm)); 1831 1832 PetscCall(PetscMalloc1(col * col * col, &cols)); 1833 1834 PetscCall(DMGetLocalToGlobalMapping(da, <og)); 1835 1836 /* determine the matrix preallocation information */ 1837 MatPreallocateBegin(comm, nx * ny * nz, nx * ny * nz, dnz, onz); 1838 for (i = xs; i < xs + nx; i++) { 1839 istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i)); 1840 iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1)); 1841 for (j = ys; j < ys + ny; j++) { 1842 jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j)); 1843 jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1)); 1844 for (k = zs; k < zs + nz; k++) { 1845 kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k)); 1846 kend = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1)); 1847 1848 slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs); 1849 1850 /* Find block columns in block row */ 1851 cnt = 0; 1852 for (ii = istart; ii < iend + 1; ii++) { 1853 for (jj = jstart; jj < jend + 1; jj++) { 1854 for (kk = kstart; kk < kend + 1; kk++) { 1855 if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/ 1856 cols[cnt++] = slot + ii + gnx * jj + gnx * gny * kk; 1857 } 1858 } 1859 } 1860 } 1861 PetscCall(MatPreallocateSetLocalBlock(ltog, 1, &slot, ltog, cnt, cols, dnz, onz)); 1862 } 1863 } 1864 } 1865 PetscCall(MatSeqBAIJSetPreallocation(J, nc, 0, dnz)); 1866 PetscCall(MatMPIBAIJSetPreallocation(J, nc, 0, dnz, 0, onz)); 1867 MatPreallocateEnd(dnz, onz); 1868 1869 PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog)); 1870 1871 /* 1872 For each node in the grid: we get the neighbors in the local (on processor ordering 1873 that includes the ghost points) then MatSetValuesLocal() maps those indices to the global 1874 PETSc ordering. 1875 */ 1876 if (!da->prealloc_only) { 1877 PetscCall(PetscCalloc1(col * col * col * nc * nc, &values)); 1878 for (i = xs; i < xs + nx; i++) { 1879 istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i)); 1880 iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1)); 1881 for (j = ys; j < ys + ny; j++) { 1882 jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j)); 1883 jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1)); 1884 for (k = zs; k < zs + nz; k++) { 1885 kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k)); 1886 kend = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1)); 1887 1888 slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs); 1889 1890 cnt = 0; 1891 for (ii = istart; ii < iend + 1; ii++) { 1892 for (jj = jstart; jj < jend + 1; jj++) { 1893 for (kk = kstart; kk < kend + 1; kk++) { 1894 if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/ 1895 cols[cnt++] = slot + ii + gnx * jj + gnx * gny * kk; 1896 } 1897 } 1898 } 1899 } 1900 PetscCall(MatSetValuesBlockedLocal(J, 1, &slot, cnt, cols, values, INSERT_VALUES)); 1901 } 1902 } 1903 } 1904 PetscCall(PetscFree(values)); 1905 /* do not copy values to GPU since they are all zero and not yet needed there */ 1906 PetscCall(MatBindToCPU(J, PETSC_TRUE)); 1907 PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY)); 1908 PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY)); 1909 PetscCall(MatBindToCPU(J, PETSC_FALSE)); 1910 PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE)); 1911 } 1912 PetscCall(PetscFree(cols)); 1913 PetscFunctionReturn(PETSC_SUCCESS); 1914 } 1915 1916 /* 1917 This helper is for of SBAIJ preallocation, to discard the lower-triangular values which are difficult to 1918 identify in the local ordering with periodic domain. 1919 */ 1920 static PetscErrorCode L2GFilterUpperTriangular(ISLocalToGlobalMapping ltog, PetscInt *row, PetscInt *cnt, PetscInt col[]) 1921 { 1922 PetscInt i, n; 1923 1924 PetscFunctionBegin; 1925 PetscCall(ISLocalToGlobalMappingApplyBlock(ltog, 1, row, row)); 1926 PetscCall(ISLocalToGlobalMappingApplyBlock(ltog, *cnt, col, col)); 1927 for (i = 0, n = 0; i < *cnt; i++) { 1928 if (col[i] >= *row) col[n++] = col[i]; 1929 } 1930 *cnt = n; 1931 PetscFunctionReturn(PETSC_SUCCESS); 1932 } 1933 1934 PetscErrorCode DMCreateMatrix_DA_2d_MPISBAIJ(DM da, Mat J) 1935 { 1936 PetscInt xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny; 1937 PetscInt m, n, dim, s, *cols, nc, col, cnt, *dnz, *onz; 1938 PetscInt istart, iend, jstart, jend, ii, jj; 1939 MPI_Comm comm; 1940 PetscScalar *values; 1941 DMBoundaryType bx, by; 1942 DMDAStencilType st; 1943 ISLocalToGlobalMapping ltog; 1944 1945 PetscFunctionBegin; 1946 /* 1947 nc - number of components per grid point 1948 col - number of colors needed in one direction for single component problem 1949 */ 1950 PetscCall(DMDAGetInfo(da, &dim, &m, &n, NULL, NULL, NULL, NULL, &nc, &s, &bx, &by, NULL, &st)); 1951 col = 2 * s + 1; 1952 1953 PetscCall(DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL)); 1954 PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL)); 1955 PetscCall(PetscObjectGetComm((PetscObject)da, &comm)); 1956 1957 PetscCall(PetscMalloc1(col * col * nc * nc, &cols)); 1958 1959 PetscCall(DMGetLocalToGlobalMapping(da, <og)); 1960 1961 /* determine the matrix preallocation information */ 1962 MatPreallocateBegin(comm, nx * ny, nx * ny, dnz, onz); 1963 for (i = xs; i < xs + nx; i++) { 1964 istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i)); 1965 iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1)); 1966 for (j = ys; j < ys + ny; j++) { 1967 jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j)); 1968 jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1)); 1969 slot = i - gxs + gnx * (j - gys); 1970 1971 /* Find block columns in block row */ 1972 cnt = 0; 1973 for (ii = istart; ii < iend + 1; ii++) { 1974 for (jj = jstart; jj < jend + 1; jj++) { 1975 if (st == DMDA_STENCIL_BOX || !ii || !jj) cols[cnt++] = slot + ii + gnx * jj; 1976 } 1977 } 1978 PetscCall(L2GFilterUpperTriangular(ltog, &slot, &cnt, cols)); 1979 PetscCall(MatPreallocateSymmetricSetBlock(slot, cnt, cols, dnz, onz)); 1980 } 1981 } 1982 PetscCall(MatSeqSBAIJSetPreallocation(J, nc, 0, dnz)); 1983 PetscCall(MatMPISBAIJSetPreallocation(J, nc, 0, dnz, 0, onz)); 1984 MatPreallocateEnd(dnz, onz); 1985 1986 PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog)); 1987 1988 /* 1989 For each node in the grid: we get the neighbors in the local (on processor ordering 1990 that includes the ghost points) then MatSetValuesLocal() maps those indices to the global 1991 PETSc ordering. 1992 */ 1993 if (!da->prealloc_only) { 1994 PetscCall(PetscCalloc1(col * col * nc * nc, &values)); 1995 for (i = xs; i < xs + nx; i++) { 1996 istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i)); 1997 iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1)); 1998 for (j = ys; j < ys + ny; j++) { 1999 jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j)); 2000 jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1)); 2001 slot = i - gxs + gnx * (j - gys); 2002 2003 /* Find block columns in block row */ 2004 cnt = 0; 2005 for (ii = istart; ii < iend + 1; ii++) { 2006 for (jj = jstart; jj < jend + 1; jj++) { 2007 if (st == DMDA_STENCIL_BOX || !ii || !jj) cols[cnt++] = slot + ii + gnx * jj; 2008 } 2009 } 2010 PetscCall(L2GFilterUpperTriangular(ltog, &slot, &cnt, cols)); 2011 PetscCall(MatSetValuesBlocked(J, 1, &slot, cnt, cols, values, INSERT_VALUES)); 2012 } 2013 } 2014 PetscCall(PetscFree(values)); 2015 /* do not copy values to GPU since they are all zero and not yet needed there */ 2016 PetscCall(MatBindToCPU(J, PETSC_TRUE)); 2017 PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY)); 2018 PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY)); 2019 PetscCall(MatBindToCPU(J, PETSC_FALSE)); 2020 PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE)); 2021 } 2022 PetscCall(PetscFree(cols)); 2023 PetscFunctionReturn(PETSC_SUCCESS); 2024 } 2025 2026 PetscErrorCode DMCreateMatrix_DA_3d_MPISBAIJ(DM da, Mat J) 2027 { 2028 PetscInt xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny; 2029 PetscInt m, n, dim, s, *cols, k, nc, col, cnt, p, *dnz, *onz; 2030 PetscInt istart, iend, jstart, jend, kstart, kend, zs, nz, gzs, gnz, ii, jj, kk; 2031 MPI_Comm comm; 2032 PetscScalar *values; 2033 DMBoundaryType bx, by, bz; 2034 DMDAStencilType st; 2035 ISLocalToGlobalMapping ltog; 2036 2037 PetscFunctionBegin; 2038 /* 2039 nc - number of components per grid point 2040 col - number of colors needed in one direction for single component problem 2041 */ 2042 PetscCall(DMDAGetInfo(da, &dim, &m, &n, &p, NULL, NULL, NULL, &nc, &s, &bx, &by, &bz, &st)); 2043 col = 2 * s + 1; 2044 2045 PetscCall(DMDAGetCorners(da, &xs, &ys, &zs, &nx, &ny, &nz)); 2046 PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, &gzs, &gnx, &gny, &gnz)); 2047 PetscCall(PetscObjectGetComm((PetscObject)da, &comm)); 2048 2049 /* create the matrix */ 2050 PetscCall(PetscMalloc1(col * col * col, &cols)); 2051 2052 PetscCall(DMGetLocalToGlobalMapping(da, <og)); 2053 2054 /* determine the matrix preallocation information */ 2055 MatPreallocateBegin(comm, nx * ny * nz, nx * ny * nz, dnz, onz); 2056 for (i = xs; i < xs + nx; i++) { 2057 istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i)); 2058 iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1)); 2059 for (j = ys; j < ys + ny; j++) { 2060 jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j)); 2061 jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1)); 2062 for (k = zs; k < zs + nz; k++) { 2063 kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k)); 2064 kend = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1)); 2065 2066 slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs); 2067 2068 /* Find block columns in block row */ 2069 cnt = 0; 2070 for (ii = istart; ii < iend + 1; ii++) { 2071 for (jj = jstart; jj < jend + 1; jj++) { 2072 for (kk = kstart; kk < kend + 1; kk++) { 2073 if ((st == DMDA_STENCIL_BOX) || (!ii && !jj) || (!jj && !kk) || (!ii && !kk)) cols[cnt++] = slot + ii + gnx * jj + gnx * gny * kk; 2074 } 2075 } 2076 } 2077 PetscCall(L2GFilterUpperTriangular(ltog, &slot, &cnt, cols)); 2078 PetscCall(MatPreallocateSymmetricSetBlock(slot, cnt, cols, dnz, onz)); 2079 } 2080 } 2081 } 2082 PetscCall(MatSeqSBAIJSetPreallocation(J, nc, 0, dnz)); 2083 PetscCall(MatMPISBAIJSetPreallocation(J, nc, 0, dnz, 0, onz)); 2084 MatPreallocateEnd(dnz, onz); 2085 2086 PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog)); 2087 2088 /* 2089 For each node in the grid: we get the neighbors in the local (on processor ordering 2090 that includes the ghost points) then MatSetValuesLocal() maps those indices to the global 2091 PETSc ordering. 2092 */ 2093 if (!da->prealloc_only) { 2094 PetscCall(PetscCalloc1(col * col * col * nc * nc, &values)); 2095 for (i = xs; i < xs + nx; i++) { 2096 istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i)); 2097 iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1)); 2098 for (j = ys; j < ys + ny; j++) { 2099 jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j)); 2100 jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1)); 2101 for (k = zs; k < zs + nz; k++) { 2102 kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k)); 2103 kend = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1)); 2104 2105 slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs); 2106 2107 cnt = 0; 2108 for (ii = istart; ii < iend + 1; ii++) { 2109 for (jj = jstart; jj < jend + 1; jj++) { 2110 for (kk = kstart; kk < kend + 1; kk++) { 2111 if ((st == DMDA_STENCIL_BOX) || (!ii && !jj) || (!jj && !kk) || (!ii && !kk)) cols[cnt++] = slot + ii + gnx * jj + gnx * gny * kk; 2112 } 2113 } 2114 } 2115 PetscCall(L2GFilterUpperTriangular(ltog, &slot, &cnt, cols)); 2116 PetscCall(MatSetValuesBlocked(J, 1, &slot, cnt, cols, values, INSERT_VALUES)); 2117 } 2118 } 2119 } 2120 PetscCall(PetscFree(values)); 2121 /* do not copy values to GPU since they are all zero and not yet needed there */ 2122 PetscCall(MatBindToCPU(J, PETSC_TRUE)); 2123 PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY)); 2124 PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY)); 2125 PetscCall(MatBindToCPU(J, PETSC_FALSE)); 2126 PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE)); 2127 } 2128 PetscCall(PetscFree(cols)); 2129 PetscFunctionReturn(PETSC_SUCCESS); 2130 } 2131 2132 /* ---------------------------------------------------------------------------------*/ 2133 2134 PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ_Fill(DM da, Mat J) 2135 { 2136 PetscInt xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny; 2137 PetscInt m, n, dim, s, *cols, k, nc, row, col, cnt, maxcnt = 0, l, p, *dnz, *onz; 2138 PetscInt istart, iend, jstart, jend, kstart, kend, zs, nz, gzs, gnz, ii, jj, kk, M, N, P; 2139 DM_DA *dd = (DM_DA *)da->data; 2140 PetscInt ifill_col, *dfill = dd->dfill, *ofill = dd->ofill; 2141 MPI_Comm comm; 2142 PetscScalar *values; 2143 DMBoundaryType bx, by, bz; 2144 ISLocalToGlobalMapping ltog; 2145 DMDAStencilType st; 2146 PetscBool removedups = PETSC_FALSE; 2147 2148 PetscFunctionBegin; 2149 /* 2150 nc - number of components per grid point 2151 col - number of colors needed in one direction for single component problem 2152 */ 2153 PetscCall(DMDAGetInfo(da, &dim, &m, &n, &p, &M, &N, &P, &nc, &s, &bx, &by, &bz, &st)); 2154 col = 2 * s + 1; 2155 2156 /* 2157 With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times 2158 because of "wrapping" around the end of the domain hitting an entry already counted in the other direction. 2159 */ 2160 if (M == 1 && 2 * s >= m) removedups = PETSC_TRUE; 2161 if (N == 1 && 2 * s >= n) removedups = PETSC_TRUE; 2162 if (P == 1 && 2 * s >= p) removedups = PETSC_TRUE; 2163 2164 PetscCall(DMDAGetCorners(da, &xs, &ys, &zs, &nx, &ny, &nz)); 2165 PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, &gzs, &gnx, &gny, &gnz)); 2166 PetscCall(PetscObjectGetComm((PetscObject)da, &comm)); 2167 2168 PetscCall(PetscMalloc1(col * col * col * nc, &cols)); 2169 PetscCall(DMGetLocalToGlobalMapping(da, <og)); 2170 2171 /* determine the matrix preallocation information */ 2172 MatPreallocateBegin(comm, nc * nx * ny * nz, nc * nx * ny * nz, dnz, onz); 2173 2174 PetscCall(MatSetBlockSize(J, nc)); 2175 for (i = xs; i < xs + nx; i++) { 2176 istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i)); 2177 iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1)); 2178 for (j = ys; j < ys + ny; j++) { 2179 jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j)); 2180 jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1)); 2181 for (k = zs; k < zs + nz; k++) { 2182 kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k)); 2183 kend = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1)); 2184 2185 slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs); 2186 2187 for (l = 0; l < nc; l++) { 2188 cnt = 0; 2189 for (ii = istart; ii < iend + 1; ii++) { 2190 for (jj = jstart; jj < jend + 1; jj++) { 2191 for (kk = kstart; kk < kend + 1; kk++) { 2192 if (ii || jj || kk) { 2193 if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/ 2194 for (ifill_col = ofill[l]; ifill_col < ofill[l + 1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc * (slot + ii + gnx * jj + gnx * gny * kk); 2195 } 2196 } else { 2197 if (dfill) { 2198 for (ifill_col = dfill[l]; ifill_col < dfill[l + 1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc * (slot + ii + gnx * jj + gnx * gny * kk); 2199 } else { 2200 for (ifill_col = 0; ifill_col < nc; ifill_col++) cols[cnt++] = ifill_col + nc * (slot + ii + gnx * jj + gnx * gny * kk); 2201 } 2202 } 2203 } 2204 } 2205 } 2206 row = l + nc * (slot); 2207 maxcnt = PetscMax(maxcnt, cnt); 2208 if (removedups) PetscCall(MatPreallocateSetLocalRemoveDups(ltog, 1, &row, ltog, cnt, cols, dnz, onz)); 2209 else PetscCall(MatPreallocateSetLocal(ltog, 1, &row, ltog, cnt, cols, dnz, onz)); 2210 } 2211 } 2212 } 2213 } 2214 PetscCall(MatSeqAIJSetPreallocation(J, 0, dnz)); 2215 PetscCall(MatMPIAIJSetPreallocation(J, 0, dnz, 0, onz)); 2216 MatPreallocateEnd(dnz, onz); 2217 PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog)); 2218 2219 /* 2220 For each node in the grid: we get the neighbors in the local (on processor ordering 2221 that includes the ghost points) then MatSetValuesLocal() maps those indices to the global 2222 PETSc ordering. 2223 */ 2224 if (!da->prealloc_only) { 2225 PetscCall(PetscCalloc1(maxcnt, &values)); 2226 for (i = xs; i < xs + nx; i++) { 2227 istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i)); 2228 iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1)); 2229 for (j = ys; j < ys + ny; j++) { 2230 jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j)); 2231 jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1)); 2232 for (k = zs; k < zs + nz; k++) { 2233 kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k)); 2234 kend = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1)); 2235 2236 slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs); 2237 2238 for (l = 0; l < nc; l++) { 2239 cnt = 0; 2240 for (ii = istart; ii < iend + 1; ii++) { 2241 for (jj = jstart; jj < jend + 1; jj++) { 2242 for (kk = kstart; kk < kend + 1; kk++) { 2243 if (ii || jj || kk) { 2244 if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/ 2245 for (ifill_col = ofill[l]; ifill_col < ofill[l + 1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc * (slot + ii + gnx * jj + gnx * gny * kk); 2246 } 2247 } else { 2248 if (dfill) { 2249 for (ifill_col = dfill[l]; ifill_col < dfill[l + 1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc * (slot + ii + gnx * jj + gnx * gny * kk); 2250 } else { 2251 for (ifill_col = 0; ifill_col < nc; ifill_col++) cols[cnt++] = ifill_col + nc * (slot + ii + gnx * jj + gnx * gny * kk); 2252 } 2253 } 2254 } 2255 } 2256 } 2257 row = l + nc * (slot); 2258 PetscCall(MatSetValuesLocal(J, 1, &row, cnt, cols, values, INSERT_VALUES)); 2259 } 2260 } 2261 } 2262 } 2263 PetscCall(PetscFree(values)); 2264 /* do not copy values to GPU since they are all zero and not yet needed there */ 2265 PetscCall(MatBindToCPU(J, PETSC_TRUE)); 2266 PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY)); 2267 PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY)); 2268 PetscCall(MatBindToCPU(J, PETSC_FALSE)); 2269 PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE)); 2270 } 2271 PetscCall(PetscFree(cols)); 2272 PetscFunctionReturn(PETSC_SUCCESS); 2273 } 2274