1 2 #include <petsc/private/dmdaimpl.h> /*I "petscdmda.h" I*/ 3 #include <petscmat.h> 4 #include <petscbt.h> 5 6 extern PetscErrorCode DMCreateColoring_DA_1d_MPIAIJ(DM, ISColoringType, ISColoring *); 7 extern PetscErrorCode DMCreateColoring_DA_2d_MPIAIJ(DM, ISColoringType, ISColoring *); 8 extern PetscErrorCode DMCreateColoring_DA_2d_5pt_MPIAIJ(DM, ISColoringType, ISColoring *); 9 extern PetscErrorCode DMCreateColoring_DA_3d_MPIAIJ(DM, ISColoringType, ISColoring *); 10 11 /* 12 For ghost i that may be negative or greater than the upper bound this 13 maps it into the 0:m-1 range using periodicity 14 */ 15 #define SetInRange(i, m) ((i < 0) ? m + i : ((i >= m) ? i - m : i)) 16 17 static PetscErrorCode DMDASetBlockFills_Private(const PetscInt *dfill, PetscInt w, PetscInt **rfill) 18 { 19 PetscInt i, j, nz, *fill; 20 21 PetscFunctionBegin; 22 if (!dfill) PetscFunctionReturn(0); 23 24 /* count number nonzeros */ 25 nz = 0; 26 for (i = 0; i < w; i++) { 27 for (j = 0; j < w; j++) { 28 if (dfill[w * i + j]) nz++; 29 } 30 } 31 PetscCall(PetscMalloc1(nz + w + 1, &fill)); 32 /* construct modified CSR storage of nonzero structure */ 33 /* fill[0 -- w] marks starts of each row of column indices (and end of last row) 34 so fill[1] - fill[0] gives number of nonzeros in first row etc */ 35 nz = w + 1; 36 for (i = 0; i < w; i++) { 37 fill[i] = nz; 38 for (j = 0; j < w; j++) { 39 if (dfill[w * i + j]) { 40 fill[nz] = j; 41 nz++; 42 } 43 } 44 } 45 fill[w] = nz; 46 47 *rfill = fill; 48 PetscFunctionReturn(0); 49 } 50 51 static PetscErrorCode DMDASetBlockFillsSparse_Private(const PetscInt *dfillsparse, PetscInt w, PetscInt **rfill) 52 { 53 PetscInt nz; 54 55 PetscFunctionBegin; 56 if (!dfillsparse) PetscFunctionReturn(0); 57 58 /* Determine number of non-zeros */ 59 nz = (dfillsparse[w] - w - 1); 60 61 /* Allocate space for our copy of the given sparse matrix representation. 
*/ 62 PetscCall(PetscMalloc1(nz + w + 1, rfill)); 63 PetscCall(PetscArraycpy(*rfill, dfillsparse, nz + w + 1)); 64 PetscFunctionReturn(0); 65 } 66 67 static PetscErrorCode DMDASetBlockFills_Private2(DM_DA *dd) 68 { 69 PetscInt i, k, cnt = 1; 70 71 PetscFunctionBegin; 72 73 /* ofillcount tracks the columns of ofill that have any nonzero in thems; the value in each location is the number of 74 columns to the left with any nonzeros in them plus 1 */ 75 PetscCall(PetscCalloc1(dd->w, &dd->ofillcols)); 76 for (i = 0; i < dd->w; i++) { 77 for (k = dd->ofill[i]; k < dd->ofill[i + 1]; k++) dd->ofillcols[dd->ofill[k]] = 1; 78 } 79 for (i = 0; i < dd->w; i++) { 80 if (dd->ofillcols[i]) dd->ofillcols[i] = cnt++; 81 } 82 PetscFunctionReturn(0); 83 } 84 85 /*@ 86 DMDASetBlockFills - Sets the fill pattern in each block for a multi-component problem 87 of the matrix returned by DMCreateMatrix(). 88 89 Logically Collective on da 90 91 Input Parameters: 92 + da - the distributed array 93 . dfill - the fill pattern in the diagonal block (may be NULL, means use dense block) 94 - ofill - the fill pattern in the off-diagonal blocks 95 96 Level: developer 97 98 Notes: 99 This only makes sense when you are doing multicomponent problems but using the 100 MPIAIJ matrix format 101 102 The format for dfill and ofill is a 2 dimensional dof by dof matrix with 1 entries 103 representing coupling and 0 entries for missing coupling. For example 104 $ dfill[9] = {1, 0, 0, 105 $ 1, 1, 0, 106 $ 0, 1, 1} 107 means that row 0 is coupled with only itself in the diagonal block, row 1 is coupled with 108 itself and row 0 (in the diagonal block) and row 2 is coupled with itself and row 1 (in the 109 diagonal block). 
110 111 DMDASetGetMatrix() allows you to provide general code for those more complicated nonzero patterns then 112 can be represented in the dfill, ofill format 113 114 Contributed by Glenn Hammond 115 116 .seealso `DMCreateMatrix()`, `DMDASetGetMatrix()`, `DMSetMatrixPreallocateOnly()` 117 118 @*/ 119 PetscErrorCode DMDASetBlockFills(DM da, const PetscInt *dfill, const PetscInt *ofill) 120 { 121 DM_DA *dd = (DM_DA *)da->data; 122 123 PetscFunctionBegin; 124 /* save the given dfill and ofill information */ 125 PetscCall(DMDASetBlockFills_Private(dfill, dd->w, &dd->dfill)); 126 PetscCall(DMDASetBlockFills_Private(ofill, dd->w, &dd->ofill)); 127 128 /* count nonzeros in ofill columns */ 129 PetscCall(DMDASetBlockFills_Private2(dd)); 130 PetscFunctionReturn(0); 131 } 132 133 /*@ 134 DMDASetBlockFillsSparse - Sets the fill pattern in each block for a multi-component problem 135 of the matrix returned by DMCreateMatrix(), using sparse representations 136 of fill patterns. 137 138 Logically Collective on da 139 140 Input Parameters: 141 + da - the distributed array 142 . dfill - the sparse fill pattern in the diagonal block (may be NULL, means use dense block) 143 - ofill - the sparse fill pattern in the off-diagonal blocks 144 145 Level: developer 146 147 Notes: This only makes sense when you are doing multicomponent problems but using the 148 MPIAIJ matrix format 149 150 The format for dfill and ofill is a sparse representation of a 151 dof-by-dof matrix with 1 entries representing coupling and 0 entries 152 for missing coupling. The sparse representation is a 1 dimensional 153 array of length nz + dof + 1, where nz is the number of non-zeros in 154 the matrix. The first dof entries in the array give the 155 starting array indices of each row's items in the rest of the array, 156 the dof+1st item contains the value nz + dof + 1 (i.e. 
the entire length of the array) 157 and the remaining nz items give the column indices of each of 158 the 1s within the logical 2D matrix. Each row's items within 159 the array are the column indices of the 1s within that row 160 of the 2D matrix. PETSc developers may recognize that this is the 161 same format as that computed by the DMDASetBlockFills_Private() 162 function from a dense 2D matrix representation. 163 164 DMDASetGetMatrix() allows you to provide general code for those more complicated nonzero patterns then 165 can be represented in the dfill, ofill format 166 167 Contributed by Philip C. Roth 168 169 .seealso `DMDASetBlockFills()`, `DMCreateMatrix()`, `DMDASetGetMatrix()`, `DMSetMatrixPreallocateOnly()` 170 171 @*/ 172 PetscErrorCode DMDASetBlockFillsSparse(DM da, const PetscInt *dfillsparse, const PetscInt *ofillsparse) 173 { 174 DM_DA *dd = (DM_DA *)da->data; 175 176 PetscFunctionBegin; 177 /* save the given dfill and ofill information */ 178 PetscCall(DMDASetBlockFillsSparse_Private(dfillsparse, dd->w, &dd->dfill)); 179 PetscCall(DMDASetBlockFillsSparse_Private(ofillsparse, dd->w, &dd->ofill)); 180 181 /* count nonzeros in ofill columns */ 182 PetscCall(DMDASetBlockFills_Private2(dd)); 183 PetscFunctionReturn(0); 184 } 185 186 PetscErrorCode DMCreateColoring_DA(DM da, ISColoringType ctype, ISColoring *coloring) 187 { 188 PetscInt dim, m, n, p, nc; 189 DMBoundaryType bx, by, bz; 190 MPI_Comm comm; 191 PetscMPIInt size; 192 PetscBool isBAIJ; 193 DM_DA *dd = (DM_DA *)da->data; 194 195 PetscFunctionBegin; 196 /* 197 m 198 ------------------------------------------------------ 199 | | 200 | | 201 | ---------------------- | 202 | | | | 203 n | yn | | | 204 | | | | 205 | .--------------------- | 206 | (xs,ys) xn | 207 | . 
         |
         |            (gxs,gys)                                 |
         |                                                      |
          ------------------------------------------------------
  */

  /*
     nc - number of components per grid point
     col - number of colors needed in one direction for single component problem
  */
  PetscCall(DMDAGetInfo(da, &dim, NULL, NULL, NULL, &m, &n, &p, &nc, NULL, &bx, &by, &bz, NULL));

  PetscCall(PetscObjectGetComm((PetscObject)da, &comm));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  if (ctype == IS_COLORING_LOCAL) {
    /* On a single process a ghosted (local) coloring is identical to a global one */
    if (size == 1) {
      ctype = IS_COLORING_GLOBAL;
    } else if (dim > 1) {
      /* A periodic direction collapsed onto one process would make a point couple
         with itself through the ghost region, which the local coloring cannot express */
      if ((m == 1 && bx == DM_BOUNDARY_PERIODIC) || (n == 1 && by == DM_BOUNDARY_PERIODIC) || (p == 1 && bz == DM_BOUNDARY_PERIODIC)) {
        SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "IS_COLORING_LOCAL cannot be used for periodic boundary condition having both ends of the domain on the same process");
      }
    }
  }

  /* Tell the DMDA it has 1 degree of freedom per grid point so that the coloring for BAIJ
     matrices is for the blocks, not the individual matrix elements; the original
     values are restored after the dispatch below */
  PetscCall(PetscStrbeginswith(da->mattype, MATBAIJ, &isBAIJ));
  if (!isBAIJ) PetscCall(PetscStrbeginswith(da->mattype, MATMPIBAIJ, &isBAIJ));
  if (!isBAIJ) PetscCall(PetscStrbeginswith(da->mattype, MATSEQBAIJ, &isBAIJ));
  if (isBAIJ) {
    dd->w  = 1;
    dd->xs = dd->xs / nc;
    dd->xe = dd->xe / nc;
    dd->Xs = dd->Xs / nc;
    dd->Xe = dd->Xe / nc;
  }

  /*
     We do not provide a getcoloring function in the DMDA operations because
     the basic DMDA does not know about matrices. We think of DMDA as being
     more low-level than matrices.
  */
  if (dim == 1) PetscCall(DMCreateColoring_DA_1d_MPIAIJ(da, ctype, coloring));
  else if (dim == 2) PetscCall(DMCreateColoring_DA_2d_MPIAIJ(da, ctype, coloring));
  else if (dim == 3) PetscCall(DMCreateColoring_DA_3d_MPIAIJ(da, ctype, coloring));
  else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "Not done for %" PetscInt_FMT " dimension, send us mail petsc-maint@mcs.anl.gov for code", dim);
  if (isBAIJ) {
    /* restore the per-point degrees of freedom and scaled extents */
    dd->w  = nc;
    dd->xs = dd->xs * nc;
    dd->xe = dd->xe * nc;
    dd->Xs = dd->Xs * nc;
    dd->Xe = dd->Xe * nc;
  }
  PetscFunctionReturn(0);
}

/* ---------------------------------------------------------------------------------*/

/* 2d coloring: colors the grid with a col-by-col tile (col = 2s+1) so that any two
   points within one stencil width of each other get different colors; results are
   cached on the DM (dd->localcoloring / dd->ghostedcoloring). */
PetscErrorCode DMCreateColoring_DA_2d_MPIAIJ(DM da, ISColoringType ctype, ISColoring *coloring)
{
  PetscInt         xs, ys, nx, ny, i, j, ii, gxs, gys, gnx, gny, m, n, M, N, dim, s, k, nc, col;
  PetscInt         ncolors;
  MPI_Comm         comm;
  DMBoundaryType   bx, by;
  DMDAStencilType  st;
  ISColoringValue *colors;
  DM_DA           *dd = (DM_DA *)da->data;

  PetscFunctionBegin;
  /*
     nc - number of components per grid point
     col - number of colors needed in one direction for single component problem
  */
  PetscCall(DMDAGetInfo(da, &dim, &m, &n, NULL, &M, &N, NULL, &nc, &s, &bx, &by, NULL, &st));
  col = 2 * s + 1;
  PetscCall(DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL));
  PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL));
  PetscCall(PetscObjectGetComm((PetscObject)da, &comm));

  /* special case as taught to us by Paul Hovland */
  if (st == DMDA_STENCIL_STAR && s == 1) {
    PetscCall(DMCreateColoring_DA_2d_5pt_MPIAIJ(da, ctype, coloring));
  } else {
    if (ctype == IS_COLORING_GLOBAL) {
      if (!dd->localcoloring) {
        PetscCall(PetscMalloc1(nc * nx * ny, &colors));
        ii = 0;
        for (j = ys; j < ys + ny; j++) {
          for (i = xs; i < xs + nx; i++) {
            for (k = 0; k < nc; k++) colors[ii++] = k + nc * ((i %
col) + col * (j % col));
          }
        }
        ncolors = nc + nc * (col - 1 + col * (col - 1));
        /* colors is handed off to the ISColoring (PETSC_OWN_POINTER) — do not free here */
        PetscCall(ISColoringCreate(comm, ncolors, nc * nx * ny, colors, PETSC_OWN_POINTER, &dd->localcoloring));
      }
      *coloring = dd->localcoloring;
    } else if (ctype == IS_COLORING_LOCAL) {
      if (!dd->ghostedcoloring) {
        PetscCall(PetscMalloc1(nc * gnx * gny, &colors));
        ii = 0;
        for (j = gys; j < gys + gny; j++) {
          for (i = gxs; i < gxs + gnx; i++) {
            for (k = 0; k < nc; k++) {
              /* the complicated stuff is to handle periodic boundaries */
              colors[ii++] = k + nc * ((SetInRange(i, m) % col) + col * (SetInRange(j, n) % col));
            }
          }
        }
        ncolors = nc + nc * (col - 1 + col * (col - 1));
        PetscCall(ISColoringCreate(comm, ncolors, nc * gnx * gny, colors, PETSC_OWN_POINTER, &dd->ghostedcoloring));
        /* PetscIntView(ncolors,(PetscInt*)colors,0); */

        PetscCall(ISColoringSetType(dd->ghostedcoloring, IS_COLORING_LOCAL));
      }
      *coloring = dd->ghostedcoloring;
    } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_ARG_WRONG, "Unknown ISColoringType %d", (int)ctype);
  }
  /* bump the reference count since the cached coloring is also handed to the caller */
  PetscCall(ISColoringReference(*coloring));
  PetscFunctionReturn(0);
}

/* ---------------------------------------------------------------------------------*/

/* 3d coloring: same tiling idea as the 2d case extended to a col^3 tile;
   results are cached on the DM. */
PetscErrorCode DMCreateColoring_DA_3d_MPIAIJ(DM da, ISColoringType ctype, ISColoring *coloring)
{
  PetscInt         xs, ys, nx, ny, i, j, gxs, gys, gnx, gny, m, n, p, dim, s, k, nc, col, zs, gzs, ii, l, nz, gnz, M, N, P;
  PetscInt         ncolors;
  MPI_Comm         comm;
  DMBoundaryType   bx, by, bz;
  DMDAStencilType  st;
  ISColoringValue *colors;
  DM_DA           *dd = (DM_DA *)da->data;

  PetscFunctionBegin;
  /*
     nc - number of components per grid point
     col - number of colors needed in one direction for single component problem
  */
  PetscCall(DMDAGetInfo(da, &dim, &m, &n, &p, &M, &N, &P, &nc, &s, &bx, &by, &bz, &st));
  col = 2 * s + 1;
  PetscCall(DMDAGetCorners(da, &xs, &ys, &zs, &nx, &ny, &nz));
  PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, &gzs, &gnx, &gny, &gnz));
  PetscCall(PetscObjectGetComm((PetscObject)da, &comm));

  /* create the coloring */
  if (ctype == IS_COLORING_GLOBAL) {
    if (!dd->localcoloring) {
      PetscCall(PetscMalloc1(nc * nx * ny * nz, &colors));
      ii = 0;
      for (k = zs; k < zs + nz; k++) {
        for (j = ys; j < ys + ny; j++) {
          for (i = xs; i < xs + nx; i++) {
            for (l = 0; l < nc; l++) colors[ii++] = l + nc * ((i % col) + col * (j % col) + col * col * (k % col));
          }
        }
      }
      ncolors = nc + nc * (col - 1 + col * (col - 1) + col * col * (col - 1));
      PetscCall(ISColoringCreate(comm, ncolors, nc * nx * ny * nz, colors, PETSC_OWN_POINTER, &dd->localcoloring));
    }
    *coloring = dd->localcoloring;
  } else if (ctype == IS_COLORING_LOCAL) {
    if (!dd->ghostedcoloring) {
      PetscCall(PetscMalloc1(nc * gnx * gny * gnz, &colors));
      ii = 0;
      for (k = gzs; k < gzs + gnz; k++) {
        for (j = gys; j < gys + gny; j++) {
          for (i = gxs; i < gxs + gnx; i++) {
            for (l = 0; l < nc; l++) {
              /* the complicated stuff is to handle periodic boundaries */
              colors[ii++] = l + nc * ((SetInRange(i, m) % col) + col * (SetInRange(j, n) % col) + col * col * (SetInRange(k, p) % col));
            }
          }
        }
      }
      ncolors = nc + nc * (col - 1 + col * (col - 1) + col * col * (col - 1));
      PetscCall(ISColoringCreate(comm, ncolors, nc * gnx * gny * gnz, colors, PETSC_OWN_POINTER, &dd->ghostedcoloring));
      PetscCall(ISColoringSetType(dd->ghostedcoloring, IS_COLORING_LOCAL));
    }
    *coloring = dd->ghostedcoloring;
  } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_ARG_WRONG, "Unknown ISColoringType %d", (int)ctype);
  /* bump the reference count since the cached coloring is also handed to the caller */
  PetscCall(ISColoringReference(*coloring));
  PetscFunctionReturn(0);
}

/* ---------------------------------------------------------------------------------*/

PetscErrorCode
DMCreateColoring_DA_1d_MPIAIJ(DM da, ISColoringType ctype, ISColoring *coloring)
{
  PetscInt         xs, nx, i, i1, gxs, gnx, l, m, M, dim, s, nc, col;
  PetscInt         ncolors;
  MPI_Comm         comm;
  DMBoundaryType   bx;
  ISColoringValue *colors;
  DM_DA           *dd = (DM_DA *)da->data;

  PetscFunctionBegin;
  /*
     nc - number of components per grid point
     col - number of colors needed in one direction for single component problem
  */
  PetscCall(DMDAGetInfo(da, &dim, &m, NULL, NULL, &M, NULL, NULL, &nc, &s, &bx, NULL, NULL, NULL));
  col = 2 * s + 1;
  PetscCall(DMDAGetCorners(da, &xs, NULL, NULL, &nx, NULL, NULL));
  PetscCall(DMDAGetGhostCorners(da, &gxs, NULL, NULL, &gnx, NULL, NULL));
  PetscCall(PetscObjectGetComm((PetscObject)da, &comm));

  /* create the coloring */
  if (ctype == IS_COLORING_GLOBAL) {
    if (!dd->localcoloring) {
      PetscCall(PetscMalloc1(nc * nx, &colors));
      if (dd->ofillcols) {
        /* A block fill pattern was provided (DMDASetBlockFills()); exploit it so
           that components with no off-diagonal coupling share colors */
        PetscInt tc = 0;
        /* tc = number of components that couple to off-diagonal blocks */
        for (i = 0; i < nc; i++) tc += (PetscInt)(dd->ofillcols[i] > 0);
        i1 = 0;
        for (i = xs; i < xs + nx; i++) {
          for (l = 0; l < nc; l++) {
            if (dd->ofillcols[l] && (i % col)) {
              colors[i1++] = nc - 1 + tc * ((i % col) - 1) + dd->ofillcols[l];
            } else {
              colors[i1++] = l;
            }
          }
        }
        ncolors = nc + 2 * s * tc;
      } else {
        /* dense block: standard stride-col coloring */
        i1 = 0;
        for (i = xs; i < xs + nx; i++) {
          for (l = 0; l < nc; l++) colors[i1++] = l + nc * (i % col);
        }
        ncolors = nc + nc * (col - 1);
      }
      /* colors ownership passes to the ISColoring (PETSC_OWN_POINTER) */
      PetscCall(ISColoringCreate(comm, ncolors, nc * nx, colors, PETSC_OWN_POINTER, &dd->localcoloring));
    }
    *coloring = dd->localcoloring;
  } else if (ctype == IS_COLORING_LOCAL) {
    if (!dd->ghostedcoloring) {
      PetscCall(PetscMalloc1(nc * gnx, &colors));
      i1 = 0;
      for (i = gxs; i < gxs + gnx; i++) {
        for (l = 0; l < nc; l++) {
          /* the complicated stuff is to handle periodic boundaries */
          colors[i1++] = l + nc * (SetInRange(i, m) % col);
        }
      }
      ncolors = nc + nc * (col - 1);
      PetscCall(ISColoringCreate(comm, ncolors, nc * gnx, colors, PETSC_OWN_POINTER, &dd->ghostedcoloring));
      PetscCall(ISColoringSetType(dd->ghostedcoloring, IS_COLORING_LOCAL));
    }
    *coloring = dd->ghostedcoloring;
  } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_ARG_WRONG, "Unknown ISColoringType %d", (int)ctype);
  /* bump the reference count since the cached coloring is also handed to the caller */
  PetscCall(ISColoringReference(*coloring));
  PetscFunctionReturn(0);
}

/* 5-point star stencil special case (2d, s == 1): only 5 colors per component are
   needed using the (3j + i) mod 5 pattern.  Called from DMCreateColoring_DA_2d_MPIAIJ,
   which also takes the caller's reference on the returned coloring. */
PetscErrorCode DMCreateColoring_DA_2d_5pt_MPIAIJ(DM da, ISColoringType ctype, ISColoring *coloring)
{
  PetscInt         xs, ys, nx, ny, i, j, ii, gxs, gys, gnx, gny, m, n, dim, s, k, nc;
  PetscInt         ncolors;
  MPI_Comm         comm;
  DMBoundaryType   bx, by;
  ISColoringValue *colors;
  DM_DA           *dd = (DM_DA *)da->data;

  PetscFunctionBegin;
  /*
     nc - number of components per grid point
     col - number of colors needed in one direction for single component problem
  */
  PetscCall(DMDAGetInfo(da, &dim, &m, &n, NULL, NULL, NULL, NULL, &nc, &s, &bx, &by, NULL, NULL));
  PetscCall(DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL));
  PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL));
  PetscCall(PetscObjectGetComm((PetscObject)da, &comm));
  /* create the coloring */
  if (ctype == IS_COLORING_GLOBAL) {
    if (!dd->localcoloring) {
      PetscCall(PetscMalloc1(nc * nx * ny, &colors));
      ii = 0;
      for (j = ys; j < ys + ny; j++) {
        for (i = xs; i < xs + nx; i++) {
          for (k = 0; k < nc; k++) colors[ii++] = k + nc * ((3 * j + i) % 5);
        }
      }
      ncolors = 5 * nc;
      PetscCall(ISColoringCreate(comm, ncolors, nc * nx * ny, colors, PETSC_OWN_POINTER, &dd->localcoloring));
    }
    *coloring = dd->localcoloring;
  } else if (ctype == IS_COLORING_LOCAL) {
    if (!dd->ghostedcoloring) {
      PetscCall(PetscMalloc1(nc * gnx * gny, &colors));
      ii = 0;
      for (j = gys; j < gys + gny; j++) {
        for (i = gxs; i < gxs + gnx; i++) {
          /* SetInRange wraps ghost indices through periodic boundaries */
          for (k = 0; k < nc; k++) colors[ii++] = k + nc * ((3 * SetInRange(j, n) + SetInRange(i, m)) % 5);
        }
      }
      ncolors = 5 * nc;
      PetscCall(ISColoringCreate(comm, ncolors, nc * gnx * gny, colors, PETSC_OWN_POINTER, &dd->ghostedcoloring));
      PetscCall(ISColoringSetType(dd->ghostedcoloring, IS_COLORING_LOCAL));
    }
    *coloring = dd->ghostedcoloring;
  } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_ARG_WRONG, "Unknown ISColoringType %d", (int)ctype);
  PetscFunctionReturn(0);
}

/* =========================================================================== */
extern PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ(DM, Mat);
extern PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ_Fill(DM, Mat);
extern PetscErrorCode DMCreateMatrix_DA_1d_SeqAIJ_NoPreallocation(DM, Mat);
extern PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ(DM, Mat);
extern PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ_Fill(DM, Mat);
extern PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ(DM, Mat);
extern PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ_Fill(DM, Mat);
extern PetscErrorCode DMCreateMatrix_DA_2d_MPIBAIJ(DM, Mat);
extern PetscErrorCode DMCreateMatrix_DA_3d_MPIBAIJ(DM, Mat);
extern PetscErrorCode DMCreateMatrix_DA_2d_MPISBAIJ(DM, Mat);
extern PetscErrorCode DMCreateMatrix_DA_3d_MPISBAIJ(DM, Mat);
extern PetscErrorCode DMCreateMatrix_DA_2d_MPISELL(DM, Mat);
extern PetscErrorCode DMCreateMatrix_DA_3d_MPISELL(DM, Mat);
extern PetscErrorCode DMCreateMatrix_DA_IS(DM, Mat);

/*@C
  MatSetupDM - Sets the DMDA that is to be used by the HYPRE_StructMatrix PETSc matrix

  Logically Collective on mat

  Input Parameters:
+ mat - the matrix
- da - the da

  Level: intermediate

@*/
PetscErrorCode MatSetupDM(Mat mat, DM da)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat, MAT_CLASSID, 1);
  PetscValidHeaderSpecificType(da, DM_CLASSID, 2, DMDA);
  /* no-op for matrix types that do not implement MatSetupDM_C */
  PetscTryMethod(mat, "MatSetupDM_C", (Mat, DM), (mat, da));
  PetscFunctionReturn(0);
}

/* Views a DMDA-generated parallel matrix in the application's natural ordering:
   the matrix is permuted through the DMDA's AO into a temporary matrix which is
   then viewed and destroyed. */
PetscErrorCode MatView_MPI_DA(Mat A, PetscViewer viewer)
{
  DM                da;
  const char       *prefix;
  Mat               Anatural;
  AO                ao;
  PetscInt          rstart, rend, *petsc, i;
  IS                is;
  MPI_Comm          comm;
  PetscViewerFormat format;

  PetscFunctionBegin;
  /* Check whether we are just printing info, in which case MatView() already viewed everything we wanted to view */
  PetscCall(PetscViewerGetFormat(viewer, &format));
  if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) PetscFunctionReturn(0);

  PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
  PetscCall(MatGetDM(A, &da));
  PetscCheck(da, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Matrix not generated from a DMDA");

  /* build the PETSc-ordering index set of this process's rows mapped through the AO */
  PetscCall(DMDAGetAO(da, &ao));
  PetscCall(MatGetOwnershipRange(A, &rstart, &rend));
  PetscCall(PetscMalloc1(rend - rstart, &petsc));
  for (i = rstart; i < rend; i++) petsc[i - rstart] = i;
  PetscCall(AOApplicationToPetsc(ao, rend - rstart, petsc));
  /* petsc array ownership passes to the IS (PETSC_OWN_POINTER) */
  PetscCall(ISCreateGeneral(comm, rend - rstart, petsc, PETSC_OWN_POINTER, &is));

  /* call viewer on natural ordering */
  PetscCall(MatCreateSubMatrix(A, is, is, MAT_INITIAL_MATRIX, &Anatural));
  PetscCall(ISDestroy(&is));
  PetscCall(PetscObjectGetOptionsPrefix((PetscObject)A, &prefix));
  PetscCall(PetscObjectSetOptionsPrefix((PetscObject)Anatural, prefix));
  PetscCall(PetscObjectSetName((PetscObject)Anatural, ((PetscObject)A)->name));
  /* temporarily suppress the class/name/prefix banner so the output looks like A itself */
  ((PetscObject)Anatural)->donotPetscObjectPrintClassNamePrefixType = PETSC_TRUE;
  PetscCall(MatView(Anatural, viewer));
  ((PetscObject)Anatural)->donotPetscObjectPrintClassNamePrefixType = PETSC_FALSE;
  PetscCall(MatDestroy(&Anatural));
  PetscFunctionReturn(0);
}

/* Loads a matrix stored in natural ordering and permutes it into the DMDA's
   PETSc ordering, replacing A's contents in place (MatHeaderReplace). */
PetscErrorCode MatLoad_MPI_DA(Mat A, PetscViewer viewer)
{
  DM       da;
  Mat      Anatural, Aapp;
  AO       ao;
  PetscInt rstart, rend, *app, i, m, n, M, N;
  IS       is;
  MPI_Comm comm;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
  PetscCall(MatGetDM(A, &da));
  PetscCheck(da, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Matrix not generated from a DMDA");

  /* Load the matrix in natural ordering */
  PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &Anatural));
  PetscCall(MatSetType(Anatural, ((PetscObject)A)->type_name));
  PetscCall(MatGetSize(A, &M, &N));
  PetscCall(MatGetLocalSize(A, &m, &n));
  PetscCall(MatSetSizes(Anatural, m, n, M, N));
  PetscCall(MatLoad(Anatural, viewer));

  /* Map natural ordering to application ordering and create IS */
  PetscCall(DMDAGetAO(da, &ao));
  PetscCall(MatGetOwnershipRange(Anatural, &rstart, &rend));
  PetscCall(PetscMalloc1(rend - rstart, &app));
  for (i = rstart; i < rend; i++) app[i - rstart] = i;
  PetscCall(AOPetscToApplication(ao, rend - rstart, app));
  /* app array ownership passes to the IS (PETSC_OWN_POINTER) */
  PetscCall(ISCreateGeneral(comm, rend - rstart, app, PETSC_OWN_POINTER, &is));

  /* Do permutation and replace header */
  PetscCall(MatCreateSubMatrix(Anatural, is, is, MAT_INITIAL_MATRIX, &Aapp));
  PetscCall(MatHeaderReplace(A, &Aapp));
  PetscCall(ISDestroy(&is));
  PetscCall(MatDestroy(&Anatural));
  PetscFunctionReturn(0);
}

/* Creates (and preallocates) the Jacobian matrix for a DMDA, dispatching on the
   matrix type's available preallocation routines (AIJ/BAIJ/SBAIJ/SELL/IS). */
PetscErrorCode DMCreateMatrix_DA(DM da, Mat *J)
{
  PetscInt dim, dof, nx, ny, nz, dims[3], starts[3], M, N, P;
  Mat      A;
  MPI_Comm comm;
  MatType  Atype;
  void (*aij)(void) = NULL, (*baij)(void) = NULL, (*sbaij)(void) = NULL, (*sell)(void) = NULL, (*is)(void) = NULL;
  MatType     mtype;
  PetscMPIInt size;
  DM_DA      *dd = (DM_DA *)da->data;

  PetscFunctionBegin;
  PetscCall(MatInitializePackage());
  mtype = da->mattype;

  /*
                                                      m
          ------------------------------------------------------
         |                                                      |
         |                                                      |
         |               ----------------------                 |
         |               |                    |                 |
        n|           ny  |                    |                 |
         |               |                    |                 |
         |               .---------------------                 |
         |             (xs,ys)     nx                           |
         |            .
         |
         |            (gxs,gys)                                 |
         |                                                      |
          ------------------------------------------------------
  */

  /*
     nc - number of components per grid point
     col - number of colors needed in one direction for single component problem
  */
  M   = dd->M;
  N   = dd->N;
  P   = dd->P;
  dim = da->dim;
  dof = dd->w;
  /* PetscCall(DMDAGetInfo(da,&dim,&M,&N,&P,NULL,NULL,NULL,&dof,NULL,NULL,NULL,NULL,NULL)); */
  PetscCall(DMDAGetCorners(da, NULL, NULL, NULL, &nx, &ny, &nz));
  PetscCall(PetscObjectGetComm((PetscObject)da, &comm));
  PetscCall(MatCreate(comm, &A));
  PetscCall(MatSetSizes(A, dof * nx * ny * nz, dof * nx * ny * nz, dof * M * N * P, dof * M * N * P));
  PetscCall(MatSetType(A, mtype));
  PetscCall(MatSetFromOptions(A));
  /* small local problems are pinned to the CPU (GPU offload not worthwhile) */
  if (dof * nx * ny * nz < da->bind_below) {
    PetscCall(MatSetBindingPropagates(A, PETSC_TRUE));
    PetscCall(MatBindToCPU(A, PETSC_TRUE));
  }
  PetscCall(MatSetDM(A, da));
  if (da->structure_only) PetscCall(MatSetOption(A, MAT_STRUCTURE_ONLY, PETSC_TRUE));
  PetscCall(MatGetType(A, &Atype));
  /*
     We do not provide a getmatrix function in the DMDA operations because
     the basic DMDA does not know about matrices. We think of DMDA as being more
     more low-level than matrices. This is kind of cheating but, cause sometimes
     we think of DMDA has higher level than matrices.

     We could switch based on Atype (or mtype), but we do not since the
     specialized setting routines depend only on the particular preallocation
     details of the matrix, not the type itself.
  */
  /* probe which family of preallocation routine the matrix type provides */
  PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatMPIAIJSetPreallocation_C", &aij));
  if (!aij) PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatSeqAIJSetPreallocation_C", &aij));
  if (!aij) {
    PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatMPIBAIJSetPreallocation_C", &baij));
    if (!baij) PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatSeqBAIJSetPreallocation_C", &baij));
    if (!baij) {
      PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatMPISBAIJSetPreallocation_C", &sbaij));
      if (!sbaij) PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatSeqSBAIJSetPreallocation_C", &sbaij));
      if (!sbaij) {
        PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatMPISELLSetPreallocation_C", &sell));
        if (!sell) PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatSeqSELLSetPreallocation_C", &sell));
      }
      if (!sell) PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatISSetPreallocation_C", &is));
    }
  }
  if (aij) {
    if (dim == 1) {
      if (dd->ofill) {
        PetscCall(DMCreateMatrix_DA_1d_MPIAIJ_Fill(da, A));
      } else {
        /* NOTE: these locals intentionally shadow the outer bx-less scope; size here
           is only used to pick the sequential no-preallocation fast path */
        DMBoundaryType bx;
        PetscMPIInt    size;
        PetscCall(DMDAGetInfo(da, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, &bx, NULL, NULL, NULL));
        PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)da), &size));
        if (size == 1 && bx == DM_BOUNDARY_NONE) {
          PetscCall(DMCreateMatrix_DA_1d_SeqAIJ_NoPreallocation(da, A));
        } else {
          PetscCall(DMCreateMatrix_DA_1d_MPIAIJ(da, A));
        }
      }
    } else if (dim == 2) {
      if (dd->ofill) {
        PetscCall(DMCreateMatrix_DA_2d_MPIAIJ_Fill(da, A));
      } else {
        PetscCall(DMCreateMatrix_DA_2d_MPIAIJ(da, A));
      }
    } else if (dim == 3) {
      if (dd->ofill) {
        PetscCall(DMCreateMatrix_DA_3d_MPIAIJ_Fill(da, A));
      } else {
        PetscCall(DMCreateMatrix_DA_3d_MPIAIJ(da, A));
      }
    }
  } else if (baij) {
    if (dim == 2) {
      PetscCall(DMCreateMatrix_DA_2d_MPIBAIJ(da, A));
    } else if (dim == 3) {
      PetscCall(DMCreateMatrix_DA_3d_MPIBAIJ(da, A));
    } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "Not implemented for %" PetscInt_FMT " dimension and Matrix Type: %s in %" PetscInt_FMT " dimension! Send mail to petsc-maint@mcs.anl.gov for code", dim, Atype, dim);
  } else if (sbaij) {
    if (dim == 2) {
      PetscCall(DMCreateMatrix_DA_2d_MPISBAIJ(da, A));
    } else if (dim == 3) {
      PetscCall(DMCreateMatrix_DA_3d_MPISBAIJ(da, A));
    } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "Not implemented for %" PetscInt_FMT " dimension and Matrix Type: %s in %" PetscInt_FMT " dimension! Send mail to petsc-maint@mcs.anl.gov for code", dim, Atype, dim);
  } else if (sell) {
    if (dim == 2) {
      PetscCall(DMCreateMatrix_DA_2d_MPISELL(da, A));
    } else if (dim == 3) {
      PetscCall(DMCreateMatrix_DA_3d_MPISELL(da, A));
    } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "Not implemented for %" PetscInt_FMT " dimension and Matrix Type: %s in %" PetscInt_FMT " dimension! Send mail to petsc-maint@mcs.anl.gov for code", dim, Atype, dim);
  } else if (is) {
    PetscCall(DMCreateMatrix_DA_IS(da, A));
  } else {
    /* no specialized preallocation available: just set up with the l2g mapping */
    ISLocalToGlobalMapping ltog;

    PetscCall(MatSetBlockSize(A, dof));
    PetscCall(MatSetUp(A));
    PetscCall(DMGetLocalToGlobalMapping(da, &ltog));
    PetscCall(MatSetLocalToGlobalMapping(A, ltog, ltog));
  }
  PetscCall(DMDAGetGhostCorners(da, &starts[0], &starts[1], &starts[2], &dims[0], &dims[1], &dims[2]));
  PetscCall(MatSetStencil(A, dim, dims, starts, dof));
  PetscCall(MatSetDM(A, da));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  if (size > 1) {
    /* change viewer to display matrix in natural ordering */
    PetscCall(MatSetOperation(A, MATOP_VIEW, (void (*)(void))MatView_MPI_DA));
    PetscCall(MatSetOperation(A, MATOP_LOAD, (void (*)(void))MatLoad_MPI_DA));
  }
  *J = A;
  PetscFunctionReturn(0);
}

/* ---------------------------------------------------------------------------------*/
PETSC_EXTERN PetscErrorCode MatISSetPreallocation_IS(Mat, PetscInt, const PetscInt[], PetscInt, const PetscInt[]);

/* Preallocates a MATIS matrix from the DMDA element decomposition using a
   MATPREALLOCATOR helper matrix. */
PetscErrorCode DMCreateMatrix_DA_IS(DM dm, Mat J)
{
  DM_DA                 *da = (DM_DA *)dm->data;
  Mat                    lJ, P;
  ISLocalToGlobalMapping ltog;
  IS                     is;
  PetscBT                bt;
  const PetscInt        *e_loc, *idx;
  PetscInt               i, nel, nen, nv, dof, *gidx, n, N;

  /* The l2g map of DMDA has all ghosted nodes, and e_loc is a subset of all the local nodes (including the ghosted)
     We need to filter out the local indices that are not represented through the DMDAGetElements decomposition */
  PetscFunctionBegin;
  dof = da->w;
  PetscCall(MatSetBlockSize(J, dof));
  PetscCall(DMGetLocalToGlobalMapping(dm, &ltog));

  /* flag local elements indices in local DMDA numbering */
  PetscCall(ISLocalToGlobalMappingGetSize(ltog, &nv));
  PetscCall(PetscBTCreate(nv / dof, &bt));
  PetscCall(DMDAGetElements(dm, &nel, &nen, &e_loc)); /* this will throw an error if the
                                                        stencil type is not DMDA_STENCIL_BOX */
  for (i = 0; i < nel * nen; i++) PetscCall(PetscBTSet(bt, e_loc[i]));

  /* filter out (set to -1) the global indices not used by the local elements */
  PetscCall(PetscMalloc1(nv / dof, &gidx));
  PetscCall(ISLocalToGlobalMappingGetBlockIndices(ltog, &idx));
  PetscCall(PetscArraycpy(gidx, idx, nv / dof));
  PetscCall(ISLocalToGlobalMappingRestoreBlockIndices(ltog, &idx));
  for (i = 0; i < nv / dof; i++)
    if (!PetscBTLookup(bt, i)) gidx[i] = -1;
  PetscCall(PetscBTDestroy(&bt));
  /* gidx ownership passes to the IS (PETSC_OWN_POINTER) */
  PetscCall(ISCreateBlock(PetscObjectComm((PetscObject)dm), dof, nv / dof, gidx, PETSC_OWN_POINTER, &is));
  PetscCall(ISLocalToGlobalMappingCreateIS(is, &ltog));
  PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));
  PetscCall(ISLocalToGlobalMappingDestroy(&ltog));
  PetscCall(ISDestroy(&is));

  /* Preallocation */
  if (dm->prealloc_skip) {
    PetscCall(MatSetUp(J));
  } else {
    /* run the element loop once against a MATPREALLOCATOR matrix to count the
       nonzeros, then transfer the pattern to the local matrix of J */
    PetscCall(MatISGetLocalMat(J, &lJ));
    PetscCall(MatGetLocalToGlobalMapping(lJ, &ltog, NULL));
    PetscCall(MatCreate(PetscObjectComm((PetscObject)lJ), &P));
    PetscCall(MatSetType(P, MATPREALLOCATOR));
    PetscCall(MatSetLocalToGlobalMapping(P, ltog, ltog));
    PetscCall(MatGetSize(lJ, &N, NULL));
    PetscCall(MatGetLocalSize(lJ, &n, NULL));
    PetscCall(MatSetSizes(P, n, n, N, N));
    PetscCall(MatSetBlockSize(P, dof));
    PetscCall(MatSetUp(P));
    for (i = 0; i < nel; i++) PetscCall(MatSetValuesBlockedLocal(P, nen, e_loc + i * nen, nen, e_loc + i * nen, NULL, INSERT_VALUES));
    PetscCall(MatPreallocatorPreallocate(P, (PetscBool)!da->prealloc_only, lJ));
    PetscCall(MatISRestoreLocalMat(J, &lJ));
    PetscCall(DMDARestoreElements(dm, &nel, &nen, &e_loc));
    PetscCall(MatDestroy(&P));

    PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
  }
  PetscFunctionReturn(0);
}

PetscErrorCode DMCreateMatrix_DA_2d_MPISELL(DM da, Mat J)
{
PetscInt xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny, m, n, dim, s, *cols = NULL, k, nc, *rows = NULL, col, cnt, l, p; 850 PetscInt lstart, lend, pstart, pend, *dnz, *onz; 851 MPI_Comm comm; 852 PetscScalar *values; 853 DMBoundaryType bx, by; 854 ISLocalToGlobalMapping ltog; 855 DMDAStencilType st; 856 857 PetscFunctionBegin; 858 /* 859 nc - number of components per grid point 860 col - number of colors needed in one direction for single component problem 861 862 */ 863 PetscCall(DMDAGetInfo(da, &dim, &m, &n, NULL, NULL, NULL, NULL, &nc, &s, &bx, &by, NULL, &st)); 864 col = 2 * s + 1; 865 PetscCall(DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL)); 866 PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL)); 867 PetscCall(PetscObjectGetComm((PetscObject)da, &comm)); 868 869 PetscCall(PetscMalloc2(nc, &rows, col * col * nc * nc, &cols)); 870 PetscCall(DMGetLocalToGlobalMapping(da, <og)); 871 872 PetscCall(MatSetBlockSize(J, nc)); 873 /* determine the matrix preallocation information */ 874 MatPreallocateBegin(comm, nc * nx * ny, nc * nx * ny, dnz, onz); 875 for (i = xs; i < xs + nx; i++) { 876 pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i)); 877 pend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1)); 878 879 for (j = ys; j < ys + ny; j++) { 880 slot = i - gxs + gnx * (j - gys); 881 882 lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j)); 883 lend = (by == DM_BOUNDARY_PERIODIC) ? 
s : (PetscMin(s, n - j - 1)); 884 885 cnt = 0; 886 for (k = 0; k < nc; k++) { 887 for (l = lstart; l < lend + 1; l++) { 888 for (p = pstart; p < pend + 1; p++) { 889 if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star have either l = 0 or p = 0 */ 890 cols[cnt++] = k + nc * (slot + gnx * l + p); 891 } 892 } 893 } 894 rows[k] = k + nc * (slot); 895 } 896 PetscCall(MatPreallocateSetLocal(ltog, nc, rows, ltog, cnt, cols, dnz, onz)); 897 } 898 } 899 PetscCall(MatSetBlockSize(J, nc)); 900 PetscCall(MatSeqSELLSetPreallocation(J, 0, dnz)); 901 PetscCall(MatMPISELLSetPreallocation(J, 0, dnz, 0, onz)); 902 MatPreallocateEnd(dnz, onz); 903 904 PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog)); 905 906 /* 907 For each node in the grid: we get the neighbors in the local (on processor ordering 908 that includes the ghost points) then MatSetValuesLocal() maps those indices to the global 909 PETSc ordering. 910 */ 911 if (!da->prealloc_only) { 912 PetscCall(PetscCalloc1(col * col * nc * nc, &values)); 913 for (i = xs; i < xs + nx; i++) { 914 pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i)); 915 pend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1)); 916 917 for (j = ys; j < ys + ny; j++) { 918 slot = i - gxs + gnx * (j - gys); 919 920 lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j)); 921 lend = (by == DM_BOUNDARY_PERIODIC) ? 
s : (PetscMin(s, n - j - 1)); 922 923 cnt = 0; 924 for (k = 0; k < nc; k++) { 925 for (l = lstart; l < lend + 1; l++) { 926 for (p = pstart; p < pend + 1; p++) { 927 if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star have either l = 0 or p = 0 */ 928 cols[cnt++] = k + nc * (slot + gnx * l + p); 929 } 930 } 931 } 932 rows[k] = k + nc * (slot); 933 } 934 PetscCall(MatSetValuesLocal(J, nc, rows, cnt, cols, values, INSERT_VALUES)); 935 } 936 } 937 PetscCall(PetscFree(values)); 938 /* do not copy values to GPU since they are all zero and not yet needed there */ 939 PetscCall(MatBindToCPU(J, PETSC_TRUE)); 940 PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY)); 941 PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY)); 942 PetscCall(MatBindToCPU(J, PETSC_FALSE)); 943 PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE)); 944 } 945 PetscCall(PetscFree2(rows, cols)); 946 PetscFunctionReturn(0); 947 } 948 949 PetscErrorCode DMCreateMatrix_DA_3d_MPISELL(DM da, Mat J) 950 { 951 PetscInt xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny; 952 PetscInt m, n, dim, s, *cols = NULL, k, nc, *rows = NULL, col, cnt, l, p, *dnz = NULL, *onz = NULL; 953 PetscInt istart, iend, jstart, jend, kstart, kend, zs, nz, gzs, gnz, ii, jj, kk, M, N, P; 954 MPI_Comm comm; 955 PetscScalar *values; 956 DMBoundaryType bx, by, bz; 957 ISLocalToGlobalMapping ltog; 958 DMDAStencilType st; 959 960 PetscFunctionBegin; 961 /* 962 nc - number of components per grid point 963 col - number of colors needed in one direction for single component problem 964 965 */ 966 PetscCall(DMDAGetInfo(da, &dim, &m, &n, &p, &M, &N, &P, &nc, &s, &bx, &by, &bz, &st)); 967 col = 2 * s + 1; 968 PetscCall(DMDAGetCorners(da, &xs, &ys, &zs, &nx, &ny, &nz)); 969 PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, &gzs, &gnx, &gny, &gnz)); 970 PetscCall(PetscObjectGetComm((PetscObject)da, &comm)); 971 972 PetscCall(PetscMalloc2(nc, &rows, col * col * col * nc * nc, &cols)); 973 PetscCall(DMGetLocalToGlobalMapping(da, 
&ltog));

  PetscCall(MatSetBlockSize(J, nc));
  /* determine the matrix preallocation information */
  MatPreallocateBegin(comm, nc * nx * ny * nz, nc * nx * ny * nz, dnz, onz);
  for (i = xs; i < xs + nx; i++) {
    /* clamp the stencil at physical boundaries unless the direction is periodic */
    istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
    iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
    for (j = ys; j < ys + ny; j++) {
      jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
      jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
      for (k = zs; k < zs + nz; k++) {
        kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
        kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));

        slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs); /* local (ghosted) index of grid point (i,j,k) */

        cnt = 0;
        for (l = 0; l < nc; l++) {
          for (ii = istart; ii < iend + 1; ii++) {
            for (jj = jstart; jj < jend + 1; jj++) {
              for (kk = kstart; kk < kend + 1; kk++) {
                if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/
                  cols[cnt++] = l + nc * (slot + ii + gnx * jj + gnx * gny * kk);
                }
              }
            }
          }
          rows[l] = l + nc * (slot);
        }
        PetscCall(MatPreallocateSetLocal(ltog, nc, rows, ltog, cnt, cols, dnz, onz));
      }
    }
  }
  PetscCall(MatSetBlockSize(J, nc));
  PetscCall(MatSeqSELLSetPreallocation(J, 0, dnz));
  PetscCall(MatMPISELLSetPreallocation(J, 0, dnz, 0, onz));
  MatPreallocateEnd(dnz, onz);
  PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    /* NOTE(review): allocates col^3 * nc^3 zeros, while at most nc rows x (col^3 * nc)
       columns = col^3 * nc^2 are inserted per call below; looks like an over-allocation
       (harmless but wasteful for large nc) -- confirm before changing */
    PetscCall(PetscCalloc1(col * col * col * nc * nc * nc, &values));
    for (i = xs; i < xs + nx; i++) {
      istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
      iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
      for (j = ys; j < ys + ny; j++) {
        jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
        jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
        for (k = zs; k < zs + nz; k++) {
          kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
          kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));

          slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);

          cnt = 0;
          for (l = 0; l < nc; l++) {
            for (ii = istart; ii < iend + 1; ii++) {
              for (jj = jstart; jj < jend + 1; jj++) {
                for (kk = kstart; kk < kend + 1; kk++) {
                  if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/
                    cols[cnt++] = l + nc * (slot + ii + gnx * jj + gnx * gny * kk);
                  }
                }
              }
            }
            rows[l] = l + nc * (slot);
          }
          PetscCall(MatSetValuesLocal(J, nc, rows, cnt, cols, values, INSERT_VALUES));
        }
      }
    }
    PetscCall(PetscFree(values));
    /* do not copy values to GPU since they are all zero and not yet needed there */
    PetscCall(MatBindToCPU(J, PETSC_TRUE));
    PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatBindToCPU(J, PETSC_FALSE));
    PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  }
  PetscCall(PetscFree2(rows, cols));
  PetscFunctionReturn(0);
}

/*
  DMCreateMatrix_DA_2d_MPIAIJ - Builds the nonzero structure of an AIJ matrix for a 2d DMDA
  with a dense dof x dof coupling block at every stencil location.
*/
PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ(DM da, Mat J)
{
  PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny, m, n, dim, s, *cols = NULL, k, nc, *rows = NULL, col, cnt, l, p, M, N;
  PetscInt               lstart, lend, pstart, pend, *dnz, *onz;
  MPI_Comm               comm;
  DMBoundaryType         bx, by;
  ISLocalToGlobalMapping ltog, mltog;
  DMDAStencilType        st;
  PetscBool              removedups = PETSC_FALSE, alreadyboundtocpu = PETSC_TRUE;
  PetscFunctionBegin;
  /*
         nc - number of components per grid point
         col - number of colors needed in one direction for single component problem
  */
  PetscCall(DMDAGetInfo(da, &dim, &m, &n, NULL, &M, &N, NULL, &nc, &s, &bx, &by, NULL, &st));
  /* with no periodicity the column indices below are generated in sorted order, so enable the fast insertion path */
  if (bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE) PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_TRUE));
  col = 2 * s + 1;
  /*
       With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times
       because of "wrapping" around the end of the domain hitting an entry already counted in the other direction.
  */
  if (M == 1 && 2 * s >= m) removedups = PETSC_TRUE;
  if (N == 1 && 2 * s >= n) removedups = PETSC_TRUE;
  PetscCall(DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL));
  PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL));
  PetscCall(PetscObjectGetComm((PetscObject)da, &comm));

  PetscCall(PetscMalloc2(nc, &rows, col * col * nc * nc, &cols));
  PetscCall(DMGetLocalToGlobalMapping(da, &ltog));

  PetscCall(MatSetBlockSize(J, nc));
  /* determine the matrix preallocation information */
  MatPreallocateBegin(comm, nc * nx * ny, nc * nx * ny, dnz, onz);
  for (i = xs; i < xs + nx; i++) {
    /* clamp the stencil at physical boundaries unless the direction is periodic */
    pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
    pend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));

    for (j = ys; j < ys + ny; j++) {
      slot = i - gxs + gnx * (j - gys); /* local (ghosted) index of grid point (i,j) */

      lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
      lend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));

      cnt = 0;
      for (k = 0; k < nc; k++) {
        for (l = lstart; l < lend + 1; l++) {
          for (p = pstart; p < pend + 1; p++) {
            if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star have either l = 0 or p = 0 */
              cols[cnt++] = k + nc * (slot + gnx * l + p);
            }
          }
        }
        rows[k] = k + nc * (slot);
      }
      if (removedups) PetscCall(MatPreallocateSetLocalRemoveDups(ltog, nc, rows, ltog, cnt, cols, dnz, onz));
      else PetscCall(MatPreallocateSetLocal(ltog, nc, rows, ltog, cnt, cols, dnz, onz));
    }
  }
  PetscCall(MatSetBlockSize(J, nc));
  PetscCall(MatSeqAIJSetPreallocation(J, 0, dnz));
  PetscCall(MatMPIAIJSetPreallocation(J, 0, dnz, 0, onz));
  MatPreallocateEnd(dnz, onz);
  /* only install our mapping if the matrix does not already have one */
  PetscCall(MatGetLocalToGlobalMapping(J, &mltog, NULL));
  if (!mltog) PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    for (i = xs; i < xs + nx; i++) {
      pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
      pend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));

      for (j = ys; j < ys + ny; j++) {
        slot = i - gxs + gnx * (j - gys);

        lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
        lend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));

        cnt = 0;
        /* unlike the counting pass, generate the nc consecutive component columns for a
           stencil point by incrementing the previous entry */
        for (l = lstart; l < lend + 1; l++) {
          for (p = pstart; p < pend + 1; p++) {
            if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star have either l = 0 or p = 0 */
              cols[cnt++] = nc * (slot + gnx * l + p);
              for (k = 1; k < nc; k++) {
                cols[cnt] = 1 + cols[cnt - 1];
                cnt++;
              }
            }
          }
        }
        for (k = 0; k < nc; k++) rows[k] = k + nc * (slot);
        PetscCall(MatSetValuesLocal(J, nc, rows, cnt, cols, NULL, INSERT_VALUES));
      }
    }
    /* do not copy values to GPU since they are all zero and not yet needed there */
    PetscCall(MatBoundToCPU(J, &alreadyboundtocpu)); /* preserve any pre-existing CPU binding */
    PetscCall(MatBindToCPU(J, PETSC_TRUE));
    PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
    if (!alreadyboundtocpu) PetscCall(MatBindToCPU(J, PETSC_FALSE));
    PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
    if (bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE) PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_FALSE));
  }
  PetscCall(PetscFree2(rows, cols));
  PetscFunctionReturn(0);
}

/*
  DMCreateMatrix_DA_2d_MPIAIJ_Fill - Like DMCreateMatrix_DA_2d_MPIAIJ but uses the
  per-block fill patterns dd->dfill (diagonal block) and dd->ofill (off-diagonal blocks),
  set via DMDASetBlockFills(), so only the coupled components are preallocated/inserted.
*/
PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ_Fill(DM da, Mat J)
{
  PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
  PetscInt               m, n, dim, s, *cols, k, nc, row, col, cnt, maxcnt = 0, l, p, M, N;
  PetscInt               lstart, lend, pstart, pend, *dnz, *onz;
  DM_DA                 *dd = (DM_DA *)da->data;
  PetscInt               ifill_col, *ofill = dd->ofill, *dfill = dd->dfill;
  MPI_Comm               comm;
  DMBoundaryType         bx, by;
  ISLocalToGlobalMapping ltog;
  DMDAStencilType        st;
  PetscBool              removedups = PETSC_FALSE;

  PetscFunctionBegin;
  /*
         nc - number of components per grid point
         col - number of colors needed in one direction for single component problem
  */
  PetscCall(DMDAGetInfo(da, &dim, &m, &n, NULL, &M, &N, NULL, &nc, &s, &bx, &by, NULL, &st));
  col = 2 * s
+ 1;
  /*
       With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times
       because of "wrapping" around the end of the domain hitting an entry already counted in the other direction.
  */
  if (M == 1 && 2 * s >= m) removedups = PETSC_TRUE;
  if (N == 1 && 2 * s >= n) removedups = PETSC_TRUE;
  PetscCall(DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL));
  PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL));
  PetscCall(PetscObjectGetComm((PetscObject)da, &comm));

  /* one row at a time is processed, so cols only needs room for a single component's couplings */
  PetscCall(PetscMalloc1(col * col * nc, &cols));
  PetscCall(DMGetLocalToGlobalMapping(da, &ltog));

  PetscCall(MatSetBlockSize(J, nc));
  /* determine the matrix preallocation information */
  MatPreallocateBegin(comm, nc * nx * ny, nc * nx * ny, dnz, onz);
  for (i = xs; i < xs + nx; i++) {
    pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
    pend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));

    for (j = ys; j < ys + ny; j++) {
      slot = i - gxs + gnx * (j - gys);

      lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
      lend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));

      for (k = 0; k < nc; k++) {
        cnt = 0;
        for (l = lstart; l < lend + 1; l++) {
          for (p = pstart; p < pend + 1; p++) {
            if (l || p) {
              /* off-diagonal grid point: couple only the components listed in ofill for component k */
              if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star */
                for (ifill_col = ofill[k]; ifill_col < ofill[k + 1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc * (slot + gnx * l + p);
              }
            } else {
              /* the grid point itself: use dfill if given, otherwise dense coupling of all nc components */
              if (dfill) {
                for (ifill_col = dfill[k]; ifill_col < dfill[k + 1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc * (slot + gnx * l + p);
              } else {
                for (ifill_col = 0; ifill_col < nc; ifill_col++) cols[cnt++] = ifill_col + nc * (slot + gnx * l + p);
              }
            }
          }
        }
        row    = k + nc * (slot);
        maxcnt = PetscMax(maxcnt, cnt);
        if (removedups) PetscCall(MatPreallocateSetLocalRemoveDups(ltog, 1, &row, ltog, cnt, cols, dnz, onz));
        else PetscCall(MatPreallocateSetLocal(ltog, 1, &row, ltog, cnt, cols, dnz, onz));
      }
    }
  }
  PetscCall(MatSeqAIJSetPreallocation(J, 0, dnz));
  PetscCall(MatMPIAIJSetPreallocation(J, 0, dnz, 0, onz));
  MatPreallocateEnd(dnz, onz);
  PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    for (i = xs; i < xs + nx; i++) {
      pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
      pend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));

      for (j = ys; j < ys + ny; j++) {
        slot = i - gxs + gnx * (j - gys);

        lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
        lend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));

        for (k = 0; k < nc; k++) {
          cnt = 0;
          for (l = lstart; l < lend + 1; l++) {
            for (p = pstart; p < pend + 1; p++) {
              if (l || p) {
                if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star */
                  for (ifill_col = ofill[k]; ifill_col < ofill[k + 1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc * (slot + gnx * l + p);
                }
              } else {
                if (dfill) {
                  for (ifill_col = dfill[k]; ifill_col < dfill[k + 1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc * (slot + gnx * l + p);
                } else {
                  for (ifill_col = 0; ifill_col < nc; ifill_col++) cols[cnt++] = ifill_col + nc * (slot + gnx * l + p);
                }
              }
            }
          }
          row = k + nc * (slot);
          PetscCall(MatSetValuesLocal(J, 1, &row, cnt, cols, NULL, INSERT_VALUES));
        }
      }
    }
    /* do not copy values to GPU since they are all zero and not yet needed there */
    PetscCall(MatBindToCPU(J, PETSC_TRUE));
    PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatBindToCPU(J, PETSC_FALSE));
    PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  }
  PetscCall(PetscFree(cols));
  PetscFunctionReturn(0);
}

/* ---------------------------------------------------------------------------------*/

/*
  DMCreateMatrix_DA_3d_MPIAIJ - 3d analogue of DMCreateMatrix_DA_2d_MPIAIJ:
  preallocates an AIJ matrix from the 3d DMDA stencil with dense dof x dof blocks
  and (unless da->prealloc_only) inserts zeros in every stencil location.
*/
PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ(DM da, Mat J)
{
  PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
  PetscInt               m, n, dim, s, *cols = NULL, k, nc, *rows = NULL, col, cnt, l, p, *dnz = NULL, *onz = NULL;
  PetscInt               istart, iend, jstart, jend, kstart, kend, zs, nz, gzs, gnz, ii, jj, kk, M, N, P;
  MPI_Comm               comm;
  DMBoundaryType         bx, by, bz;
  ISLocalToGlobalMapping ltog, mltog;
  DMDAStencilType        st;
  PetscBool              removedups = PETSC_FALSE;

  PetscFunctionBegin;
  /*
         nc - number of components per grid point
         col - number of colors needed in one
direction for single component problem
  */
  PetscCall(DMDAGetInfo(da, &dim, &m, &n, &p, &M, &N, &P, &nc, &s, &bx, &by, &bz, &st));
  /* with no periodicity the column indices below are generated in sorted order, so enable the fast insertion path */
  if (bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE && bz == DM_BOUNDARY_NONE) PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_TRUE));
  col = 2 * s + 1;

  /*
       With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times
       because of "wrapping" around the end of the domain hitting an entry already counted in the other direction.
  */
  if (M == 1 && 2 * s >= m) removedups = PETSC_TRUE;
  if (N == 1 && 2 * s >= n) removedups = PETSC_TRUE;
  if (P == 1 && 2 * s >= p) removedups = PETSC_TRUE;

  PetscCall(DMDAGetCorners(da, &xs, &ys, &zs, &nx, &ny, &nz));
  PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, &gzs, &gnx, &gny, &gnz));
  PetscCall(PetscObjectGetComm((PetscObject)da, &comm));

  PetscCall(PetscMalloc2(nc, &rows, col * col * col * nc * nc, &cols));
  PetscCall(DMGetLocalToGlobalMapping(da, &ltog));

  PetscCall(MatSetBlockSize(J, nc));
  /* determine the matrix preallocation information */
  MatPreallocateBegin(comm, nc * nx * ny * nz, nc * nx * ny * nz, dnz, onz);
  for (i = xs; i < xs + nx; i++) {
    /* clamp the stencil at physical boundaries unless the direction is periodic */
    istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
    iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
    for (j = ys; j < ys + ny; j++) {
      jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
      jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
      for (k = zs; k < zs + nz; k++) {
        kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
        kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));

        slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs); /* local (ghosted) index of (i,j,k) */

        cnt = 0;
        for (l = 0; l < nc; l++) {
          for (ii = istart; ii < iend + 1; ii++) {
            for (jj = jstart; jj < jend + 1; jj++) {
              for (kk = kstart; kk < kend + 1; kk++) {
                if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/
                  cols[cnt++] = l + nc * (slot + ii + gnx * jj + gnx * gny * kk);
                }
              }
            }
          }
          rows[l] = l + nc * (slot);
        }
        if (removedups) PetscCall(MatPreallocateSetLocalRemoveDups(ltog, nc, rows, ltog, cnt, cols, dnz, onz));
        else PetscCall(MatPreallocateSetLocal(ltog, nc, rows, ltog, cnt, cols, dnz, onz));
      }
    }
  }
  PetscCall(MatSetBlockSize(J, nc));
  PetscCall(MatSeqAIJSetPreallocation(J, 0, dnz));
  PetscCall(MatMPIAIJSetPreallocation(J, 0, dnz, 0, onz));
  MatPreallocateEnd(dnz, onz);
  /* only install our mapping if the matrix does not already have one */
  PetscCall(MatGetLocalToGlobalMapping(J, &mltog, NULL));
  if (!mltog) PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    for (i = xs; i < xs + nx; i++) {
      istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
      iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
      for (j = ys; j < ys + ny; j++) {
        jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
        jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
        for (k = zs; k < zs + nz; k++) {
          kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
          kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));

          slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);

          cnt = 0;
          /* note the loop nesting here is kk/jj/ii (opposite of the counting pass) so
             that column indices come out in increasing order */
          for (kk = kstart; kk < kend + 1; kk++) {
            for (jj = jstart; jj < jend + 1; jj++) {
              for (ii = istart; ii < iend + 1; ii++) {
                if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/
                  cols[cnt++] = nc * (slot + ii + gnx * jj + gnx * gny * kk);
                  for (l = 1; l < nc; l++) {
                    cols[cnt] = 1 + cols[cnt - 1];
                    cnt++;
                  }
                }
              }
            }
          }
          rows[0] = nc * (slot);
          for (l = 1; l < nc; l++) rows[l] = 1 + rows[l - 1];
          PetscCall(MatSetValuesLocal(J, nc, rows, cnt, cols, NULL, INSERT_VALUES));
        }
      }
    }
    /* do not copy values to GPU since they are all zero and not yet needed there */
    PetscCall(MatBindToCPU(J, PETSC_TRUE));
    PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
    if (bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE && bz == DM_BOUNDARY_NONE) PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_FALSE));
    PetscCall(MatBindToCPU(J, PETSC_FALSE));
    PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  }
  PetscCall(PetscFree2(rows, cols));
  PetscFunctionReturn(0);
}

/* ---------------------------------------------------------------------------------*/

/*
  DMCreateMatrix_DA_1d_MPIAIJ_Fill - 1d AIJ matrix creation honoring the dfill/ofill
  block fill patterns set by DMDASetBlockFills().  Only stencil width s <= 1 is
  supported (checked below).
*/
PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ_Fill(DM da, Mat J)
{
  DM_DA                 *dd = (DM_DA *)da->data;
  PetscInt               xs, nx, i, j, gxs, gnx, row, k, l;
  PetscInt               m, dim, s, *cols = NULL, nc, cnt, maxcnt = 0, *ocols;
  PetscInt              *ofill = dd->ofill, *dfill = dd->dfill;
  DMBoundaryType         bx;
  ISLocalToGlobalMapping ltog;
  PetscMPIInt            rank, size;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)da), &rank));
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)da), &size));

  /*
         nc - number
of components per grid point

  */
  PetscCall(DMDAGetInfo(da, &dim, &m, NULL, NULL, NULL, NULL, NULL, &nc, &s, &bx, NULL, NULL, NULL));
  PetscCheck(s <= 1, PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "Matrix creation for 1d not implemented correctly for stencil width larger than 1");
  PetscCall(DMDAGetCorners(da, &xs, NULL, NULL, &nx, NULL, NULL));
  PetscCall(DMDAGetGhostCorners(da, &gxs, NULL, NULL, &gnx, NULL, NULL));

  PetscCall(MatSetBlockSize(J, nc));
  /* cols/ocols here are per-row diagonal/off-diagonal nonzero COUNTS for preallocation,
     not column indices (cols is reused as an index buffer later) */
  PetscCall(PetscCalloc2(nx * nc, &cols, nx * nc, &ocols));

  /*
        note should be smaller for first and last process with no periodic
        does not handle dfill
  */
  cnt = 0;
  /* coupling with process to the left */
  for (i = 0; i < s; i++) {
    for (j = 0; j < nc; j++) {
      ocols[cnt] = ((rank == 0) ? 0 : (s - i) * (ofill[j + 1] - ofill[j]));
      /* NOTE(review): dfill is dereferenced unconditionally here (and below), while the
         insertion phase guards with `if (dfill)`; this appears to assume dfill is
         non-NULL whenever this routine is called -- confirm against the caller */
      cols[cnt] = dfill[j + 1] - dfill[j] + (s + i) * (ofill[j + 1] - ofill[j]);
      if (rank == 0 && (dd->bx == DM_BOUNDARY_PERIODIC)) {
        if (size > 1) ocols[cnt] += (s - i) * (ofill[j + 1] - ofill[j]);
        else cols[cnt] += (s - i) * (ofill[j + 1] - ofill[j]);
      }
      maxcnt = PetscMax(maxcnt, ocols[cnt] + cols[cnt]);
      cnt++;
    }
  }
  /* interior rows: full stencil on both sides, all on-process */
  for (i = s; i < nx - s; i++) {
    for (j = 0; j < nc; j++) {
      cols[cnt] = dfill[j + 1] - dfill[j] + 2 * s * (ofill[j + 1] - ofill[j]);
      maxcnt    = PetscMax(maxcnt, ocols[cnt] + cols[cnt]);
      cnt++;
    }
  }
  /* coupling with process to the right */
  for (i = nx - s; i < nx; i++) {
    for (j = 0; j < nc; j++) {
      ocols[cnt] = ((rank == (size - 1)) ? 0 : (i - nx + s + 1) * (ofill[j + 1] - ofill[j]));
      cols[cnt]  = dfill[j + 1] - dfill[j] + (s + nx - i - 1) * (ofill[j + 1] - ofill[j]);
      if ((rank == size - 1) && (dd->bx == DM_BOUNDARY_PERIODIC)) {
        if (size > 1) ocols[cnt] += (i - nx + s + 1) * (ofill[j + 1] - ofill[j]);
        else cols[cnt] += (i - nx + s + 1) * (ofill[j + 1] - ofill[j]);
      }
      maxcnt = PetscMax(maxcnt, ocols[cnt] + cols[cnt]);
      cnt++;
    }
  }

  PetscCall(MatSeqAIJSetPreallocation(J, 0, cols));
  PetscCall(MatMPIAIJSetPreallocation(J, 0, cols, 0, ocols));
  PetscCall(PetscFree2(cols, ocols));

  PetscCall(DMGetLocalToGlobalMapping(da, &ltog));
  PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    PetscCall(PetscMalloc1(maxcnt, &cols));
    row = xs * nc; /* note: this phase uses GLOBAL indices via MatSetValues(), not MatSetValuesLocal() */
    /* coupling with process to the left */
    for (i = xs; i < xs + s; i++) {
      for (j = 0; j < nc; j++) {
        cnt = 0;
        if (rank) {
          for (l = 0; l < s; l++) {
            for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i - s + l) * nc + ofill[k];
          }
        }
        if (rank == 0 && (dd->bx == DM_BOUNDARY_PERIODIC)) {
          /* wrap around the left end of the domain */
          for (l = 0; l < s; l++) {
            for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (m + i - s - l) * nc + ofill[k];
          }
        }
        if (dfill) {
          for (k = dfill[j]; k < dfill[j + 1]; k++) cols[cnt++] = i * nc + dfill[k];
        } else {
          for (k = 0; k < nc; k++) cols[cnt++] = i * nc + k;
        }
        for (l = 0; l < s; l++) {
          for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i + s - l) * nc + ofill[k];
        }
        PetscCall(MatSetValues(J, 1, &row, cnt, cols, NULL, INSERT_VALUES));
        row++;
      }
    }
    for (i = xs + s; i < xs + nx - s; i++) {
      for (j = 0; j < nc; j++) {
        cnt = 0;
        for (l = 0; l < s; l++) {
          for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i - s + l) * nc + ofill[k];
        }
        if (dfill) {
          for (k = dfill[j]; k < dfill[j + 1]; k++) cols[cnt++] = i * nc + dfill[k];
        } else {
          for (k = 0; k < nc; k++) cols[cnt++] = i * nc + k;
        }
        for (l = 0; l < s; l++) {
          for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i + s - l) * nc + ofill[k];
        }
        PetscCall(MatSetValues(J, 1, &row, cnt, cols, NULL, INSERT_VALUES));
        row++;
      }
    }
    /* coupling with process to the right */
    for (i = xs + nx - s; i < xs + nx; i++) {
      for (j = 0; j < nc; j++) {
        cnt = 0;
        for (l = 0; l < s; l++) {
          for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i - s + l) * nc + ofill[k];
        }
        if (dfill) {
          for (k = dfill[j]; k < dfill[j + 1]; k++) cols[cnt++] = i * nc + dfill[k];
        } else {
          for (k = 0; k < nc; k++) cols[cnt++] = i * nc + k;
        }
        if (rank < size - 1) {
          for (l = 0; l < s; l++) {
            for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i + s - l) * nc + ofill[k];
          }
        }
        if ((rank == size - 1) && (dd->bx == DM_BOUNDARY_PERIODIC)) {
          /* wrap around the right end of the domain.
             NOTE(review): the wrapped index (i - s - l - m + 2) differs in form from the
             left-edge wrap (m + i - s - l); presumably correct only for s == 1 (enforced
             by the PetscCheck above) -- verify before generalizing */
          for (l = 0; l < s; l++) {
            for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i - s - l - m + 2) * nc + ofill[k];
          }
        }
        PetscCall(MatSetValues(J, 1, &row, cnt, cols, NULL, INSERT_VALUES));
        row++;
      }
    }
    PetscCall(PetscFree(cols));
    /* do not copy values to GPU since they are all zero and not yet needed there */
    PetscCall(MatBindToCPU(J, PETSC_TRUE));
    PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatBindToCPU(J, PETSC_FALSE));
    PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  }
  PetscFunctionReturn(0);
}

/*
---------------------------------------------------------------------------------*/

/*
  DMCreateMatrix_DA_1d_MPIAIJ - Builds the nonzero structure of an AIJ matrix for a 1d DMDA
  with dense dof x dof coupling blocks; preallocation is a simple upper bound of
  col*nc nonzeros per row rather than an exact count.
*/
PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ(DM da, Mat J)
{
  PetscInt               xs, nx, i, i1, slot, gxs, gnx;
  PetscInt               m, dim, s, *cols = NULL, nc, *rows = NULL, col, cnt, l;
  PetscInt               istart, iend;
  DMBoundaryType         bx;
  ISLocalToGlobalMapping ltog, mltog;

  PetscFunctionBegin;
  /*
         nc - number of components per grid point
         col - number of colors needed in one direction for single component problem
  */
  PetscCall(DMDAGetInfo(da, &dim, &m, NULL, NULL, NULL, NULL, NULL, &nc, &s, &bx, NULL, NULL, NULL));
  /* with no periodicity the column indices below are generated in sorted order, so enable the fast insertion path */
  if (bx == DM_BOUNDARY_NONE) PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_TRUE));
  col = 2 * s + 1;

  PetscCall(DMDAGetCorners(da, &xs, NULL, NULL, &nx, NULL, NULL));
  PetscCall(DMDAGetGhostCorners(da, &gxs, NULL, NULL, &gnx, NULL, NULL));

  PetscCall(MatSetBlockSize(J, nc));
  PetscCall(MatSeqAIJSetPreallocation(J, col * nc, NULL));
  PetscCall(MatMPIAIJSetPreallocation(J, col * nc, NULL, col * nc, NULL));

  PetscCall(DMGetLocalToGlobalMapping(da, &ltog));
  /* only install our mapping if the matrix does not already have one */
  PetscCall(MatGetLocalToGlobalMapping(J, &mltog, NULL));
  if (!mltog) PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    PetscCall(PetscMalloc2(nc, &rows, col * nc * nc, &cols));
    for (i = xs; i < xs + nx; i++) {
      /* clamp the stencil to the available ghost region */
      istart = PetscMax(-s, gxs - i);
      iend   = PetscMin(s, gxs + gnx - i - 1);
      slot   = i - gxs;

      cnt = 0;
      for (i1 = istart; i1 < iend + 1; i1++) {
        cols[cnt++] = nc * (slot + i1);
        for (l = 1; l < nc; l++) {
          cols[cnt] = 1 + cols[cnt - 1];
          cnt++;
        }
      }
      rows[0] = nc * (slot);
      for (l = 1; l < nc; l++) rows[l] = 1 + rows[l - 1];
      PetscCall(MatSetValuesLocal(J, nc, rows, cnt, cols, NULL, INSERT_VALUES));
    }
    /* do not copy values to GPU since they are all zero and not yet needed there */
    PetscCall(MatBindToCPU(J, PETSC_TRUE));
    PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
    if (bx == DM_BOUNDARY_NONE) PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_FALSE));
    PetscCall(MatBindToCPU(J, PETSC_FALSE));
    PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
    PetscCall(PetscFree2(rows, cols));
  }
  PetscFunctionReturn(0);
}

/* ---------------------------------------------------------------------------------*/

/*
  DMCreateMatrix_DA_1d_SeqAIJ_NoPreallocation - Like DMCreateMatrix_DA_1d_MPIAIJ but
  uses MatSeqAIJSetTotalPreallocation() (a single total nonzero count) instead of a
  per-row preallocation; sequential AIJ only.
*/
PetscErrorCode DMCreateMatrix_DA_1d_SeqAIJ_NoPreallocation(DM da, Mat J)
{
  PetscInt               xs, nx, i, i1, slot, gxs, gnx;
  PetscInt               m, dim, s, *cols = NULL, nc, *rows = NULL, col, cnt, l;
  PetscInt               istart, iend;
  DMBoundaryType         bx;
  ISLocalToGlobalMapping ltog, mltog;

  PetscFunctionBegin;
  /*
         nc - number of components per grid point
         col - number of colors needed in one direction for single component problem
  */
  PetscCall(DMDAGetInfo(da, &dim, &m, NULL, NULL, NULL, NULL, NULL, &nc, &s, &bx, NULL, NULL, NULL));
  col = 2 * s + 1;

  PetscCall(DMDAGetCorners(da, &xs, NULL, NULL, &nx, NULL, NULL));
  PetscCall(DMDAGetGhostCorners(da, &gxs, NULL, NULL, &gnx, NULL, NULL));

1677 PetscCall(MatSetBlockSize(J, nc)); 1678 PetscCall(MatSeqAIJSetTotalPreallocation(J, nx * nc * col * nc)); 1679 1680 PetscCall(DMGetLocalToGlobalMapping(da, <og)); 1681 PetscCall(MatGetLocalToGlobalMapping(J, &mltog, NULL)); 1682 if (!mltog) PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog)); 1683 1684 /* 1685 For each node in the grid: we get the neighbors in the local (on processor ordering 1686 that includes the ghost points) then MatSetValuesLocal() maps those indices to the global 1687 PETSc ordering. 1688 */ 1689 if (!da->prealloc_only) { 1690 PetscCall(PetscMalloc2(nc, &rows, col * nc * nc, &cols)); 1691 for (i = xs; i < xs + nx; i++) { 1692 istart = PetscMax(-s, gxs - i); 1693 iend = PetscMin(s, gxs + gnx - i - 1); 1694 slot = i - gxs; 1695 1696 cnt = 0; 1697 for (i1 = istart; i1 < iend + 1; i1++) { 1698 cols[cnt++] = nc * (slot + i1); 1699 for (l = 1; l < nc; l++) { 1700 cols[cnt] = 1 + cols[cnt - 1]; 1701 cnt++; 1702 } 1703 } 1704 rows[0] = nc * (slot); 1705 for (l = 1; l < nc; l++) rows[l] = 1 + rows[l - 1]; 1706 PetscCall(MatSetValuesLocal(J, nc, rows, cnt, cols, NULL, INSERT_VALUES)); 1707 } 1708 /* do not copy values to GPU since they are all zero and not yet needed there */ 1709 PetscCall(MatBindToCPU(J, PETSC_TRUE)); 1710 PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY)); 1711 PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY)); 1712 if (bx == DM_BOUNDARY_NONE) PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_FALSE)); 1713 PetscCall(MatBindToCPU(J, PETSC_FALSE)); 1714 PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE)); 1715 PetscCall(PetscFree2(rows, cols)); 1716 } 1717 PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_FALSE)); 1718 PetscFunctionReturn(0); 1719 } 1720 1721 PetscErrorCode DMCreateMatrix_DA_2d_MPIBAIJ(DM da, Mat J) 1722 { 1723 PetscInt xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny; 1724 PetscInt m, n, dim, s, *cols, nc, col, cnt, *dnz, *onz; 1725 PetscInt istart, iend, jstart, jend, ii, jj; 1726 MPI_Comm comm; 
1727 PetscScalar *values; 1728 DMBoundaryType bx, by; 1729 DMDAStencilType st; 1730 ISLocalToGlobalMapping ltog; 1731 1732 PetscFunctionBegin; 1733 /* 1734 nc - number of components per grid point 1735 col - number of colors needed in one direction for single component problem 1736 */ 1737 PetscCall(DMDAGetInfo(da, &dim, &m, &n, NULL, NULL, NULL, NULL, &nc, &s, &bx, &by, NULL, &st)); 1738 col = 2 * s + 1; 1739 1740 PetscCall(DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL)); 1741 PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL)); 1742 PetscCall(PetscObjectGetComm((PetscObject)da, &comm)); 1743 1744 PetscCall(PetscMalloc1(col * col * nc * nc, &cols)); 1745 1746 PetscCall(DMGetLocalToGlobalMapping(da, <og)); 1747 1748 /* determine the matrix preallocation information */ 1749 MatPreallocateBegin(comm, nx * ny, nx * ny, dnz, onz); 1750 for (i = xs; i < xs + nx; i++) { 1751 istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i)); 1752 iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1)); 1753 for (j = ys; j < ys + ny; j++) { 1754 jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j)); 1755 jend = (by == DM_BOUNDARY_PERIODIC) ? 
s : (PetscMin(s, n - j - 1)); 1756 slot = i - gxs + gnx * (j - gys); 1757 1758 /* Find block columns in block row */ 1759 cnt = 0; 1760 for (ii = istart; ii < iend + 1; ii++) { 1761 for (jj = jstart; jj < jend + 1; jj++) { 1762 if (st == DMDA_STENCIL_BOX || !ii || !jj) { /* BOX or on the STAR */ 1763 cols[cnt++] = slot + ii + gnx * jj; 1764 } 1765 } 1766 } 1767 PetscCall(MatPreallocateSetLocalBlock(ltog, 1, &slot, ltog, cnt, cols, dnz, onz)); 1768 } 1769 } 1770 PetscCall(MatSeqBAIJSetPreallocation(J, nc, 0, dnz)); 1771 PetscCall(MatMPIBAIJSetPreallocation(J, nc, 0, dnz, 0, onz)); 1772 MatPreallocateEnd(dnz, onz); 1773 1774 PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog)); 1775 1776 /* 1777 For each node in the grid: we get the neighbors in the local (on processor ordering 1778 that includes the ghost points) then MatSetValuesLocal() maps those indices to the global 1779 PETSc ordering. 1780 */ 1781 if (!da->prealloc_only) { 1782 PetscCall(PetscCalloc1(col * col * nc * nc, &values)); 1783 for (i = xs; i < xs + nx; i++) { 1784 istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i)); 1785 iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1)); 1786 for (j = ys; j < ys + ny; j++) { 1787 jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j)); 1788 jend = (by == DM_BOUNDARY_PERIODIC) ? 
s : (PetscMin(s, n - j - 1)); 1789 slot = i - gxs + gnx * (j - gys); 1790 cnt = 0; 1791 for (ii = istart; ii < iend + 1; ii++) { 1792 for (jj = jstart; jj < jend + 1; jj++) { 1793 if (st == DMDA_STENCIL_BOX || !ii || !jj) { /* BOX or on the STAR */ 1794 cols[cnt++] = slot + ii + gnx * jj; 1795 } 1796 } 1797 } 1798 PetscCall(MatSetValuesBlockedLocal(J, 1, &slot, cnt, cols, values, INSERT_VALUES)); 1799 } 1800 } 1801 PetscCall(PetscFree(values)); 1802 /* do not copy values to GPU since they are all zero and not yet needed there */ 1803 PetscCall(MatBindToCPU(J, PETSC_TRUE)); 1804 PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY)); 1805 PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY)); 1806 PetscCall(MatBindToCPU(J, PETSC_FALSE)); 1807 PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE)); 1808 } 1809 PetscCall(PetscFree(cols)); 1810 PetscFunctionReturn(0); 1811 } 1812 1813 PetscErrorCode DMCreateMatrix_DA_3d_MPIBAIJ(DM da, Mat J) 1814 { 1815 PetscInt xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny; 1816 PetscInt m, n, dim, s, *cols, k, nc, col, cnt, p, *dnz, *onz; 1817 PetscInt istart, iend, jstart, jend, kstart, kend, zs, nz, gzs, gnz, ii, jj, kk; 1818 MPI_Comm comm; 1819 PetscScalar *values; 1820 DMBoundaryType bx, by, bz; 1821 DMDAStencilType st; 1822 ISLocalToGlobalMapping ltog; 1823 1824 PetscFunctionBegin; 1825 /* 1826 nc - number of components per grid point 1827 col - number of colors needed in one direction for single component problem 1828 1829 */ 1830 PetscCall(DMDAGetInfo(da, &dim, &m, &n, &p, NULL, NULL, NULL, &nc, &s, &bx, &by, &bz, &st)); 1831 col = 2 * s + 1; 1832 1833 PetscCall(DMDAGetCorners(da, &xs, &ys, &zs, &nx, &ny, &nz)); 1834 PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, &gzs, &gnx, &gny, &gnz)); 1835 PetscCall(PetscObjectGetComm((PetscObject)da, &comm)); 1836 1837 PetscCall(PetscMalloc1(col * col * col, &cols)); 1838 1839 PetscCall(DMGetLocalToGlobalMapping(da, <og)); 1840 1841 /* determine the matrix preallocation information 
*/ 1842 MatPreallocateBegin(comm, nx * ny * nz, nx * ny * nz, dnz, onz); 1843 for (i = xs; i < xs + nx; i++) { 1844 istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i)); 1845 iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1)); 1846 for (j = ys; j < ys + ny; j++) { 1847 jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j)); 1848 jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1)); 1849 for (k = zs; k < zs + nz; k++) { 1850 kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k)); 1851 kend = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1)); 1852 1853 slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs); 1854 1855 /* Find block columns in block row */ 1856 cnt = 0; 1857 for (ii = istart; ii < iend + 1; ii++) { 1858 for (jj = jstart; jj < jend + 1; jj++) { 1859 for (kk = kstart; kk < kend + 1; kk++) { 1860 if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/ 1861 cols[cnt++] = slot + ii + gnx * jj + gnx * gny * kk; 1862 } 1863 } 1864 } 1865 } 1866 PetscCall(MatPreallocateSetLocalBlock(ltog, 1, &slot, ltog, cnt, cols, dnz, onz)); 1867 } 1868 } 1869 } 1870 PetscCall(MatSeqBAIJSetPreallocation(J, nc, 0, dnz)); 1871 PetscCall(MatMPIBAIJSetPreallocation(J, nc, 0, dnz, 0, onz)); 1872 MatPreallocateEnd(dnz, onz); 1873 1874 PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog)); 1875 1876 /* 1877 For each node in the grid: we get the neighbors in the local (on processor ordering 1878 that includes the ghost points) then MatSetValuesLocal() maps those indices to the global 1879 PETSc ordering. 1880 */ 1881 if (!da->prealloc_only) { 1882 PetscCall(PetscCalloc1(col * col * col * nc * nc, &values)); 1883 for (i = xs; i < xs + nx; i++) { 1884 istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i)); 1885 iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1)); 1886 for (j = ys; j < ys + ny; j++) { 1887 jstart = (by == DM_BOUNDARY_PERIODIC) ? 
-s : (PetscMax(-s, -j)); 1888 jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1)); 1889 for (k = zs; k < zs + nz; k++) { 1890 kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k)); 1891 kend = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1)); 1892 1893 slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs); 1894 1895 cnt = 0; 1896 for (ii = istart; ii < iend + 1; ii++) { 1897 for (jj = jstart; jj < jend + 1; jj++) { 1898 for (kk = kstart; kk < kend + 1; kk++) { 1899 if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/ 1900 cols[cnt++] = slot + ii + gnx * jj + gnx * gny * kk; 1901 } 1902 } 1903 } 1904 } 1905 PetscCall(MatSetValuesBlockedLocal(J, 1, &slot, cnt, cols, values, INSERT_VALUES)); 1906 } 1907 } 1908 } 1909 PetscCall(PetscFree(values)); 1910 /* do not copy values to GPU since they are all zero and not yet needed there */ 1911 PetscCall(MatBindToCPU(J, PETSC_TRUE)); 1912 PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY)); 1913 PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY)); 1914 PetscCall(MatBindToCPU(J, PETSC_FALSE)); 1915 PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE)); 1916 } 1917 PetscCall(PetscFree(cols)); 1918 PetscFunctionReturn(0); 1919 } 1920 1921 /* 1922 This helper is for of SBAIJ preallocation, to discard the lower-triangular values which are difficult to 1923 identify in the local ordering with periodic domain. 
1924 */ 1925 static PetscErrorCode L2GFilterUpperTriangular(ISLocalToGlobalMapping ltog, PetscInt *row, PetscInt *cnt, PetscInt col[]) 1926 { 1927 PetscInt i, n; 1928 1929 PetscFunctionBegin; 1930 PetscCall(ISLocalToGlobalMappingApplyBlock(ltog, 1, row, row)); 1931 PetscCall(ISLocalToGlobalMappingApplyBlock(ltog, *cnt, col, col)); 1932 for (i = 0, n = 0; i < *cnt; i++) { 1933 if (col[i] >= *row) col[n++] = col[i]; 1934 } 1935 *cnt = n; 1936 PetscFunctionReturn(0); 1937 } 1938 1939 PetscErrorCode DMCreateMatrix_DA_2d_MPISBAIJ(DM da, Mat J) 1940 { 1941 PetscInt xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny; 1942 PetscInt m, n, dim, s, *cols, nc, col, cnt, *dnz, *onz; 1943 PetscInt istart, iend, jstart, jend, ii, jj; 1944 MPI_Comm comm; 1945 PetscScalar *values; 1946 DMBoundaryType bx, by; 1947 DMDAStencilType st; 1948 ISLocalToGlobalMapping ltog; 1949 1950 PetscFunctionBegin; 1951 /* 1952 nc - number of components per grid point 1953 col - number of colors needed in one direction for single component problem 1954 */ 1955 PetscCall(DMDAGetInfo(da, &dim, &m, &n, NULL, NULL, NULL, NULL, &nc, &s, &bx, &by, NULL, &st)); 1956 col = 2 * s + 1; 1957 1958 PetscCall(DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL)); 1959 PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL)); 1960 PetscCall(PetscObjectGetComm((PetscObject)da, &comm)); 1961 1962 PetscCall(PetscMalloc1(col * col * nc * nc, &cols)); 1963 1964 PetscCall(DMGetLocalToGlobalMapping(da, <og)); 1965 1966 /* determine the matrix preallocation information */ 1967 MatPreallocateBegin(comm, nx * ny, nx * ny, dnz, onz); 1968 for (i = xs; i < xs + nx; i++) { 1969 istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i)); 1970 iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1)); 1971 for (j = ys; j < ys + ny; j++) { 1972 jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j)); 1973 jend = (by == DM_BOUNDARY_PERIODIC) ? 
s : (PetscMin(s, n - j - 1)); 1974 slot = i - gxs + gnx * (j - gys); 1975 1976 /* Find block columns in block row */ 1977 cnt = 0; 1978 for (ii = istart; ii < iend + 1; ii++) { 1979 for (jj = jstart; jj < jend + 1; jj++) { 1980 if (st == DMDA_STENCIL_BOX || !ii || !jj) cols[cnt++] = slot + ii + gnx * jj; 1981 } 1982 } 1983 PetscCall(L2GFilterUpperTriangular(ltog, &slot, &cnt, cols)); 1984 PetscCall(MatPreallocateSymmetricSetBlock(slot, cnt, cols, dnz, onz)); 1985 } 1986 } 1987 PetscCall(MatSeqSBAIJSetPreallocation(J, nc, 0, dnz)); 1988 PetscCall(MatMPISBAIJSetPreallocation(J, nc, 0, dnz, 0, onz)); 1989 MatPreallocateEnd(dnz, onz); 1990 1991 PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog)); 1992 1993 /* 1994 For each node in the grid: we get the neighbors in the local (on processor ordering 1995 that includes the ghost points) then MatSetValuesLocal() maps those indices to the global 1996 PETSc ordering. 1997 */ 1998 if (!da->prealloc_only) { 1999 PetscCall(PetscCalloc1(col * col * nc * nc, &values)); 2000 for (i = xs; i < xs + nx; i++) { 2001 istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i)); 2002 iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1)); 2003 for (j = ys; j < ys + ny; j++) { 2004 jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j)); 2005 jend = (by == DM_BOUNDARY_PERIODIC) ? 
s : (PetscMin(s, n - j - 1)); 2006 slot = i - gxs + gnx * (j - gys); 2007 2008 /* Find block columns in block row */ 2009 cnt = 0; 2010 for (ii = istart; ii < iend + 1; ii++) { 2011 for (jj = jstart; jj < jend + 1; jj++) { 2012 if (st == DMDA_STENCIL_BOX || !ii || !jj) cols[cnt++] = slot + ii + gnx * jj; 2013 } 2014 } 2015 PetscCall(L2GFilterUpperTriangular(ltog, &slot, &cnt, cols)); 2016 PetscCall(MatSetValuesBlocked(J, 1, &slot, cnt, cols, values, INSERT_VALUES)); 2017 } 2018 } 2019 PetscCall(PetscFree(values)); 2020 /* do not copy values to GPU since they are all zero and not yet needed there */ 2021 PetscCall(MatBindToCPU(J, PETSC_TRUE)); 2022 PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY)); 2023 PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY)); 2024 PetscCall(MatBindToCPU(J, PETSC_FALSE)); 2025 PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE)); 2026 } 2027 PetscCall(PetscFree(cols)); 2028 PetscFunctionReturn(0); 2029 } 2030 2031 PetscErrorCode DMCreateMatrix_DA_3d_MPISBAIJ(DM da, Mat J) 2032 { 2033 PetscInt xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny; 2034 PetscInt m, n, dim, s, *cols, k, nc, col, cnt, p, *dnz, *onz; 2035 PetscInt istart, iend, jstart, jend, kstart, kend, zs, nz, gzs, gnz, ii, jj, kk; 2036 MPI_Comm comm; 2037 PetscScalar *values; 2038 DMBoundaryType bx, by, bz; 2039 DMDAStencilType st; 2040 ISLocalToGlobalMapping ltog; 2041 2042 PetscFunctionBegin; 2043 /* 2044 nc - number of components per grid point 2045 col - number of colors needed in one direction for single component problem 2046 */ 2047 PetscCall(DMDAGetInfo(da, &dim, &m, &n, &p, NULL, NULL, NULL, &nc, &s, &bx, &by, &bz, &st)); 2048 col = 2 * s + 1; 2049 2050 PetscCall(DMDAGetCorners(da, &xs, &ys, &zs, &nx, &ny, &nz)); 2051 PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, &gzs, &gnx, &gny, &gnz)); 2052 PetscCall(PetscObjectGetComm((PetscObject)da, &comm)); 2053 2054 /* create the matrix */ 2055 PetscCall(PetscMalloc1(col * col * col, &cols)); 2056 2057 
PetscCall(DMGetLocalToGlobalMapping(da, <og)); 2058 2059 /* determine the matrix preallocation information */ 2060 MatPreallocateBegin(comm, nx * ny * nz, nx * ny * nz, dnz, onz); 2061 for (i = xs; i < xs + nx; i++) { 2062 istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i)); 2063 iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1)); 2064 for (j = ys; j < ys + ny; j++) { 2065 jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j)); 2066 jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1)); 2067 for (k = zs; k < zs + nz; k++) { 2068 kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k)); 2069 kend = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1)); 2070 2071 slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs); 2072 2073 /* Find block columns in block row */ 2074 cnt = 0; 2075 for (ii = istart; ii < iend + 1; ii++) { 2076 for (jj = jstart; jj < jend + 1; jj++) { 2077 for (kk = kstart; kk < kend + 1; kk++) { 2078 if ((st == DMDA_STENCIL_BOX) || (!ii && !jj) || (!jj && !kk) || (!ii && !kk)) cols[cnt++] = slot + ii + gnx * jj + gnx * gny * kk; 2079 } 2080 } 2081 } 2082 PetscCall(L2GFilterUpperTriangular(ltog, &slot, &cnt, cols)); 2083 PetscCall(MatPreallocateSymmetricSetBlock(slot, cnt, cols, dnz, onz)); 2084 } 2085 } 2086 } 2087 PetscCall(MatSeqSBAIJSetPreallocation(J, nc, 0, dnz)); 2088 PetscCall(MatMPISBAIJSetPreallocation(J, nc, 0, dnz, 0, onz)); 2089 MatPreallocateEnd(dnz, onz); 2090 2091 PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog)); 2092 2093 /* 2094 For each node in the grid: we get the neighbors in the local (on processor ordering 2095 that includes the ghost points) then MatSetValuesLocal() maps those indices to the global 2096 PETSc ordering. 2097 */ 2098 if (!da->prealloc_only) { 2099 PetscCall(PetscCalloc1(col * col * col * nc * nc, &values)); 2100 for (i = xs; i < xs + nx; i++) { 2101 istart = (bx == DM_BOUNDARY_PERIODIC) ? 
-s : (PetscMax(-s, -i)); 2102 iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1)); 2103 for (j = ys; j < ys + ny; j++) { 2104 jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j)); 2105 jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1)); 2106 for (k = zs; k < zs + nz; k++) { 2107 kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k)); 2108 kend = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1)); 2109 2110 slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs); 2111 2112 cnt = 0; 2113 for (ii = istart; ii < iend + 1; ii++) { 2114 for (jj = jstart; jj < jend + 1; jj++) { 2115 for (kk = kstart; kk < kend + 1; kk++) { 2116 if ((st == DMDA_STENCIL_BOX) || (!ii && !jj) || (!jj && !kk) || (!ii && !kk)) cols[cnt++] = slot + ii + gnx * jj + gnx * gny * kk; 2117 } 2118 } 2119 } 2120 PetscCall(L2GFilterUpperTriangular(ltog, &slot, &cnt, cols)); 2121 PetscCall(MatSetValuesBlocked(J, 1, &slot, cnt, cols, values, INSERT_VALUES)); 2122 } 2123 } 2124 } 2125 PetscCall(PetscFree(values)); 2126 /* do not copy values to GPU since they are all zero and not yet needed there */ 2127 PetscCall(MatBindToCPU(J, PETSC_TRUE)); 2128 PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY)); 2129 PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY)); 2130 PetscCall(MatBindToCPU(J, PETSC_FALSE)); 2131 PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE)); 2132 } 2133 PetscCall(PetscFree(cols)); 2134 PetscFunctionReturn(0); 2135 } 2136 2137 /* ---------------------------------------------------------------------------------*/ 2138 2139 PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ_Fill(DM da, Mat J) 2140 { 2141 PetscInt xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny; 2142 PetscInt m, n, dim, s, *cols, k, nc, row, col, cnt, maxcnt = 0, l, p, *dnz, *onz; 2143 PetscInt istart, iend, jstart, jend, kstart, kend, zs, nz, gzs, gnz, ii, jj, kk, M, N, P; 2144 DM_DA *dd = (DM_DA *)da->data; 2145 PetscInt ifill_col, *dfill = dd->dfill, *ofill = 
dd->ofill; 2146 MPI_Comm comm; 2147 PetscScalar *values; 2148 DMBoundaryType bx, by, bz; 2149 ISLocalToGlobalMapping ltog; 2150 DMDAStencilType st; 2151 PetscBool removedups = PETSC_FALSE; 2152 2153 PetscFunctionBegin; 2154 /* 2155 nc - number of components per grid point 2156 col - number of colors needed in one direction for single component problem 2157 2158 */ 2159 PetscCall(DMDAGetInfo(da, &dim, &m, &n, &p, &M, &N, &P, &nc, &s, &bx, &by, &bz, &st)); 2160 col = 2 * s + 1; 2161 PetscCheck(bx != DM_BOUNDARY_PERIODIC || (m % col) == 0, PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "For coloring efficiency ensure number of grid points in X is divisible\n\ 2162 by 2*stencil_width + 1\n"); 2163 PetscCheck(by != DM_BOUNDARY_PERIODIC || (n % col) == 0, PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "For coloring efficiency ensure number of grid points in Y is divisible\n\ 2164 by 2*stencil_width + 1\n"); 2165 PetscCheck(bz != DM_BOUNDARY_PERIODIC || (p % col) == 0, PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "For coloring efficiency ensure number of grid points in Z is divisible\n\ 2166 by 2*stencil_width + 1\n"); 2167 2168 /* 2169 With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times 2170 because of "wrapping" around the end of the domain hitting an entry already counted in the other direction. 
2171 */ 2172 if (M == 1 && 2 * s >= m) removedups = PETSC_TRUE; 2173 if (N == 1 && 2 * s >= n) removedups = PETSC_TRUE; 2174 if (P == 1 && 2 * s >= p) removedups = PETSC_TRUE; 2175 2176 PetscCall(DMDAGetCorners(da, &xs, &ys, &zs, &nx, &ny, &nz)); 2177 PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, &gzs, &gnx, &gny, &gnz)); 2178 PetscCall(PetscObjectGetComm((PetscObject)da, &comm)); 2179 2180 PetscCall(PetscMalloc1(col * col * col * nc, &cols)); 2181 PetscCall(DMGetLocalToGlobalMapping(da, <og)); 2182 2183 /* determine the matrix preallocation information */ 2184 MatPreallocateBegin(comm, nc * nx * ny * nz, nc * nx * ny * nz, dnz, onz); 2185 2186 PetscCall(MatSetBlockSize(J, nc)); 2187 for (i = xs; i < xs + nx; i++) { 2188 istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i)); 2189 iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1)); 2190 for (j = ys; j < ys + ny; j++) { 2191 jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j)); 2192 jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1)); 2193 for (k = zs; k < zs + nz; k++) { 2194 kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k)); 2195 kend = (bz == DM_BOUNDARY_PERIODIC) ? 
s : (PetscMin(s, p - k - 1)); 2196 2197 slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs); 2198 2199 for (l = 0; l < nc; l++) { 2200 cnt = 0; 2201 for (ii = istart; ii < iend + 1; ii++) { 2202 for (jj = jstart; jj < jend + 1; jj++) { 2203 for (kk = kstart; kk < kend + 1; kk++) { 2204 if (ii || jj || kk) { 2205 if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/ 2206 for (ifill_col = ofill[l]; ifill_col < ofill[l + 1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc * (slot + ii + gnx * jj + gnx * gny * kk); 2207 } 2208 } else { 2209 if (dfill) { 2210 for (ifill_col = dfill[l]; ifill_col < dfill[l + 1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc * (slot + ii + gnx * jj + gnx * gny * kk); 2211 } else { 2212 for (ifill_col = 0; ifill_col < nc; ifill_col++) cols[cnt++] = ifill_col + nc * (slot + ii + gnx * jj + gnx * gny * kk); 2213 } 2214 } 2215 } 2216 } 2217 } 2218 row = l + nc * (slot); 2219 maxcnt = PetscMax(maxcnt, cnt); 2220 if (removedups) PetscCall(MatPreallocateSetLocalRemoveDups(ltog, 1, &row, ltog, cnt, cols, dnz, onz)); 2221 else PetscCall(MatPreallocateSetLocal(ltog, 1, &row, ltog, cnt, cols, dnz, onz)); 2222 } 2223 } 2224 } 2225 } 2226 PetscCall(MatSeqAIJSetPreallocation(J, 0, dnz)); 2227 PetscCall(MatMPIAIJSetPreallocation(J, 0, dnz, 0, onz)); 2228 MatPreallocateEnd(dnz, onz); 2229 PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog)); 2230 2231 /* 2232 For each node in the grid: we get the neighbors in the local (on processor ordering 2233 that includes the ghost points) then MatSetValuesLocal() maps those indices to the global 2234 PETSc ordering. 2235 */ 2236 if (!da->prealloc_only) { 2237 PetscCall(PetscCalloc1(maxcnt, &values)); 2238 for (i = xs; i < xs + nx; i++) { 2239 istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i)); 2240 iend = (bx == DM_BOUNDARY_PERIODIC) ? 
s : (PetscMin(s, m - i - 1)); 2241 for (j = ys; j < ys + ny; j++) { 2242 jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j)); 2243 jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1)); 2244 for (k = zs; k < zs + nz; k++) { 2245 kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k)); 2246 kend = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1)); 2247 2248 slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs); 2249 2250 for (l = 0; l < nc; l++) { 2251 cnt = 0; 2252 for (ii = istart; ii < iend + 1; ii++) { 2253 for (jj = jstart; jj < jend + 1; jj++) { 2254 for (kk = kstart; kk < kend + 1; kk++) { 2255 if (ii || jj || kk) { 2256 if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/ 2257 for (ifill_col = ofill[l]; ifill_col < ofill[l + 1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc * (slot + ii + gnx * jj + gnx * gny * kk); 2258 } 2259 } else { 2260 if (dfill) { 2261 for (ifill_col = dfill[l]; ifill_col < dfill[l + 1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc * (slot + ii + gnx * jj + gnx * gny * kk); 2262 } else { 2263 for (ifill_col = 0; ifill_col < nc; ifill_col++) cols[cnt++] = ifill_col + nc * (slot + ii + gnx * jj + gnx * gny * kk); 2264 } 2265 } 2266 } 2267 } 2268 } 2269 row = l + nc * (slot); 2270 PetscCall(MatSetValuesLocal(J, 1, &row, cnt, cols, values, INSERT_VALUES)); 2271 } 2272 } 2273 } 2274 } 2275 PetscCall(PetscFree(values)); 2276 /* do not copy values to GPU since they are all zero and not yet needed there */ 2277 PetscCall(MatBindToCPU(J, PETSC_TRUE)); 2278 PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY)); 2279 PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY)); 2280 PetscCall(MatBindToCPU(J, PETSC_FALSE)); 2281 PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE)); 2282 } 2283 PetscCall(PetscFree(cols)); 2284 PetscFunctionReturn(0); 2285 } 2286