xref: /petsc/src/dm/impls/da/fdda.c (revision 8eb1d50fa6bb58b38517c609409ddf07aed47a76)
1 
2 #include <petsc/private/dmdaimpl.h> /*I      "petscdmda.h"     I*/
3 #include <petscmat.h>
4 #include <petscbt.h>
5 
6 extern PetscErrorCode DMCreateColoring_DA_1d_MPIAIJ(DM, ISColoringType, ISColoring *);
7 extern PetscErrorCode DMCreateColoring_DA_2d_MPIAIJ(DM, ISColoringType, ISColoring *);
8 extern PetscErrorCode DMCreateColoring_DA_2d_5pt_MPIAIJ(DM, ISColoringType, ISColoring *);
9 extern PetscErrorCode DMCreateColoring_DA_3d_MPIAIJ(DM, ISColoringType, ISColoring *);
10 
/*
   Maps a ghost index i that may be negative or greater than or equal to the
   upper bound m into the range 0:m-1 using periodicity.
   NOTE: macro arguments are evaluated more than once; pass only side-effect-free
   expressions.
*/
15 #define SetInRange(i, m) ((i < 0) ? m + i : ((i >= m) ? i - m : i))
16 
17 static PetscErrorCode DMDASetBlockFills_Private(const PetscInt *dfill, PetscInt w, PetscInt **rfill)
18 {
19   PetscInt i, j, nz, *fill;
20 
21   PetscFunctionBegin;
22   if (!dfill) PetscFunctionReturn(0);
23 
24   /* count number nonzeros */
25   nz = 0;
26   for (i = 0; i < w; i++) {
27     for (j = 0; j < w; j++) {
28       if (dfill[w * i + j]) nz++;
29     }
30   }
31   PetscCall(PetscMalloc1(nz + w + 1, &fill));
32   /* construct modified CSR storage of nonzero structure */
33   /*  fill[0 -- w] marks starts of each row of column indices (and end of last row)
34    so fill[1] - fill[0] gives number of nonzeros in first row etc */
35   nz = w + 1;
36   for (i = 0; i < w; i++) {
37     fill[i] = nz;
38     for (j = 0; j < w; j++) {
39       if (dfill[w * i + j]) {
40         fill[nz] = j;
41         nz++;
42       }
43     }
44   }
45   fill[w] = nz;
46 
47   *rfill = fill;
48   PetscFunctionReturn(0);
49 }
50 
51 static PetscErrorCode DMDASetBlockFillsSparse_Private(const PetscInt *dfillsparse, PetscInt w, PetscInt **rfill)
52 {
53   PetscInt nz;
54 
55   PetscFunctionBegin;
56   if (!dfillsparse) PetscFunctionReturn(0);
57 
58   /* Determine number of non-zeros */
59   nz = (dfillsparse[w] - w - 1);
60 
61   /* Allocate space for our copy of the given sparse matrix representation. */
62   PetscCall(PetscMalloc1(nz + w + 1, rfill));
63   PetscCall(PetscArraycpy(*rfill, dfillsparse, nz + w + 1));
64   PetscFunctionReturn(0);
65 }
66 
67 static PetscErrorCode DMDASetBlockFills_Private2(DM_DA *dd)
68 {
69   PetscInt i, k, cnt = 1;
70 
71   PetscFunctionBegin;
72 
73   /* ofillcount tracks the columns of ofill that have any nonzero in thems; the value in each location is the number of
74    columns to the left with any nonzeros in them plus 1 */
75   PetscCall(PetscCalloc1(dd->w, &dd->ofillcols));
76   for (i = 0; i < dd->w; i++) {
77     for (k = dd->ofill[i]; k < dd->ofill[i + 1]; k++) dd->ofillcols[dd->ofill[k]] = 1;
78   }
79   for (i = 0; i < dd->w; i++) {
80     if (dd->ofillcols[i]) dd->ofillcols[i] = cnt++;
81   }
82   PetscFunctionReturn(0);
83 }
84 
85 /*@
86     DMDASetBlockFills - Sets the fill pattern in each block for a multi-component problem
87     of the matrix returned by `DMCreateMatrix()`.
88 
89     Logically Collective on da
90 
91     Input Parameters:
92 +   da - the distributed array
93 .   dfill - the fill pattern in the diagonal block (may be NULL, means use dense block)
94 -   ofill - the fill pattern in the off-diagonal blocks
95 
96     Level: developer
97 
98     Notes:
99     This only makes sense when you are doing multicomponent problems but using the
100        `MATMPIAIJ` matrix format
101 
102            The format for dfill and ofill is a 2 dimensional dof by dof matrix with 1 entries
103        representing coupling and 0 entries for missing coupling. For example
104 .vb
105             dfill[9] = {1, 0, 0,
106                         1, 1, 0,
107                         0, 1, 1}
108 .ve
109        means that row 0 is coupled with only itself in the diagonal block, row 1 is coupled with
110        itself and row 0 (in the diagonal block) and row 2 is coupled with itself and row 1 (in the
111        diagonal block).
112 
     `DMDASetGetMatrix()` allows you to provide general code for those more complicated nonzero patterns than
     can be represented in the dfill, ofill format
115 
116    Contributed by Glenn Hammond
117 
118 .seealso: `DM`, `DMDA`, `DMCreateMatrix()`, `DMDASetGetMatrix()`, `DMSetMatrixPreallocateOnly()`
119 @*/
PetscErrorCode DMDASetBlockFills(DM da, const PetscInt *dfill, const PetscInt *ofill)
{
  DM_DA *dd = (DM_DA *)da->data;

  PetscFunctionBegin;
  /* save the given dfill and ofill information, converted from dense dof-by-dof
     form to the modified CSR form used internally (a NULL pattern is skipped,
     leaving the corresponding field unset, which means a dense block) */
  PetscCall(DMDASetBlockFills_Private(dfill, dd->w, &dd->dfill));
  PetscCall(DMDASetBlockFills_Private(ofill, dd->w, &dd->ofill));

  /* count nonzeros in ofill columns (fills dd->ofillcols) */
  PetscCall(DMDASetBlockFills_Private2(dd));
  PetscFunctionReturn(0);
}
133 
134 /*@
135     DMDASetBlockFillsSparse - Sets the fill pattern in each block for a multi-component problem
136     of the matrix returned by `DMCreateMatrix()`, using sparse representations
137     of fill patterns.
138 
139     Logically Collective on da
140 
141     Input Parameters:
142 +   da - the distributed array
143 .   dfill - the sparse fill pattern in the diagonal block (may be NULL, means use dense block)
144 -   ofill - the sparse fill pattern in the off-diagonal blocks
145 
146     Level: developer
147 
148     Notes:
149     This only makes sense when you are doing multicomponent problems but using the
150        `MATMPIAIJ` matrix format
151 
152            The format for dfill and ofill is a sparse representation of a
153            dof-by-dof matrix with 1 entries representing coupling and 0 entries
154            for missing coupling.  The sparse representation is a 1 dimensional
155            array of length nz + dof + 1, where nz is the number of non-zeros in
156            the matrix.  The first dof entries in the array give the
157            starting array indices of each row's items in the rest of the array,
158            the dof+1st item contains the value nz + dof + 1 (i.e. the entire length of the array)
159            and the remaining nz items give the column indices of each of
160            the 1s within the logical 2D matrix.  Each row's items within
161            the array are the column indices of the 1s within that row
162            of the 2D matrix.  PETSc developers may recognize that this is the
163            same format as that computed by the `DMDASetBlockFills_Private()`
164            function from a dense 2D matrix representation.
165 
     `DMDASetGetMatrix()` allows you to provide general code for those more complicated nonzero patterns than
     can be represented in the dfill, ofill format
168 
169    Contributed by Philip C. Roth
170 
171 .seealso: `DM`, `DMDA`, `DMDASetBlockFills()`, `DMCreateMatrix()`, `DMDASetGetMatrix()`, `DMSetMatrixPreallocateOnly()`
172 @*/
PetscErrorCode DMDASetBlockFillsSparse(DM da, const PetscInt *dfillsparse, const PetscInt *ofillsparse)
{
  DM_DA *dd = (DM_DA *)da->data;

  PetscFunctionBegin;
  /* save copies of the given dfill and ofill information, already supplied in
     the sparse (modified CSR) representation; NULL patterns are skipped */
  PetscCall(DMDASetBlockFillsSparse_Private(dfillsparse, dd->w, &dd->dfill));
  PetscCall(DMDASetBlockFillsSparse_Private(ofillsparse, dd->w, &dd->ofill));

  /* count nonzeros in ofill columns (fills dd->ofillcols) */
  PetscCall(DMDASetBlockFills_Private2(dd));
  PetscFunctionReturn(0);
}
186 
/* Dispatch routine that produces a coloring for the matrix of a DMDA, delegating
   to the 1d/2d/3d AIJ implementations below.  For BAIJ matrix types the DMDA is
   temporarily switched to one degree of freedom per grid point so the coloring
   applies to blocks rather than individual matrix entries. */
PetscErrorCode DMCreateColoring_DA(DM da, ISColoringType ctype, ISColoring *coloring)
{
  PetscInt       dim, m, n, p, nc;
  DMBoundaryType bx, by, bz;
  MPI_Comm       comm;
  PetscMPIInt    size;
  PetscBool      isBAIJ;
  DM_DA         *dd = (DM_DA *)da->data;

  PetscFunctionBegin;
  /*
                                  m
          ------------------------------------------------------
         |                                                     |
         |                                                     |
         |               ----------------------                |
         |               |                    |                |
      n  |           yn  |                    |                |
         |               |                    |                |
         |               .---------------------                |
         |             (xs,ys)     xn                          |
         |            .                                        |
         |         (gxs,gys)                                   |
         |                                                     |
          -----------------------------------------------------
  */

  /*
         nc - number of components per grid point
         col - number of colors needed in one direction for single component problem

         Note: m, n, p below receive the number of processes in each direction
         (the 6th-8th outputs of DMDAGetInfo), not the global grid sizes.
  */
  PetscCall(DMDAGetInfo(da, &dim, NULL, NULL, NULL, &m, &n, &p, &nc, NULL, &bx, &by, &bz, NULL));

  PetscCall(PetscObjectGetComm((PetscObject)da, &comm));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  if (ctype == IS_COLORING_LOCAL) {
    if (size == 1) {
      /* on a single process a ghosted (local) coloring degenerates to a global one */
      ctype = IS_COLORING_GLOBAL;
    } else if (dim > 1) {
      /* a periodic direction spanned by a single process puts both ends of the
         domain on that process, which the local coloring cannot represent */
      if ((m == 1 && bx == DM_BOUNDARY_PERIODIC) || (n == 1 && by == DM_BOUNDARY_PERIODIC) || (p == 1 && bz == DM_BOUNDARY_PERIODIC)) {
        SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "IS_COLORING_LOCAL cannot be used for periodic boundary condition having both ends of the domain  on the same process");
      }
    }
  }

  /* Tell the DMDA it has 1 degree of freedom per grid point so that the coloring for BAIJ
     matrices is for the blocks, not the individual matrix elements  */
  PetscCall(PetscStrbeginswith(da->mattype, MATBAIJ, &isBAIJ));
  if (!isBAIJ) PetscCall(PetscStrbeginswith(da->mattype, MATMPIBAIJ, &isBAIJ));
  if (!isBAIJ) PetscCall(PetscStrbeginswith(da->mattype, MATSEQBAIJ, &isBAIJ));
  if (isBAIJ) {
    /* temporarily rescale the x-ranges from component units to grid-point (block) units */
    dd->w  = 1;
    dd->xs = dd->xs / nc;
    dd->xe = dd->xe / nc;
    dd->Xs = dd->Xs / nc;
    dd->Xe = dd->Xe / nc;
  }

  /*
     We do not provide a getcoloring function in the DMDA operations because
   the basic DMDA does not know about matrices. We think of DMDA as being
   more low-level than matrices.
  */
  if (dim == 1) PetscCall(DMCreateColoring_DA_1d_MPIAIJ(da, ctype, coloring));
  else if (dim == 2) PetscCall(DMCreateColoring_DA_2d_MPIAIJ(da, ctype, coloring));
  else if (dim == 3) PetscCall(DMCreateColoring_DA_3d_MPIAIJ(da, ctype, coloring));
  else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "Not done for %" PetscInt_FMT " dimension, send us mail petsc-maint@mcs.anl.gov for code", dim);
  if (isBAIJ) {
    /* restore the component-unit ranges modified above */
    dd->w  = nc;
    dd->xs = dd->xs * nc;
    dd->xe = dd->xe * nc;
    dd->Xs = dd->Xs * nc;
    dd->Xe = dd->Xe * nc;
  }
  PetscFunctionReturn(0);
}
264 
265 /* ---------------------------------------------------------------------------------*/
266 
267 PetscErrorCode DMCreateColoring_DA_2d_MPIAIJ(DM da, ISColoringType ctype, ISColoring *coloring)
268 {
269   PetscInt         xs, ys, nx, ny, i, j, ii, gxs, gys, gnx, gny, m, n, M, N, dim, s, k, nc, col;
270   PetscInt         ncolors = 0;
271   MPI_Comm         comm;
272   DMBoundaryType   bx, by;
273   DMDAStencilType  st;
274   ISColoringValue *colors;
275   DM_DA           *dd = (DM_DA *)da->data;
276 
277   PetscFunctionBegin;
278   /*
279          nc - number of components per grid point
280          col - number of colors needed in one direction for single component problem
281 
282   */
283   PetscCall(DMDAGetInfo(da, &dim, &m, &n, NULL, &M, &N, NULL, &nc, &s, &bx, &by, NULL, &st));
284   col = 2 * s + 1;
285   PetscCall(DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL));
286   PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL));
287   PetscCall(PetscObjectGetComm((PetscObject)da, &comm));
288 
289   /* special case as taught to us by Paul Hovland */
290   if (st == DMDA_STENCIL_STAR && s == 1) {
291     PetscCall(DMCreateColoring_DA_2d_5pt_MPIAIJ(da, ctype, coloring));
292   } else {
293     if (ctype == IS_COLORING_GLOBAL) {
294       if (!dd->localcoloring) {
295         PetscCall(PetscMalloc1(nc * nx * ny, &colors));
296         ii = 0;
297         for (j = ys; j < ys + ny; j++) {
298           for (i = xs; i < xs + nx; i++) {
299             for (k = 0; k < nc; k++) colors[ii++] = k + nc * ((i % col) + col * (j % col));
300           }
301         }
302         ncolors = nc + nc * (col - 1 + col * (col - 1));
303         PetscCall(ISColoringCreate(comm, ncolors, nc * nx * ny, colors, PETSC_OWN_POINTER, &dd->localcoloring));
304       }
305       *coloring = dd->localcoloring;
306     } else if (ctype == IS_COLORING_LOCAL) {
307       if (!dd->ghostedcoloring) {
308         PetscCall(PetscMalloc1(nc * gnx * gny, &colors));
309         ii = 0;
310         for (j = gys; j < gys + gny; j++) {
311           for (i = gxs; i < gxs + gnx; i++) {
312             for (k = 0; k < nc; k++) {
313               /* the complicated stuff is to handle periodic boundaries */
314               colors[ii++] = k + nc * ((SetInRange(i, m) % col) + col * (SetInRange(j, n) % col));
315             }
316           }
317         }
318         ncolors = nc + nc * (col - 1 + col * (col - 1));
319         PetscCall(ISColoringCreate(comm, ncolors, nc * gnx * gny, colors, PETSC_OWN_POINTER, &dd->ghostedcoloring));
320         /* PetscIntView(ncolors,(PetscInt*)colors,0); */
321 
322         PetscCall(ISColoringSetType(dd->ghostedcoloring, IS_COLORING_LOCAL));
323       }
324       *coloring = dd->ghostedcoloring;
325     } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_ARG_WRONG, "Unknown ISColoringType %d", (int)ctype);
326   }
327   PetscCall(ISColoringReference(*coloring));
328   PetscFunctionReturn(0);
329 }
330 
331 /* ---------------------------------------------------------------------------------*/
332 
/* Coloring for the 3d DMDA: a grid point's color combines its component index
   with its position within the (2s+1)^3 stencil box.  Results are cached on
   the DMDA (dd->localcoloring / dd->ghostedcoloring). */
PetscErrorCode DMCreateColoring_DA_3d_MPIAIJ(DM da, ISColoringType ctype, ISColoring *coloring)
{
  PetscInt         xs, ys, nx, ny, i, j, gxs, gys, gnx, gny, m, n, p, dim, s, k, nc, col, zs, gzs, ii, l, nz, gnz, M, N, P;
  PetscInt         ncolors;
  MPI_Comm         comm;
  DMBoundaryType   bx, by, bz;
  DMDAStencilType  st;
  ISColoringValue *colors;
  DM_DA           *dd = (DM_DA *)da->data;

  PetscFunctionBegin;
  /*
         nc - number of components per grid point
         col - number of colors needed in one direction for single component problem

         m, n, p are the global grid sizes (used for periodic wrapping below)
  */
  PetscCall(DMDAGetInfo(da, &dim, &m, &n, &p, &M, &N, &P, &nc, &s, &bx, &by, &bz, &st));
  col = 2 * s + 1;
  PetscCall(DMDAGetCorners(da, &xs, &ys, &zs, &nx, &ny, &nz));
  PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, &gzs, &gnx, &gny, &gnz));
  PetscCall(PetscObjectGetComm((PetscObject)da, &comm));

  /* create the coloring */
  if (ctype == IS_COLORING_GLOBAL) {
    if (!dd->localcoloring) {
      PetscCall(PetscMalloc1(nc * nx * ny * nz, &colors));
      ii = 0;
      /* color = component index + nc * (position within the col x col x col box) */
      for (k = zs; k < zs + nz; k++) {
        for (j = ys; j < ys + ny; j++) {
          for (i = xs; i < xs + nx; i++) {
            for (l = 0; l < nc; l++) colors[ii++] = l + nc * ((i % col) + col * (j % col) + col * col * (k % col));
          }
        }
      }
      ncolors = nc + nc * (col - 1 + col * (col - 1) + col * col * (col - 1));
      PetscCall(ISColoringCreate(comm, ncolors, nc * nx * ny * nz, colors, PETSC_OWN_POINTER, &dd->localcoloring));
    }
    *coloring = dd->localcoloring;
  } else if (ctype == IS_COLORING_LOCAL) {
    if (!dd->ghostedcoloring) {
      PetscCall(PetscMalloc1(nc * gnx * gny * gnz, &colors));
      ii = 0;
      for (k = gzs; k < gzs + gnz; k++) {
        for (j = gys; j < gys + gny; j++) {
          for (i = gxs; i < gxs + gnx; i++) {
            for (l = 0; l < nc; l++) {
              /* the complicated stuff is to handle periodic boundaries: SetInRange
                 wraps out-of-range ghost indices back into 0..m-1 (resp. n, p) */
              colors[ii++] = l + nc * ((SetInRange(i, m) % col) + col * (SetInRange(j, n) % col) + col * col * (SetInRange(k, p) % col));
            }
          }
        }
      }
      ncolors = nc + nc * (col - 1 + col * (col - 1) + col * col * (col - 1));
      PetscCall(ISColoringCreate(comm, ncolors, nc * gnx * gny * gnz, colors, PETSC_OWN_POINTER, &dd->ghostedcoloring));
      PetscCall(ISColoringSetType(dd->ghostedcoloring, IS_COLORING_LOCAL));
    }
    *coloring = dd->ghostedcoloring;
  } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_ARG_WRONG, "Unknown ISColoringType %d", (int)ctype);
  PetscCall(ISColoringReference(*coloring));
  PetscFunctionReturn(0);
}
394 
395 /* ---------------------------------------------------------------------------------*/
396 
/* Coloring for the 1d DMDA.  When a block fill pattern has been supplied via
   DMDASetBlockFills() (dd->ofillcols is set) a reduced coloring is used that
   gives extra colors only to components whose ofill column has a nonzero. */
PetscErrorCode DMCreateColoring_DA_1d_MPIAIJ(DM da, ISColoringType ctype, ISColoring *coloring)
{
  PetscInt         xs, nx, i, i1, gxs, gnx, l, m, M, dim, s, nc, col;
  PetscInt         ncolors;
  MPI_Comm         comm;
  DMBoundaryType   bx;
  ISColoringValue *colors;
  DM_DA           *dd = (DM_DA *)da->data;

  PetscFunctionBegin;
  /*
         nc - number of components per grid point
         col - number of colors needed in one direction for single component problem

  */
  PetscCall(DMDAGetInfo(da, &dim, &m, NULL, NULL, &M, NULL, NULL, &nc, &s, &bx, NULL, NULL, NULL));
  col = 2 * s + 1;
  PetscCall(DMDAGetCorners(da, &xs, NULL, NULL, &nx, NULL, NULL));
  PetscCall(DMDAGetGhostCorners(da, &gxs, NULL, NULL, &gnx, NULL, NULL));
  PetscCall(PetscObjectGetComm((PetscObject)da, &comm));

  /* create the coloring */
  if (ctype == IS_COLORING_GLOBAL) {
    if (!dd->localcoloring) {
      PetscCall(PetscMalloc1(nc * nx, &colors));
      if (dd->ofillcols) {
        PetscInt tc = 0;
        /* tc = number of components whose ofill column contains a nonzero */
        for (i = 0; i < nc; i++) tc += (PetscInt)(dd->ofillcols[i] > 0);
        i1 = 0;
        for (i = xs; i < xs + nx; i++) {
          for (l = 0; l < nc; l++) {
            if (dd->ofillcols[l] && (i % col)) {
              /* component with off-diagonal coupling at a point with i % col != 0:
                 unique color per (i % col, rank of coupled component) pair,
                 placed after the nc base colors */
              colors[i1++] = nc - 1 + tc * ((i % col) - 1) + dd->ofillcols[l];
            } else {
              /* otherwise the component keeps its base color l */
              colors[i1++] = l;
            }
          }
        }
        /* nc base colors plus tc extra colors for each of the 2s non-zero
           values of i % col */
        ncolors = nc + 2 * s * tc;
      } else {
        i1 = 0;
        for (i = xs; i < xs + nx; i++) {
          for (l = 0; l < nc; l++) colors[i1++] = l + nc * (i % col);
        }
        ncolors = nc + nc * (col - 1);
      }
      PetscCall(ISColoringCreate(comm, ncolors, nc * nx, colors, PETSC_OWN_POINTER, &dd->localcoloring));
    }
    *coloring = dd->localcoloring;
  } else if (ctype == IS_COLORING_LOCAL) {
    if (!dd->ghostedcoloring) {
      PetscCall(PetscMalloc1(nc * gnx, &colors));
      i1 = 0;
      for (i = gxs; i < gxs + gnx; i++) {
        for (l = 0; l < nc; l++) {
          /* the complicated stuff is to handle periodic boundaries: SetInRange
             wraps out-of-range ghost indices back into 0..m-1 */
          colors[i1++] = l + nc * (SetInRange(i, m) % col);
        }
      }
      ncolors = nc + nc * (col - 1);
      PetscCall(ISColoringCreate(comm, ncolors, nc * gnx, colors, PETSC_OWN_POINTER, &dd->ghostedcoloring));
      PetscCall(ISColoringSetType(dd->ghostedcoloring, IS_COLORING_LOCAL));
    }
    *coloring = dd->ghostedcoloring;
  } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_ARG_WRONG, "Unknown ISColoringType %d", (int)ctype);
  PetscCall(ISColoringReference(*coloring));
  PetscFunctionReturn(0);
}
465 
466 PetscErrorCode DMCreateColoring_DA_2d_5pt_MPIAIJ(DM da, ISColoringType ctype, ISColoring *coloring)
467 {
468   PetscInt         xs, ys, nx, ny, i, j, ii, gxs, gys, gnx, gny, m, n, dim, s, k, nc;
469   PetscInt         ncolors;
470   MPI_Comm         comm;
471   DMBoundaryType   bx, by;
472   ISColoringValue *colors;
473   DM_DA           *dd = (DM_DA *)da->data;
474 
475   PetscFunctionBegin;
476   /*
477          nc - number of components per grid point
478          col - number of colors needed in one direction for single component problem
479 
480   */
481   PetscCall(DMDAGetInfo(da, &dim, &m, &n, NULL, NULL, NULL, NULL, &nc, &s, &bx, &by, NULL, NULL));
482   PetscCall(DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL));
483   PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL));
484   PetscCall(PetscObjectGetComm((PetscObject)da, &comm));
485   /* create the coloring */
486   if (ctype == IS_COLORING_GLOBAL) {
487     if (!dd->localcoloring) {
488       PetscCall(PetscMalloc1(nc * nx * ny, &colors));
489       ii = 0;
490       for (j = ys; j < ys + ny; j++) {
491         for (i = xs; i < xs + nx; i++) {
492           for (k = 0; k < nc; k++) colors[ii++] = k + nc * ((3 * j + i) % 5);
493         }
494       }
495       ncolors = 5 * nc;
496       PetscCall(ISColoringCreate(comm, ncolors, nc * nx * ny, colors, PETSC_OWN_POINTER, &dd->localcoloring));
497     }
498     *coloring = dd->localcoloring;
499   } else if (ctype == IS_COLORING_LOCAL) {
500     if (!dd->ghostedcoloring) {
501       PetscCall(PetscMalloc1(nc * gnx * gny, &colors));
502       ii = 0;
503       for (j = gys; j < gys + gny; j++) {
504         for (i = gxs; i < gxs + gnx; i++) {
505           for (k = 0; k < nc; k++) colors[ii++] = k + nc * ((3 * SetInRange(j, n) + SetInRange(i, m)) % 5);
506         }
507       }
508       ncolors = 5 * nc;
509       PetscCall(ISColoringCreate(comm, ncolors, nc * gnx * gny, colors, PETSC_OWN_POINTER, &dd->ghostedcoloring));
510       PetscCall(ISColoringSetType(dd->ghostedcoloring, IS_COLORING_LOCAL));
511     }
512     *coloring = dd->ghostedcoloring;
513   } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_ARG_WRONG, "Unknown ISColoringType %d", (int)ctype);
514   PetscFunctionReturn(0);
515 }
516 
517 /* =========================================================================== */
518 extern PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ(DM, Mat);
519 extern PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ_Fill(DM, Mat);
520 extern PetscErrorCode DMCreateMatrix_DA_1d_SeqAIJ_NoPreallocation(DM, Mat);
521 extern PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ(DM, Mat);
522 extern PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ_Fill(DM, Mat);
523 extern PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ(DM, Mat);
524 extern PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ_Fill(DM, Mat);
525 extern PetscErrorCode DMCreateMatrix_DA_2d_MPIBAIJ(DM, Mat);
526 extern PetscErrorCode DMCreateMatrix_DA_3d_MPIBAIJ(DM, Mat);
527 extern PetscErrorCode DMCreateMatrix_DA_2d_MPISBAIJ(DM, Mat);
528 extern PetscErrorCode DMCreateMatrix_DA_3d_MPISBAIJ(DM, Mat);
529 extern PetscErrorCode DMCreateMatrix_DA_2d_MPISELL(DM, Mat);
530 extern PetscErrorCode DMCreateMatrix_DA_3d_MPISELL(DM, Mat);
531 extern PetscErrorCode DMCreateMatrix_DA_IS(DM, Mat);
532 
533 /*@C
534    MatSetupDM - Sets the `DMDA` that is to be used by the HYPRE_StructMatrix PETSc matrix
535 
536    Logically Collective on mat
537 
538    Input Parameters:
539 +  mat - the matrix
540 -  da - the da
541 
542    Level: intermediate
543 
544 .seealso: `Mat`, `MatSetUp()`
545 @*/
PetscErrorCode MatSetupDM(Mat mat, DM da)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat, MAT_CLASSID, 1);
  /* da must specifically be a DMDA; other DM types are rejected here */
  PetscValidHeaderSpecificType(da, DM_CLASSID, 2, DMDA);
  /* forward to the matrix-type-specific implementation if the type provides
     one; silently a no-op otherwise (PetscTryMethod) */
  PetscTryMethod(mat, "MatSetupDM_C", (Mat, DM), (mat, da));
  PetscFunctionReturn(0);
}
554 
555 PetscErrorCode MatView_MPI_DA(Mat A, PetscViewer viewer)
556 {
557   DM                da;
558   const char       *prefix;
559   Mat               Anatural;
560   AO                ao;
561   PetscInt          rstart, rend, *petsc, i;
562   IS                is;
563   MPI_Comm          comm;
564   PetscViewerFormat format;
565 
566   PetscFunctionBegin;
567   /* Check whether we are just printing info, in which case MatView() already viewed everything we wanted to view */
568   PetscCall(PetscViewerGetFormat(viewer, &format));
569   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) PetscFunctionReturn(0);
570 
571   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
572   PetscCall(MatGetDM(A, &da));
573   PetscCheck(da, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Matrix not generated from a DMDA");
574 
575   PetscCall(DMDAGetAO(da, &ao));
576   PetscCall(MatGetOwnershipRange(A, &rstart, &rend));
577   PetscCall(PetscMalloc1(rend - rstart, &petsc));
578   for (i = rstart; i < rend; i++) petsc[i - rstart] = i;
579   PetscCall(AOApplicationToPetsc(ao, rend - rstart, petsc));
580   PetscCall(ISCreateGeneral(comm, rend - rstart, petsc, PETSC_OWN_POINTER, &is));
581 
582   /* call viewer on natural ordering */
583   PetscCall(MatCreateSubMatrix(A, is, is, MAT_INITIAL_MATRIX, &Anatural));
584   PetscCall(ISDestroy(&is));
585   PetscCall(PetscObjectGetOptionsPrefix((PetscObject)A, &prefix));
586   PetscCall(PetscObjectSetOptionsPrefix((PetscObject)Anatural, prefix));
587   PetscCall(PetscObjectSetName((PetscObject)Anatural, ((PetscObject)A)->name));
588   ((PetscObject)Anatural)->donotPetscObjectPrintClassNamePrefixType = PETSC_TRUE;
589   PetscCall(MatView(Anatural, viewer));
590   ((PetscObject)Anatural)->donotPetscObjectPrintClassNamePrefixType = PETSC_FALSE;
591   PetscCall(MatDestroy(&Anatural));
592   PetscFunctionReturn(0);
593 }
594 
/* Load a DMDA-generated matrix: the data on disk is in the natural ordering,
   so after loading, the rows/columns are permuted into the application (DMDA)
   ordering via the DMDA's AO, and A's header is replaced with the result. */
PetscErrorCode MatLoad_MPI_DA(Mat A, PetscViewer viewer)
{
  DM       da;
  Mat      Anatural, Aapp;
  AO       ao;
  PetscInt rstart, rend, *app, i, m, n, M, N;
  IS       is;
  MPI_Comm comm;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
  PetscCall(MatGetDM(A, &da));
  PetscCheck(da, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Matrix not generated from a DMDA");

  /* Load the matrix in natural ordering, with the same type and layout as A */
  PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &Anatural));
  PetscCall(MatSetType(Anatural, ((PetscObject)A)->type_name));
  PetscCall(MatGetSize(A, &M, &N));
  PetscCall(MatGetLocalSize(A, &m, &n));
  PetscCall(MatSetSizes(Anatural, m, n, M, N));
  PetscCall(MatLoad(Anatural, viewer));

  /* Map natural ordering to application ordering and create IS */
  PetscCall(DMDAGetAO(da, &ao));
  PetscCall(MatGetOwnershipRange(Anatural, &rstart, &rend));
  PetscCall(PetscMalloc1(rend - rstart, &app));
  for (i = rstart; i < rend; i++) app[i - rstart] = i;
  PetscCall(AOPetscToApplication(ao, rend - rstart, app));
  /* the IS takes ownership of app (PETSC_OWN_POINTER), so it is not freed here */
  PetscCall(ISCreateGeneral(comm, rend - rstart, app, PETSC_OWN_POINTER, &is));

  /* Do permutation and replace header: A becomes the permuted matrix and the
     temporary Aapp is consumed by MatHeaderReplace() */
  PetscCall(MatCreateSubMatrix(Anatural, is, is, MAT_INITIAL_MATRIX, &Aapp));
  PetscCall(MatHeaderReplace(A, &Aapp));
  PetscCall(ISDestroy(&is));
  PetscCall(MatDestroy(&Anatural));
  PetscFunctionReturn(0);
}
632 
633 PetscErrorCode DMCreateMatrix_DA(DM da, Mat *J)
634 {
635   PetscInt dim, dof, nx, ny, nz, dims[3], starts[3], M, N, P;
636   Mat      A;
637   MPI_Comm comm;
638   MatType  Atype;
639   void (*aij)(void) = NULL, (*baij)(void) = NULL, (*sbaij)(void) = NULL, (*sell)(void) = NULL, (*is)(void) = NULL;
640   MatType     mtype;
641   PetscMPIInt size;
642   DM_DA      *dd = (DM_DA *)da->data;
643 
644   PetscFunctionBegin;
645   PetscCall(MatInitializePackage());
646   mtype = da->mattype;
647 
648   /*
649                                   m
650           ------------------------------------------------------
651          |                                                     |
652          |                                                     |
653          |               ----------------------                |
654          |               |                    |                |
655       n  |           ny  |                    |                |
656          |               |                    |                |
657          |               .---------------------                |
658          |             (xs,ys)     nx                          |
659          |            .                                        |
660          |         (gxs,gys)                                   |
661          |                                                     |
662           -----------------------------------------------------
663   */
664 
665   /*
666          nc - number of components per grid point
667          col - number of colors needed in one direction for single component problem
668 
669   */
670   M   = dd->M;
671   N   = dd->N;
672   P   = dd->P;
673   dim = da->dim;
674   dof = dd->w;
675   /* PetscCall(DMDAGetInfo(da,&dim,&M,&N,&P,NULL,NULL,NULL,&dof,NULL,NULL,NULL,NULL,NULL)); */
676   PetscCall(DMDAGetCorners(da, NULL, NULL, NULL, &nx, &ny, &nz));
677   PetscCall(PetscObjectGetComm((PetscObject)da, &comm));
678   PetscCall(MatCreate(comm, &A));
679   PetscCall(MatSetSizes(A, dof * nx * ny * nz, dof * nx * ny * nz, dof * M * N * P, dof * M * N * P));
680   PetscCall(MatSetType(A, mtype));
681   PetscCall(MatSetFromOptions(A));
682   if (dof * nx * ny * nz < da->bind_below) {
683     PetscCall(MatSetBindingPropagates(A, PETSC_TRUE));
684     PetscCall(MatBindToCPU(A, PETSC_TRUE));
685   }
686   PetscCall(MatSetDM(A, da));
687   if (da->structure_only) PetscCall(MatSetOption(A, MAT_STRUCTURE_ONLY, PETSC_TRUE));
688   PetscCall(MatGetType(A, &Atype));
689   /*
690      We do not provide a getmatrix function in the DMDA operations because
691    the basic DMDA does not know about matrices. We think of DMDA as being more
692    more low-level than matrices. This is kind of cheating but, cause sometimes
693    we think of DMDA has higher level than matrices.
694 
695      We could switch based on Atype (or mtype), but we do not since the
696    specialized setting routines depend only on the particular preallocation
697    details of the matrix, not the type itself.
698   */
699   PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatMPIAIJSetPreallocation_C", &aij));
700   if (!aij) PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatSeqAIJSetPreallocation_C", &aij));
701   if (!aij) {
702     PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatMPIBAIJSetPreallocation_C", &baij));
703     if (!baij) PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatSeqBAIJSetPreallocation_C", &baij));
704     if (!baij) {
705       PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatMPISBAIJSetPreallocation_C", &sbaij));
706       if (!sbaij) PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatSeqSBAIJSetPreallocation_C", &sbaij));
707       if (!sbaij) {
708         PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatMPISELLSetPreallocation_C", &sell));
709         if (!sell) PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatSeqSELLSetPreallocation_C", &sell));
710       }
711       if (!sell) PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatISSetPreallocation_C", &is));
712     }
713   }
714   if (aij) {
715     if (dim == 1) {
716       if (dd->ofill) {
717         PetscCall(DMCreateMatrix_DA_1d_MPIAIJ_Fill(da, A));
718       } else {
719         DMBoundaryType bx;
720         PetscMPIInt    size;
721         PetscCall(DMDAGetInfo(da, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, &bx, NULL, NULL, NULL));
722         PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)da), &size));
723         if (size == 1 && bx == DM_BOUNDARY_NONE) {
724           PetscCall(DMCreateMatrix_DA_1d_SeqAIJ_NoPreallocation(da, A));
725         } else {
726           PetscCall(DMCreateMatrix_DA_1d_MPIAIJ(da, A));
727         }
728       }
729     } else if (dim == 2) {
730       if (dd->ofill) {
731         PetscCall(DMCreateMatrix_DA_2d_MPIAIJ_Fill(da, A));
732       } else {
733         PetscCall(DMCreateMatrix_DA_2d_MPIAIJ(da, A));
734       }
735     } else if (dim == 3) {
736       if (dd->ofill) {
737         PetscCall(DMCreateMatrix_DA_3d_MPIAIJ_Fill(da, A));
738       } else {
739         PetscCall(DMCreateMatrix_DA_3d_MPIAIJ(da, A));
740       }
741     }
742   } else if (baij) {
743     if (dim == 2) {
744       PetscCall(DMCreateMatrix_DA_2d_MPIBAIJ(da, A));
745     } else if (dim == 3) {
746       PetscCall(DMCreateMatrix_DA_3d_MPIBAIJ(da, A));
747     } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "Not implemented for %" PetscInt_FMT " dimension and Matrix Type: %s in %" PetscInt_FMT " dimension! Send mail to petsc-maint@mcs.anl.gov for code", dim, Atype, dim);
748   } else if (sbaij) {
749     if (dim == 2) {
750       PetscCall(DMCreateMatrix_DA_2d_MPISBAIJ(da, A));
751     } else if (dim == 3) {
752       PetscCall(DMCreateMatrix_DA_3d_MPISBAIJ(da, A));
753     } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "Not implemented for %" PetscInt_FMT " dimension and Matrix Type: %s in %" PetscInt_FMT " dimension! Send mail to petsc-maint@mcs.anl.gov for code", dim, Atype, dim);
754   } else if (sell) {
755     if (dim == 2) {
756       PetscCall(DMCreateMatrix_DA_2d_MPISELL(da, A));
757     } else if (dim == 3) {
758       PetscCall(DMCreateMatrix_DA_3d_MPISELL(da, A));
759     } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "Not implemented for %" PetscInt_FMT " dimension and Matrix Type: %s in %" PetscInt_FMT " dimension! Send mail to petsc-maint@mcs.anl.gov for code", dim, Atype, dim);
760   } else if (is) {
761     PetscCall(DMCreateMatrix_DA_IS(da, A));
762   } else {
763     ISLocalToGlobalMapping ltog;
764 
765     PetscCall(MatSetBlockSize(A, dof));
766     PetscCall(MatSetUp(A));
767     PetscCall(DMGetLocalToGlobalMapping(da, &ltog));
768     PetscCall(MatSetLocalToGlobalMapping(A, ltog, ltog));
769   }
770   PetscCall(DMDAGetGhostCorners(da, &starts[0], &starts[1], &starts[2], &dims[0], &dims[1], &dims[2]));
771   PetscCall(MatSetStencil(A, dim, dims, starts, dof));
772   PetscCall(MatSetDM(A, da));
773   PetscCallMPI(MPI_Comm_size(comm, &size));
774   if (size > 1) {
775     /* change viewer to display matrix in natural ordering */
776     PetscCall(MatSetOperation(A, MATOP_VIEW, (void (*)(void))MatView_MPI_DA));
777     PetscCall(MatSetOperation(A, MATOP_LOAD, (void (*)(void))MatLoad_MPI_DA));
778   }
779   *J = A;
780   PetscFunctionReturn(0);
781 }
782 
783 /* ---------------------------------------------------------------------------------*/
784 PETSC_EXTERN PetscErrorCode MatISSetPreallocation_IS(Mat, PetscInt, const PetscInt[], PetscInt, const PetscInt[]);
785 
786 PetscErrorCode DMCreateMatrix_DA_IS(DM dm, Mat J)
787 {
788   DM_DA                 *da = (DM_DA *)dm->data;
789   Mat                    lJ, P;
790   ISLocalToGlobalMapping ltog;
791   IS                     is;
792   PetscBT                bt;
793   const PetscInt        *e_loc, *idx;
794   PetscInt               i, nel, nen, nv, dof, *gidx, n, N;
795 
796   /* The l2g map of DMDA has all ghosted nodes, and e_loc is a subset of all the local nodes (including the ghosted)
797      We need to filter out the local indices that are not represented through the DMDAGetElements decomposition */
798   PetscFunctionBegin;
799   dof = da->w;
800   PetscCall(MatSetBlockSize(J, dof));
801   PetscCall(DMGetLocalToGlobalMapping(dm, &ltog));
802 
803   /* flag local elements indices in local DMDA numbering */
804   PetscCall(ISLocalToGlobalMappingGetSize(ltog, &nv));
805   PetscCall(PetscBTCreate(nv / dof, &bt));
806   PetscCall(DMDAGetElements(dm, &nel, &nen, &e_loc)); /* this will throw an error if the stencil type is not DMDA_STENCIL_BOX */
807   for (i = 0; i < nel * nen; i++) PetscCall(PetscBTSet(bt, e_loc[i]));
808 
809   /* filter out (set to -1) the global indices not used by the local elements */
810   PetscCall(PetscMalloc1(nv / dof, &gidx));
811   PetscCall(ISLocalToGlobalMappingGetBlockIndices(ltog, &idx));
812   PetscCall(PetscArraycpy(gidx, idx, nv / dof));
813   PetscCall(ISLocalToGlobalMappingRestoreBlockIndices(ltog, &idx));
814   for (i = 0; i < nv / dof; i++)
815     if (!PetscBTLookup(bt, i)) gidx[i] = -1;
816   PetscCall(PetscBTDestroy(&bt));
817   PetscCall(ISCreateBlock(PetscObjectComm((PetscObject)dm), dof, nv / dof, gidx, PETSC_OWN_POINTER, &is));
818   PetscCall(ISLocalToGlobalMappingCreateIS(is, &ltog));
819   PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));
820   PetscCall(ISLocalToGlobalMappingDestroy(&ltog));
821   PetscCall(ISDestroy(&is));
822 
823   /* Preallocation */
824   if (dm->prealloc_skip) {
825     PetscCall(MatSetUp(J));
826   } else {
827     PetscCall(MatISGetLocalMat(J, &lJ));
828     PetscCall(MatGetLocalToGlobalMapping(lJ, &ltog, NULL));
829     PetscCall(MatCreate(PetscObjectComm((PetscObject)lJ), &P));
830     PetscCall(MatSetType(P, MATPREALLOCATOR));
831     PetscCall(MatSetLocalToGlobalMapping(P, ltog, ltog));
832     PetscCall(MatGetSize(lJ, &N, NULL));
833     PetscCall(MatGetLocalSize(lJ, &n, NULL));
834     PetscCall(MatSetSizes(P, n, n, N, N));
835     PetscCall(MatSetBlockSize(P, dof));
836     PetscCall(MatSetUp(P));
837     for (i = 0; i < nel; i++) PetscCall(MatSetValuesBlockedLocal(P, nen, e_loc + i * nen, nen, e_loc + i * nen, NULL, INSERT_VALUES));
838     PetscCall(MatPreallocatorPreallocate(P, (PetscBool)!da->prealloc_only, lJ));
839     PetscCall(MatISRestoreLocalMat(J, &lJ));
840     PetscCall(DMDARestoreElements(dm, &nel, &nen, &e_loc));
841     PetscCall(MatDestroy(&P));
842 
843     PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
844     PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
845   }
846   PetscFunctionReturn(0);
847 }
848 
/*
  DMCreateMatrix_DA_2d_MPISELL - Preallocates (and, unless da->prealloc_only, zero-fills)
  a SELL-format matrix J for a 2d DMDA, based on the DMDA's stencil width, stencil type
  (box or star) and boundary types.

  The routine makes two structurally identical sweeps over the owned grid points:
  the first counts column entries per row (MatPreallocate*), the second inserts
  explicit zeros at the same locations so the nonzero pattern is assembled.
*/
PetscErrorCode DMCreateMatrix_DA_2d_MPISELL(DM da, Mat J)
{
  PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny, m, n, dim, s, *cols = NULL, k, nc, *rows = NULL, col, cnt, l, p;
  PetscInt               lstart, lend, pstart, pend, *dnz, *onz;
  MPI_Comm               comm;
  PetscScalar           *values;
  DMBoundaryType         bx, by;
  ISLocalToGlobalMapping ltog;
  DMDAStencilType        st;

  PetscFunctionBegin;
  /*
         nc - number of components per grid point
         col - number of colors needed in one direction for single component problem

  */
  PetscCall(DMDAGetInfo(da, &dim, &m, &n, NULL, NULL, NULL, NULL, &nc, &s, &bx, &by, NULL, &st));
  col = 2 * s + 1;
  /* NOTE(review): unlike DMCreateMatrix_DA_2d_MPIAIJ, there is no "removedups" guard here
     for single-process periodic domains skinnier than the stencil width — confirm whether
     SELL preallocation can over-count in that corner case */
  PetscCall(DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL));
  PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL));
  PetscCall(PetscObjectGetComm((PetscObject)da, &comm));

  /* rows: the nc row indices of one grid point; cols: worst-case box stencil, col*col points * nc components * nc rows */
  PetscCall(PetscMalloc2(nc, &rows, col * col * nc * nc, &cols));
  PetscCall(DMGetLocalToGlobalMapping(da, &ltog));

  PetscCall(MatSetBlockSize(J, nc));
  /* determine the matrix preallocation information */
  MatPreallocateBegin(comm, nc * nx * ny, nc * nx * ny, dnz, onz);
  for (i = xs; i < xs + nx; i++) {
    /* stencil extent in x: full width when periodic, clipped at physical boundaries otherwise */
    pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
    pend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));

    for (j = ys; j < ys + ny; j++) {
      /* local (ghosted) index of grid point (i,j) */
      slot = i - gxs + gnx * (j - gys);

      lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
      lend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));

      cnt = 0;
      for (k = 0; k < nc; k++) {
        for (l = lstart; l < lend + 1; l++) {
          for (p = pstart; p < pend + 1; p++) {
            if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star have either l = 0 or p = 0 */
              cols[cnt++] = k + nc * (slot + gnx * l + p);
            }
          }
        }
        rows[k] = k + nc * (slot);
      }
      PetscCall(MatPreallocateSetLocal(ltog, nc, rows, ltog, cnt, cols, dnz, onz));
    }
  }
  PetscCall(MatSetBlockSize(J, nc));
  /* only one of these two takes effect, depending on whether J is sequential or parallel */
  PetscCall(MatSeqSELLSetPreallocation(J, 0, dnz));
  PetscCall(MatMPISELLSetPreallocation(J, 0, dnz, 0, onz));
  MatPreallocateEnd(dnz, onz);

  PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    /* zero buffer large enough for nc rows times the worst-case column count nc*col*col */
    PetscCall(PetscCalloc1(col * col * nc * nc, &values));
    for (i = xs; i < xs + nx; i++) {
      pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
      pend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));

      for (j = ys; j < ys + ny; j++) {
        slot = i - gxs + gnx * (j - gys);

        lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
        lend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));

        cnt = 0;
        for (k = 0; k < nc; k++) {
          for (l = lstart; l < lend + 1; l++) {
            for (p = pstart; p < pend + 1; p++) {
              if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star have either l = 0 or p = 0 */
                cols[cnt++] = k + nc * (slot + gnx * l + p);
              }
            }
          }
          rows[k] = k + nc * (slot);
        }
        PetscCall(MatSetValuesLocal(J, nc, rows, cnt, cols, values, INSERT_VALUES));
      }
    }
    PetscCall(PetscFree(values));
    /* do not copy values to GPU since they are all zero and not yet needed there */
    PetscCall(MatBindToCPU(J, PETSC_TRUE));
    PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatBindToCPU(J, PETSC_FALSE));
    PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  }
  PetscCall(PetscFree2(rows, cols));
  PetscFunctionReturn(0);
}
950 
951 PetscErrorCode DMCreateMatrix_DA_3d_MPISELL(DM da, Mat J)
952 {
953   PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
954   PetscInt               m, n, dim, s, *cols = NULL, k, nc, *rows = NULL, col, cnt, l, p, *dnz = NULL, *onz = NULL;
955   PetscInt               istart, iend, jstart, jend, kstart, kend, zs, nz, gzs, gnz, ii, jj, kk, M, N, P;
956   MPI_Comm               comm;
957   PetscScalar           *values;
958   DMBoundaryType         bx, by, bz;
959   ISLocalToGlobalMapping ltog;
960   DMDAStencilType        st;
961 
962   PetscFunctionBegin;
963   /*
964          nc - number of components per grid point
965          col - number of colors needed in one direction for single component problem
966 
967   */
968   PetscCall(DMDAGetInfo(da, &dim, &m, &n, &p, &M, &N, &P, &nc, &s, &bx, &by, &bz, &st));
969   col = 2 * s + 1;
970   PetscCall(DMDAGetCorners(da, &xs, &ys, &zs, &nx, &ny, &nz));
971   PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, &gzs, &gnx, &gny, &gnz));
972   PetscCall(PetscObjectGetComm((PetscObject)da, &comm));
973 
974   PetscCall(PetscMalloc2(nc, &rows, col * col * col * nc * nc, &cols));
975   PetscCall(DMGetLocalToGlobalMapping(da, &ltog));
976 
977   PetscCall(MatSetBlockSize(J, nc));
978   /* determine the matrix preallocation information */
979   MatPreallocateBegin(comm, nc * nx * ny * nz, nc * nx * ny * nz, dnz, onz);
980   for (i = xs; i < xs + nx; i++) {
981     istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
982     iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
983     for (j = ys; j < ys + ny; j++) {
984       jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
985       jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
986       for (k = zs; k < zs + nz; k++) {
987         kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
988         kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));
989 
990         slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);
991 
992         cnt = 0;
993         for (l = 0; l < nc; l++) {
994           for (ii = istart; ii < iend + 1; ii++) {
995             for (jj = jstart; jj < jend + 1; jj++) {
996               for (kk = kstart; kk < kend + 1; kk++) {
997                 if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/
998                   cols[cnt++] = l + nc * (slot + ii + gnx * jj + gnx * gny * kk);
999                 }
1000               }
1001             }
1002           }
1003           rows[l] = l + nc * (slot);
1004         }
1005         PetscCall(MatPreallocateSetLocal(ltog, nc, rows, ltog, cnt, cols, dnz, onz));
1006       }
1007     }
1008   }
1009   PetscCall(MatSetBlockSize(J, nc));
1010   PetscCall(MatSeqSELLSetPreallocation(J, 0, dnz));
1011   PetscCall(MatMPISELLSetPreallocation(J, 0, dnz, 0, onz));
1012   MatPreallocateEnd(dnz, onz);
1013   PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));
1014 
1015   /*
1016     For each node in the grid: we get the neighbors in the local (on processor ordering
1017     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1018     PETSc ordering.
1019   */
1020   if (!da->prealloc_only) {
1021     PetscCall(PetscCalloc1(col * col * col * nc * nc * nc, &values));
1022     for (i = xs; i < xs + nx; i++) {
1023       istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
1024       iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
1025       for (j = ys; j < ys + ny; j++) {
1026         jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
1027         jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
1028         for (k = zs; k < zs + nz; k++) {
1029           kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
1030           kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));
1031 
1032           slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);
1033 
1034           cnt = 0;
1035           for (l = 0; l < nc; l++) {
1036             for (ii = istart; ii < iend + 1; ii++) {
1037               for (jj = jstart; jj < jend + 1; jj++) {
1038                 for (kk = kstart; kk < kend + 1; kk++) {
1039                   if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/
1040                     cols[cnt++] = l + nc * (slot + ii + gnx * jj + gnx * gny * kk);
1041                   }
1042                 }
1043               }
1044             }
1045             rows[l] = l + nc * (slot);
1046           }
1047           PetscCall(MatSetValuesLocal(J, nc, rows, cnt, cols, values, INSERT_VALUES));
1048         }
1049       }
1050     }
1051     PetscCall(PetscFree(values));
1052     /* do not copy values to GPU since they are all zero and not yet needed there */
1053     PetscCall(MatBindToCPU(J, PETSC_TRUE));
1054     PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
1055     PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
1056     PetscCall(MatBindToCPU(J, PETSC_FALSE));
1057     PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
1058   }
1059   PetscCall(PetscFree2(rows, cols));
1060   PetscFunctionReturn(0);
1061 }
1062 
/*
  DMCreateMatrix_DA_2d_MPIAIJ - Preallocates (and, unless da->prealloc_only, zero-fills)
  an AIJ-format matrix J for a 2d DMDA, based on the DMDA's stencil width, stencil type
  (box or star) and boundary types.

  Two sweeps over the owned grid points: the first counts column entries per row
  (MatPreallocate*), the second inserts the nonzero pattern (values passed as NULL).
*/
PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ(DM da, Mat J)
{
  PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny, m, n, dim, s, *cols = NULL, k, nc, *rows = NULL, col, cnt, l, p, M, N;
  PetscInt               lstart, lend, pstart, pend, *dnz, *onz;
  MPI_Comm               comm;
  DMBoundaryType         bx, by;
  ISLocalToGlobalMapping ltog, mltog;
  DMDAStencilType        st;
  PetscBool              removedups = PETSC_FALSE, alreadyboundtocpu = PETSC_TRUE;

  PetscFunctionBegin;
  /*
         nc - number of components per grid point
         col - number of colors needed in one direction for single component problem

  */
  PetscCall(DMDAGetInfo(da, &dim, &m, &n, NULL, &M, &N, NULL, &nc, &s, &bx, &by, NULL, &st));
  /* non-periodic insertion below generates columns in sorted order, which MAT_SORTED_FULL exploits */
  if (bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE) PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_TRUE));
  col = 2 * s + 1;
  /*
       With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times
       because of "wrapping" around the end of the domain hitting an entry already counted in the other direction.
  */
  if (M == 1 && 2 * s >= m) removedups = PETSC_TRUE;
  if (N == 1 && 2 * s >= n) removedups = PETSC_TRUE;
  PetscCall(DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL));
  PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL));
  PetscCall(PetscObjectGetComm((PetscObject)da, &comm));

  /* rows: the nc row indices of one grid point; cols: worst-case box stencil, col*col points * nc components * nc rows */
  PetscCall(PetscMalloc2(nc, &rows, col * col * nc * nc, &cols));
  PetscCall(DMGetLocalToGlobalMapping(da, &ltog));

  PetscCall(MatSetBlockSize(J, nc));
  /* determine the matrix preallocation information */
  MatPreallocateBegin(comm, nc * nx * ny, nc * nx * ny, dnz, onz);
  for (i = xs; i < xs + nx; i++) {
    /* stencil extent in x: full width when periodic, clipped at physical boundaries otherwise */
    pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
    pend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));

    for (j = ys; j < ys + ny; j++) {
      /* local (ghosted) index of grid point (i,j) */
      slot = i - gxs + gnx * (j - gys);

      lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
      lend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));

      cnt = 0;
      for (k = 0; k < nc; k++) {
        for (l = lstart; l < lend + 1; l++) {
          for (p = pstart; p < pend + 1; p++) {
            if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star have either l = 0 or p = 0 */
              cols[cnt++] = k + nc * (slot + gnx * l + p);
            }
          }
        }
        rows[k] = k + nc * (slot);
      }
      if (removedups) PetscCall(MatPreallocateSetLocalRemoveDups(ltog, nc, rows, ltog, cnt, cols, dnz, onz));
      else PetscCall(MatPreallocateSetLocal(ltog, nc, rows, ltog, cnt, cols, dnz, onz));
    }
  }
  PetscCall(MatSetBlockSize(J, nc));
  /* only one of these two takes effect, depending on whether J is sequential or parallel */
  PetscCall(MatSeqAIJSetPreallocation(J, 0, dnz));
  PetscCall(MatMPIAIJSetPreallocation(J, 0, dnz, 0, onz));
  MatPreallocateEnd(dnz, onz);
  /* do not clobber a local-to-global mapping the matrix may already carry */
  PetscCall(MatGetLocalToGlobalMapping(J, &mltog, NULL));
  if (!mltog) PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    for (i = xs; i < xs + nx; i++) {
      pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
      pend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));

      for (j = ys; j < ys + ny; j++) {
        slot = i - gxs + gnx * (j - gys);

        lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
        lend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));

        /* here the component loop is innermost (unlike the counting pass) so columns come out
           in ascending order for the sorted-insertion fast path */
        cnt = 0;
        for (l = lstart; l < lend + 1; l++) {
          for (p = pstart; p < pend + 1; p++) {
            if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star have either l = 0 or p = 0 */
              cols[cnt++] = nc * (slot + gnx * l + p);
              for (k = 1; k < nc; k++) {
                cols[cnt] = 1 + cols[cnt - 1];
                cnt++;
              }
            }
          }
        }
        for (k = 0; k < nc; k++) rows[k] = k + nc * (slot);
        PetscCall(MatSetValuesLocal(J, nc, rows, cnt, cols, NULL, INSERT_VALUES));
      }
    }
    /* do not copy values to GPU since they are all zero and not yet needed there */
    PetscCall(MatBoundToCPU(J, &alreadyboundtocpu));
    PetscCall(MatBindToCPU(J, PETSC_TRUE));
    PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
    if (!alreadyboundtocpu) PetscCall(MatBindToCPU(J, PETSC_FALSE));
    PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
    if (bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE) PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_FALSE));
  }
  PetscCall(PetscFree2(rows, cols));
  PetscFunctionReturn(0);
}
1174 
1175 PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ_Fill(DM da, Mat J)
1176 {
1177   PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
1178   PetscInt               m, n, dim, s, *cols, k, nc, row, col, cnt, maxcnt = 0, l, p, M, N;
1179   PetscInt               lstart, lend, pstart, pend, *dnz, *onz;
1180   DM_DA                 *dd = (DM_DA *)da->data;
1181   PetscInt               ifill_col, *ofill = dd->ofill, *dfill = dd->dfill;
1182   MPI_Comm               comm;
1183   DMBoundaryType         bx, by;
1184   ISLocalToGlobalMapping ltog;
1185   DMDAStencilType        st;
1186   PetscBool              removedups = PETSC_FALSE;
1187 
1188   PetscFunctionBegin;
1189   /*
1190          nc - number of components per grid point
1191          col - number of colors needed in one direction for single component problem
1192 
1193   */
1194   PetscCall(DMDAGetInfo(da, &dim, &m, &n, NULL, &M, &N, NULL, &nc, &s, &bx, &by, NULL, &st));
1195   col = 2 * s + 1;
1196   /*
1197        With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times
1198        because of "wrapping" around the end of the domain hitting an entry already counted in the other direction.
1199   */
1200   if (M == 1 && 2 * s >= m) removedups = PETSC_TRUE;
1201   if (N == 1 && 2 * s >= n) removedups = PETSC_TRUE;
1202   PetscCall(DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL));
1203   PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL));
1204   PetscCall(PetscObjectGetComm((PetscObject)da, &comm));
1205 
1206   PetscCall(PetscMalloc1(col * col * nc, &cols));
1207   PetscCall(DMGetLocalToGlobalMapping(da, &ltog));
1208 
1209   PetscCall(MatSetBlockSize(J, nc));
1210   /* determine the matrix preallocation information */
1211   MatPreallocateBegin(comm, nc * nx * ny, nc * nx * ny, dnz, onz);
1212   for (i = xs; i < xs + nx; i++) {
1213     pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
1214     pend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
1215 
1216     for (j = ys; j < ys + ny; j++) {
1217       slot = i - gxs + gnx * (j - gys);
1218 
1219       lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
1220       lend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
1221 
1222       for (k = 0; k < nc; k++) {
1223         cnt = 0;
1224         for (l = lstart; l < lend + 1; l++) {
1225           for (p = pstart; p < pend + 1; p++) {
1226             if (l || p) {
1227               if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star */
1228                 for (ifill_col = ofill[k]; ifill_col < ofill[k + 1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc * (slot + gnx * l + p);
1229               }
1230             } else {
1231               if (dfill) {
1232                 for (ifill_col = dfill[k]; ifill_col < dfill[k + 1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc * (slot + gnx * l + p);
1233               } else {
1234                 for (ifill_col = 0; ifill_col < nc; ifill_col++) cols[cnt++] = ifill_col + nc * (slot + gnx * l + p);
1235               }
1236             }
1237           }
1238         }
1239         row    = k + nc * (slot);
1240         maxcnt = PetscMax(maxcnt, cnt);
1241         if (removedups) PetscCall(MatPreallocateSetLocalRemoveDups(ltog, 1, &row, ltog, cnt, cols, dnz, onz));
1242         else PetscCall(MatPreallocateSetLocal(ltog, 1, &row, ltog, cnt, cols, dnz, onz));
1243       }
1244     }
1245   }
1246   PetscCall(MatSeqAIJSetPreallocation(J, 0, dnz));
1247   PetscCall(MatMPIAIJSetPreallocation(J, 0, dnz, 0, onz));
1248   MatPreallocateEnd(dnz, onz);
1249   PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));
1250 
1251   /*
1252     For each node in the grid: we get the neighbors in the local (on processor ordering
1253     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1254     PETSc ordering.
1255   */
1256   if (!da->prealloc_only) {
1257     for (i = xs; i < xs + nx; i++) {
1258       pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
1259       pend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
1260 
1261       for (j = ys; j < ys + ny; j++) {
1262         slot = i - gxs + gnx * (j - gys);
1263 
1264         lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
1265         lend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
1266 
1267         for (k = 0; k < nc; k++) {
1268           cnt = 0;
1269           for (l = lstart; l < lend + 1; l++) {
1270             for (p = pstart; p < pend + 1; p++) {
1271               if (l || p) {
1272                 if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star */
1273                   for (ifill_col = ofill[k]; ifill_col < ofill[k + 1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc * (slot + gnx * l + p);
1274                 }
1275               } else {
1276                 if (dfill) {
1277                   for (ifill_col = dfill[k]; ifill_col < dfill[k + 1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc * (slot + gnx * l + p);
1278                 } else {
1279                   for (ifill_col = 0; ifill_col < nc; ifill_col++) cols[cnt++] = ifill_col + nc * (slot + gnx * l + p);
1280                 }
1281               }
1282             }
1283           }
1284           row = k + nc * (slot);
1285           PetscCall(MatSetValuesLocal(J, 1, &row, cnt, cols, NULL, INSERT_VALUES));
1286         }
1287       }
1288     }
1289     /* do not copy values to GPU since they are all zero and not yet needed there */
1290     PetscCall(MatBindToCPU(J, PETSC_TRUE));
1291     PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
1292     PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
1293     PetscCall(MatBindToCPU(J, PETSC_FALSE));
1294     PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
1295   }
1296   PetscCall(PetscFree(cols));
1297   PetscFunctionReturn(0);
1298 }
1299 
1300 /* ---------------------------------------------------------------------------------*/
1301 
/*
  DMCreateMatrix_DA_3d_MPIAIJ - Preallocates (and, unless da->prealloc_only, zero-fills)
  an AIJ-format matrix J for a 3d DMDA, based on the DMDA's stencil width, stencil type
  (box or star) and boundary types.

  Two sweeps over the owned grid points: the first counts column entries per row
  (MatPreallocate*), the second inserts the nonzero pattern (values passed as NULL).
*/
PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ(DM da, Mat J)
{
  PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
  PetscInt               m, n, dim, s, *cols = NULL, k, nc, *rows = NULL, col, cnt, l, p, *dnz = NULL, *onz = NULL;
  PetscInt               istart, iend, jstart, jend, kstart, kend, zs, nz, gzs, gnz, ii, jj, kk, M, N, P;
  MPI_Comm               comm;
  DMBoundaryType         bx, by, bz;
  ISLocalToGlobalMapping ltog, mltog;
  DMDAStencilType        st;
  PetscBool              removedups = PETSC_FALSE;

  PetscFunctionBegin;
  /*
         nc - number of components per grid point
         col - number of colors needed in one direction for single component problem

  */
  PetscCall(DMDAGetInfo(da, &dim, &m, &n, &p, &M, &N, &P, &nc, &s, &bx, &by, &bz, &st));
  /* non-periodic insertion below generates columns in sorted order, which MAT_SORTED_FULL exploits */
  if (bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE && bz == DM_BOUNDARY_NONE) PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_TRUE));
  col = 2 * s + 1;

  /*
       With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times
       because of "wrapping" around the end of the domain hitting an entry already counted in the other direction.
  */
  if (M == 1 && 2 * s >= m) removedups = PETSC_TRUE;
  if (N == 1 && 2 * s >= n) removedups = PETSC_TRUE;
  if (P == 1 && 2 * s >= p) removedups = PETSC_TRUE;

  PetscCall(DMDAGetCorners(da, &xs, &ys, &zs, &nx, &ny, &nz));
  PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, &gzs, &gnx, &gny, &gnz));
  PetscCall(PetscObjectGetComm((PetscObject)da, &comm));

  /* rows: the nc row indices of one grid point; cols: worst-case box stencil, col^3 points * nc components * nc rows */
  PetscCall(PetscMalloc2(nc, &rows, col * col * col * nc * nc, &cols));
  PetscCall(DMGetLocalToGlobalMapping(da, &ltog));

  PetscCall(MatSetBlockSize(J, nc));
  /* determine the matrix preallocation information */
  MatPreallocateBegin(comm, nc * nx * ny * nz, nc * nx * ny * nz, dnz, onz);
  for (i = xs; i < xs + nx; i++) {
    /* stencil extents: full width when periodic, clipped at physical boundaries otherwise */
    istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
    iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
    for (j = ys; j < ys + ny; j++) {
      jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
      jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
      for (k = zs; k < zs + nz; k++) {
        kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
        kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));

        /* local (ghosted) index of grid point (i,j,k) */
        slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);

        cnt = 0;
        for (l = 0; l < nc; l++) {
          for (ii = istart; ii < iend + 1; ii++) {
            for (jj = jstart; jj < jend + 1; jj++) {
              for (kk = kstart; kk < kend + 1; kk++) {
                if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/
                  cols[cnt++] = l + nc * (slot + ii + gnx * jj + gnx * gny * kk);
                }
              }
            }
          }
          rows[l] = l + nc * (slot);
        }
        if (removedups) PetscCall(MatPreallocateSetLocalRemoveDups(ltog, nc, rows, ltog, cnt, cols, dnz, onz));
        else PetscCall(MatPreallocateSetLocal(ltog, nc, rows, ltog, cnt, cols, dnz, onz));
      }
    }
  }
  PetscCall(MatSetBlockSize(J, nc));
  /* only one of these two takes effect, depending on whether J is sequential or parallel */
  PetscCall(MatSeqAIJSetPreallocation(J, 0, dnz));
  PetscCall(MatMPIAIJSetPreallocation(J, 0, dnz, 0, onz));
  MatPreallocateEnd(dnz, onz);
  /* do not clobber a local-to-global mapping the matrix may already carry */
  PetscCall(MatGetLocalToGlobalMapping(J, &mltog, NULL));
  if (!mltog) PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    for (i = xs; i < xs + nx; i++) {
      istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
      iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
      for (j = ys; j < ys + ny; j++) {
        jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
        jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
        for (k = zs; k < zs + nz; k++) {
          kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
          kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));

          slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);

          /* NOTE: the loop nest here runs kk/jj/ii (outer-to-inner), the reverse of the
             counting pass, with components innermost, so columns are emitted in ascending
             local order for the sorted-insertion fast path */
          cnt = 0;
          for (kk = kstart; kk < kend + 1; kk++) {
            for (jj = jstart; jj < jend + 1; jj++) {
              for (ii = istart; ii < iend + 1; ii++) {
                if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/
                  cols[cnt++] = nc * (slot + ii + gnx * jj + gnx * gny * kk);
                  for (l = 1; l < nc; l++) {
                    cols[cnt] = 1 + cols[cnt - 1];
                    cnt++;
                  }
                }
              }
            }
          }
          rows[0] = nc * (slot);
          for (l = 1; l < nc; l++) rows[l] = 1 + rows[l - 1];
          PetscCall(MatSetValuesLocal(J, nc, rows, cnt, cols, NULL, INSERT_VALUES));
        }
      }
    }
    /* do not copy values to GPU since they are all zero and not yet needed there */
    PetscCall(MatBindToCPU(J, PETSC_TRUE));
    PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
    if (bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE && bz == DM_BOUNDARY_NONE) PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_FALSE));
    PetscCall(MatBindToCPU(J, PETSC_FALSE));
    PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  }
  PetscCall(PetscFree2(rows, cols));
  PetscFunctionReturn(0);
}
1427 
1428 /* ---------------------------------------------------------------------------------*/
1429 
1430 PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ_Fill(DM da, Mat J)
1431 {
1432   DM_DA                 *dd = (DM_DA *)da->data;
1433   PetscInt               xs, nx, i, j, gxs, gnx, row, k, l;
1434   PetscInt               m, dim, s, *cols = NULL, nc, cnt, maxcnt = 0, *ocols;
1435   PetscInt              *ofill = dd->ofill, *dfill = dd->dfill;
1436   DMBoundaryType         bx;
1437   ISLocalToGlobalMapping ltog;
1438   PetscMPIInt            rank, size;
1439 
1440   PetscFunctionBegin;
1441   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)da), &rank));
1442   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)da), &size));
1443 
1444   /*
1445          nc - number of components per grid point
1446 
1447   */
1448   PetscCall(DMDAGetInfo(da, &dim, &m, NULL, NULL, NULL, NULL, NULL, &nc, &s, &bx, NULL, NULL, NULL));
1449   PetscCheck(s <= 1, PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "Matrix creation for 1d not implemented correctly for stencil width larger than 1");
1450   PetscCall(DMDAGetCorners(da, &xs, NULL, NULL, &nx, NULL, NULL));
1451   PetscCall(DMDAGetGhostCorners(da, &gxs, NULL, NULL, &gnx, NULL, NULL));
1452 
1453   PetscCall(MatSetBlockSize(J, nc));
1454   PetscCall(PetscCalloc2(nx * nc, &cols, nx * nc, &ocols));
1455 
1456   /*
1457         note should be smaller for first and last process with no periodic
1458         does not handle dfill
1459   */
1460   cnt = 0;
1461   /* coupling with process to the left */
1462   for (i = 0; i < s; i++) {
1463     for (j = 0; j < nc; j++) {
1464       ocols[cnt] = ((rank == 0) ? 0 : (s - i) * (ofill[j + 1] - ofill[j]));
1465       cols[cnt]  = dfill[j + 1] - dfill[j] + (s + i) * (ofill[j + 1] - ofill[j]);
1466       if (rank == 0 && (dd->bx == DM_BOUNDARY_PERIODIC)) {
1467         if (size > 1) ocols[cnt] += (s - i) * (ofill[j + 1] - ofill[j]);
1468         else cols[cnt] += (s - i) * (ofill[j + 1] - ofill[j]);
1469       }
1470       maxcnt = PetscMax(maxcnt, ocols[cnt] + cols[cnt]);
1471       cnt++;
1472     }
1473   }
1474   for (i = s; i < nx - s; i++) {
1475     for (j = 0; j < nc; j++) {
1476       cols[cnt] = dfill[j + 1] - dfill[j] + 2 * s * (ofill[j + 1] - ofill[j]);
1477       maxcnt    = PetscMax(maxcnt, ocols[cnt] + cols[cnt]);
1478       cnt++;
1479     }
1480   }
1481   /* coupling with process to the right */
1482   for (i = nx - s; i < nx; i++) {
1483     for (j = 0; j < nc; j++) {
1484       ocols[cnt] = ((rank == (size - 1)) ? 0 : (i - nx + s + 1) * (ofill[j + 1] - ofill[j]));
1485       cols[cnt]  = dfill[j + 1] - dfill[j] + (s + nx - i - 1) * (ofill[j + 1] - ofill[j]);
1486       if ((rank == size - 1) && (dd->bx == DM_BOUNDARY_PERIODIC)) {
1487         if (size > 1) ocols[cnt] += (i - nx + s + 1) * (ofill[j + 1] - ofill[j]);
1488         else cols[cnt] += (i - nx + s + 1) * (ofill[j + 1] - ofill[j]);
1489       }
1490       maxcnt = PetscMax(maxcnt, ocols[cnt] + cols[cnt]);
1491       cnt++;
1492     }
1493   }
1494 
1495   PetscCall(MatSeqAIJSetPreallocation(J, 0, cols));
1496   PetscCall(MatMPIAIJSetPreallocation(J, 0, cols, 0, ocols));
1497   PetscCall(PetscFree2(cols, ocols));
1498 
1499   PetscCall(DMGetLocalToGlobalMapping(da, &ltog));
1500   PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));
1501 
1502   /*
1503     For each node in the grid: we get the neighbors in the local (on processor ordering
1504     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1505     PETSc ordering.
1506   */
1507   if (!da->prealloc_only) {
1508     PetscCall(PetscMalloc1(maxcnt, &cols));
1509     row = xs * nc;
1510     /* coupling with process to the left */
1511     for (i = xs; i < xs + s; i++) {
1512       for (j = 0; j < nc; j++) {
1513         cnt = 0;
1514         if (rank) {
1515           for (l = 0; l < s; l++) {
1516             for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i - s + l) * nc + ofill[k];
1517           }
1518         }
1519         if (rank == 0 && (dd->bx == DM_BOUNDARY_PERIODIC)) {
1520           for (l = 0; l < s; l++) {
1521             for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (m + i - s - l) * nc + ofill[k];
1522           }
1523         }
1524         if (dfill) {
1525           for (k = dfill[j]; k < dfill[j + 1]; k++) cols[cnt++] = i * nc + dfill[k];
1526         } else {
1527           for (k = 0; k < nc; k++) cols[cnt++] = i * nc + k;
1528         }
1529         for (l = 0; l < s; l++) {
1530           for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i + s - l) * nc + ofill[k];
1531         }
1532         PetscCall(MatSetValues(J, 1, &row, cnt, cols, NULL, INSERT_VALUES));
1533         row++;
1534       }
1535     }
1536     for (i = xs + s; i < xs + nx - s; i++) {
1537       for (j = 0; j < nc; j++) {
1538         cnt = 0;
1539         for (l = 0; l < s; l++) {
1540           for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i - s + l) * nc + ofill[k];
1541         }
1542         if (dfill) {
1543           for (k = dfill[j]; k < dfill[j + 1]; k++) cols[cnt++] = i * nc + dfill[k];
1544         } else {
1545           for (k = 0; k < nc; k++) cols[cnt++] = i * nc + k;
1546         }
1547         for (l = 0; l < s; l++) {
1548           for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i + s - l) * nc + ofill[k];
1549         }
1550         PetscCall(MatSetValues(J, 1, &row, cnt, cols, NULL, INSERT_VALUES));
1551         row++;
1552       }
1553     }
1554     /* coupling with process to the right */
1555     for (i = xs + nx - s; i < xs + nx; i++) {
1556       for (j = 0; j < nc; j++) {
1557         cnt = 0;
1558         for (l = 0; l < s; l++) {
1559           for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i - s + l) * nc + ofill[k];
1560         }
1561         if (dfill) {
1562           for (k = dfill[j]; k < dfill[j + 1]; k++) cols[cnt++] = i * nc + dfill[k];
1563         } else {
1564           for (k = 0; k < nc; k++) cols[cnt++] = i * nc + k;
1565         }
1566         if (rank < size - 1) {
1567           for (l = 0; l < s; l++) {
1568             for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i + s - l) * nc + ofill[k];
1569           }
1570         }
1571         if ((rank == size - 1) && (dd->bx == DM_BOUNDARY_PERIODIC)) {
1572           for (l = 0; l < s; l++) {
1573             for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i - s - l - m + 2) * nc + ofill[k];
1574           }
1575         }
1576         PetscCall(MatSetValues(J, 1, &row, cnt, cols, NULL, INSERT_VALUES));
1577         row++;
1578       }
1579     }
1580     PetscCall(PetscFree(cols));
1581     /* do not copy values to GPU since they are all zero and not yet needed there */
1582     PetscCall(MatBindToCPU(J, PETSC_TRUE));
1583     PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
1584     PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
1585     PetscCall(MatBindToCPU(J, PETSC_FALSE));
1586     PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
1587   }
1588   PetscFunctionReturn(0);
1589 }
1590 
1591 /* ---------------------------------------------------------------------------------*/
1592 
1593 PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ(DM da, Mat J)
1594 {
1595   PetscInt               xs, nx, i, i1, slot, gxs, gnx;
1596   PetscInt               m, dim, s, *cols = NULL, nc, *rows = NULL, col, cnt, l;
1597   PetscInt               istart, iend;
1598   DMBoundaryType         bx;
1599   ISLocalToGlobalMapping ltog, mltog;
1600 
1601   PetscFunctionBegin;
1602   /*
1603          nc - number of components per grid point
1604          col - number of colors needed in one direction for single component problem
1605 
1606   */
1607   PetscCall(DMDAGetInfo(da, &dim, &m, NULL, NULL, NULL, NULL, NULL, &nc, &s, &bx, NULL, NULL, NULL));
1608   if (bx == DM_BOUNDARY_NONE) PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_TRUE));
1609   col = 2 * s + 1;
1610 
1611   PetscCall(DMDAGetCorners(da, &xs, NULL, NULL, &nx, NULL, NULL));
1612   PetscCall(DMDAGetGhostCorners(da, &gxs, NULL, NULL, &gnx, NULL, NULL));
1613 
1614   PetscCall(MatSetBlockSize(J, nc));
1615   PetscCall(MatSeqAIJSetPreallocation(J, col * nc, NULL));
1616   PetscCall(MatMPIAIJSetPreallocation(J, col * nc, NULL, col * nc, NULL));
1617 
1618   PetscCall(DMGetLocalToGlobalMapping(da, &ltog));
1619   PetscCall(MatGetLocalToGlobalMapping(J, &mltog, NULL));
1620   if (!mltog) PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));
1621 
1622   /*
1623     For each node in the grid: we get the neighbors in the local (on processor ordering
1624     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1625     PETSc ordering.
1626   */
1627   if (!da->prealloc_only) {
1628     PetscCall(PetscMalloc2(nc, &rows, col * nc * nc, &cols));
1629     for (i = xs; i < xs + nx; i++) {
1630       istart = PetscMax(-s, gxs - i);
1631       iend   = PetscMin(s, gxs + gnx - i - 1);
1632       slot   = i - gxs;
1633 
1634       cnt = 0;
1635       for (i1 = istart; i1 < iend + 1; i1++) {
1636         cols[cnt++] = nc * (slot + i1);
1637         for (l = 1; l < nc; l++) {
1638           cols[cnt] = 1 + cols[cnt - 1];
1639           cnt++;
1640         }
1641       }
1642       rows[0] = nc * (slot);
1643       for (l = 1; l < nc; l++) rows[l] = 1 + rows[l - 1];
1644       PetscCall(MatSetValuesLocal(J, nc, rows, cnt, cols, NULL, INSERT_VALUES));
1645     }
1646     /* do not copy values to GPU since they are all zero and not yet needed there */
1647     PetscCall(MatBindToCPU(J, PETSC_TRUE));
1648     PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
1649     PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
1650     if (bx == DM_BOUNDARY_NONE) PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_FALSE));
1651     PetscCall(MatBindToCPU(J, PETSC_FALSE));
1652     PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
1653     PetscCall(PetscFree2(rows, cols));
1654   }
1655   PetscFunctionReturn(0);
1656 }
1657 
1658 /* ---------------------------------------------------------------------------------*/
1659 
1660 PetscErrorCode DMCreateMatrix_DA_1d_SeqAIJ_NoPreallocation(DM da, Mat J)
1661 {
1662   PetscInt               xs, nx, i, i1, slot, gxs, gnx;
1663   PetscInt               m, dim, s, *cols = NULL, nc, *rows = NULL, col, cnt, l;
1664   PetscInt               istart, iend;
1665   DMBoundaryType         bx;
1666   ISLocalToGlobalMapping ltog, mltog;
1667 
1668   PetscFunctionBegin;
1669   /*
1670          nc - number of components per grid point
1671          col - number of colors needed in one direction for single component problem
1672   */
1673   PetscCall(DMDAGetInfo(da, &dim, &m, NULL, NULL, NULL, NULL, NULL, &nc, &s, &bx, NULL, NULL, NULL));
1674   col = 2 * s + 1;
1675 
1676   PetscCall(DMDAGetCorners(da, &xs, NULL, NULL, &nx, NULL, NULL));
1677   PetscCall(DMDAGetGhostCorners(da, &gxs, NULL, NULL, &gnx, NULL, NULL));
1678 
1679   PetscCall(MatSetBlockSize(J, nc));
1680   PetscCall(MatSeqAIJSetTotalPreallocation(J, nx * nc * col * nc));
1681 
1682   PetscCall(DMGetLocalToGlobalMapping(da, &ltog));
1683   PetscCall(MatGetLocalToGlobalMapping(J, &mltog, NULL));
1684   if (!mltog) PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));
1685 
1686   /*
1687     For each node in the grid: we get the neighbors in the local (on processor ordering
1688     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1689     PETSc ordering.
1690   */
1691   if (!da->prealloc_only) {
1692     PetscCall(PetscMalloc2(nc, &rows, col * nc * nc, &cols));
1693     for (i = xs; i < xs + nx; i++) {
1694       istart = PetscMax(-s, gxs - i);
1695       iend   = PetscMin(s, gxs + gnx - i - 1);
1696       slot   = i - gxs;
1697 
1698       cnt = 0;
1699       for (i1 = istart; i1 < iend + 1; i1++) {
1700         cols[cnt++] = nc * (slot + i1);
1701         for (l = 1; l < nc; l++) {
1702           cols[cnt] = 1 + cols[cnt - 1];
1703           cnt++;
1704         }
1705       }
1706       rows[0] = nc * (slot);
1707       for (l = 1; l < nc; l++) rows[l] = 1 + rows[l - 1];
1708       PetscCall(MatSetValuesLocal(J, nc, rows, cnt, cols, NULL, INSERT_VALUES));
1709     }
1710     /* do not copy values to GPU since they are all zero and not yet needed there */
1711     PetscCall(MatBindToCPU(J, PETSC_TRUE));
1712     PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
1713     PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
1714     if (bx == DM_BOUNDARY_NONE) PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_FALSE));
1715     PetscCall(MatBindToCPU(J, PETSC_FALSE));
1716     PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
1717     PetscCall(PetscFree2(rows, cols));
1718   }
1719   PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_FALSE));
1720   PetscFunctionReturn(0);
1721 }
1722 
/*
  Preallocates a 2d DMDA BAIJ matrix (block size nc) and, unless da->prealloc_only,
  inserts explicit zero blocks at every potential nonzero block location so that
  later insertions cannot create new nonzero locations.
*/
PetscErrorCode DMCreateMatrix_DA_2d_MPIBAIJ(DM da, Mat J)
{
  PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
  PetscInt               m, n, dim, s, *cols, nc, col, cnt, *dnz, *onz;
  PetscInt               istart, iend, jstart, jend, ii, jj;
  MPI_Comm               comm;
  PetscScalar           *values;
  DMBoundaryType         bx, by;
  DMDAStencilType        st;
  ISLocalToGlobalMapping ltog;

  PetscFunctionBegin;
  /*
     nc - number of components per grid point
     col - number of colors needed in one direction for single component problem
  */
  PetscCall(DMDAGetInfo(da, &dim, &m, &n, NULL, NULL, NULL, NULL, &nc, &s, &bx, &by, NULL, &st));
  col = 2 * s + 1;

  PetscCall(DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL));
  PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL));
  PetscCall(PetscObjectGetComm((PetscObject)da, &comm));

  /* scratch array of block-column indices; col*col is the worst (BOX-stencil) case */
  PetscCall(PetscMalloc1(col * col * nc * nc, &cols));

  PetscCall(DMGetLocalToGlobalMapping(da, &ltog));

  /* determine the matrix preallocation information */
  MatPreallocateBegin(comm, nx * ny, nx * ny, dnz, onz);
  for (i = xs; i < xs + nx; i++) {
    /* with a periodic boundary the full stencil always fits; otherwise clip it at the domain edge */
    istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
    iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
    for (j = ys; j < ys + ny; j++) {
      jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
      jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
      slot   = i - gxs + gnx * (j - gys); /* local (ghosted) index of this grid point */

      /* Find block columns in block row */
      cnt = 0;
      for (ii = istart; ii < iend + 1; ii++) {
        for (jj = jstart; jj < jend + 1; jj++) {
          if (st == DMDA_STENCIL_BOX || !ii || !jj) { /* BOX or on the STAR */
            cols[cnt++] = slot + ii + gnx * jj;
          }
        }
      }
      PetscCall(MatPreallocateSetLocalBlock(ltog, 1, &slot, ltog, cnt, cols, dnz, onz));
    }
  }
  PetscCall(MatSeqBAIJSetPreallocation(J, nc, 0, dnz));
  PetscCall(MatMPIBAIJSetPreallocation(J, nc, 0, dnz, 0, onz));
  MatPreallocateEnd(dnz, onz);

  PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    /* zero block of values reused for every insertion */
    PetscCall(PetscCalloc1(col * col * nc * nc, &values));
    for (i = xs; i < xs + nx; i++) {
      istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
      iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
      for (j = ys; j < ys + ny; j++) {
        jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
        jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
        slot   = i - gxs + gnx * (j - gys);
        cnt    = 0;
        for (ii = istart; ii < iend + 1; ii++) {
          for (jj = jstart; jj < jend + 1; jj++) {
            if (st == DMDA_STENCIL_BOX || !ii || !jj) { /* BOX or on the STAR */
              cols[cnt++] = slot + ii + gnx * jj;
            }
          }
        }
        PetscCall(MatSetValuesBlockedLocal(J, 1, &slot, cnt, cols, values, INSERT_VALUES));
      }
    }
    PetscCall(PetscFree(values));
    /* do not copy values to GPU since they are all zero and not yet needed there */
    PetscCall(MatBindToCPU(J, PETSC_TRUE));
    PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatBindToCPU(J, PETSC_FALSE));
    PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  }
  PetscCall(PetscFree(cols));
  PetscFunctionReturn(0);
}
1814 
/*
  Preallocates a 3d DMDA BAIJ matrix (block size nc) and, unless da->prealloc_only,
  inserts explicit zero blocks at every potential nonzero block location.
*/
PetscErrorCode DMCreateMatrix_DA_3d_MPIBAIJ(DM da, Mat J)
{
  PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
  PetscInt               m, n, dim, s, *cols, k, nc, col, cnt, p, *dnz, *onz;
  PetscInt               istart, iend, jstart, jend, kstart, kend, zs, nz, gzs, gnz, ii, jj, kk;
  MPI_Comm               comm;
  PetscScalar           *values;
  DMBoundaryType         bx, by, bz;
  DMDAStencilType        st;
  ISLocalToGlobalMapping ltog;

  PetscFunctionBegin;
  /*
         nc - number of components per grid point
         col - number of colors needed in one direction for single component problem

  */
  PetscCall(DMDAGetInfo(da, &dim, &m, &n, &p, NULL, NULL, NULL, &nc, &s, &bx, &by, &bz, &st));
  col = 2 * s + 1;

  PetscCall(DMDAGetCorners(da, &xs, &ys, &zs, &nx, &ny, &nz));
  PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, &gzs, &gnx, &gny, &gnz));
  PetscCall(PetscObjectGetComm((PetscObject)da, &comm));

  /* scratch array of block-column indices; col^3 is the worst (BOX-stencil) case */
  PetscCall(PetscMalloc1(col * col * col, &cols));

  PetscCall(DMGetLocalToGlobalMapping(da, &ltog));

  /* determine the matrix preallocation information */
  MatPreallocateBegin(comm, nx * ny * nz, nx * ny * nz, dnz, onz);
  for (i = xs; i < xs + nx; i++) {
    /* with a periodic boundary the full stencil always fits; otherwise clip it at the domain edge */
    istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
    iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
    for (j = ys; j < ys + ny; j++) {
      jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
      jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
      for (k = zs; k < zs + nz; k++) {
        kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
        kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));

        slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs); /* local (ghosted) index */

        /* Find block columns in block row */
        cnt = 0;
        for (ii = istart; ii < iend + 1; ii++) {
          for (jj = jstart; jj < jend + 1; jj++) {
            for (kk = kstart; kk < kend + 1; kk++) {
              if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/
                cols[cnt++] = slot + ii + gnx * jj + gnx * gny * kk;
              }
            }
          }
        }
        PetscCall(MatPreallocateSetLocalBlock(ltog, 1, &slot, ltog, cnt, cols, dnz, onz));
      }
    }
  }
  PetscCall(MatSeqBAIJSetPreallocation(J, nc, 0, dnz));
  PetscCall(MatMPIBAIJSetPreallocation(J, nc, 0, dnz, 0, onz));
  MatPreallocateEnd(dnz, onz);

  PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    /* zero block of values reused for every insertion */
    PetscCall(PetscCalloc1(col * col * col * nc * nc, &values));
    for (i = xs; i < xs + nx; i++) {
      istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
      iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
      for (j = ys; j < ys + ny; j++) {
        jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
        jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
        for (k = zs; k < zs + nz; k++) {
          kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
          kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));

          slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);

          cnt = 0;
          for (ii = istart; ii < iend + 1; ii++) {
            for (jj = jstart; jj < jend + 1; jj++) {
              for (kk = kstart; kk < kend + 1; kk++) {
                if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/
                  cols[cnt++] = slot + ii + gnx * jj + gnx * gny * kk;
                }
              }
            }
          }
          PetscCall(MatSetValuesBlockedLocal(J, 1, &slot, cnt, cols, values, INSERT_VALUES));
        }
      }
    }
    PetscCall(PetscFree(values));
    /* do not copy values to GPU since they are all zero and not yet needed there */
    PetscCall(MatBindToCPU(J, PETSC_TRUE));
    PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatBindToCPU(J, PETSC_FALSE));
    PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  }
  PetscCall(PetscFree(cols));
  PetscFunctionReturn(0);
}
1922 
1923 /*
1924   This helper is for of SBAIJ preallocation, to discard the lower-triangular values which are difficult to
1925   identify in the local ordering with periodic domain.
1926 */
1927 static PetscErrorCode L2GFilterUpperTriangular(ISLocalToGlobalMapping ltog, PetscInt *row, PetscInt *cnt, PetscInt col[])
1928 {
1929   PetscInt i, n;
1930 
1931   PetscFunctionBegin;
1932   PetscCall(ISLocalToGlobalMappingApplyBlock(ltog, 1, row, row));
1933   PetscCall(ISLocalToGlobalMappingApplyBlock(ltog, *cnt, col, col));
1934   for (i = 0, n = 0; i < *cnt; i++) {
1935     if (col[i] >= *row) col[n++] = col[i];
1936   }
1937   *cnt = n;
1938   PetscFunctionReturn(0);
1939 }
1940 
/*
  Preallocates a 2d DMDA SBAIJ (symmetric block) matrix; only the upper triangle is
  stored, so candidate columns are filtered through L2GFilterUpperTriangular().
  Unless da->prealloc_only, explicit zero blocks are inserted at every retained location.
*/
PetscErrorCode DMCreateMatrix_DA_2d_MPISBAIJ(DM da, Mat J)
{
  PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
  PetscInt               m, n, dim, s, *cols, nc, col, cnt, *dnz, *onz;
  PetscInt               istart, iend, jstart, jend, ii, jj;
  MPI_Comm               comm;
  PetscScalar           *values;
  DMBoundaryType         bx, by;
  DMDAStencilType        st;
  ISLocalToGlobalMapping ltog;

  PetscFunctionBegin;
  /*
     nc - number of components per grid point
     col - number of colors needed in one direction for single component problem
  */
  PetscCall(DMDAGetInfo(da, &dim, &m, &n, NULL, NULL, NULL, NULL, &nc, &s, &bx, &by, NULL, &st));
  col = 2 * s + 1;

  PetscCall(DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL));
  PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL));
  PetscCall(PetscObjectGetComm((PetscObject)da, &comm));

  /* scratch array of block-column indices; col*col is the worst (BOX-stencil) case */
  PetscCall(PetscMalloc1(col * col * nc * nc, &cols));

  PetscCall(DMGetLocalToGlobalMapping(da, &ltog));

  /* determine the matrix preallocation information */
  MatPreallocateBegin(comm, nx * ny, nx * ny, dnz, onz);
  for (i = xs; i < xs + nx; i++) {
    /* with a periodic boundary the full stencil always fits; otherwise clip it at the domain edge */
    istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
    iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
    for (j = ys; j < ys + ny; j++) {
      jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
      jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
      slot   = i - gxs + gnx * (j - gys); /* local (ghosted) index of this grid point */

      /* Find block columns in block row */
      cnt = 0;
      for (ii = istart; ii < iend + 1; ii++) {
        for (jj = jstart; jj < jend + 1; jj++) {
          if (st == DMDA_STENCIL_BOX || !ii || !jj) cols[cnt++] = slot + ii + gnx * jj;
        }
      }
      /* maps slot/cols to global indices and drops the lower-triangular columns */
      PetscCall(L2GFilterUpperTriangular(ltog, &slot, &cnt, cols));
      PetscCall(MatPreallocateSymmetricSetBlock(slot, cnt, cols, dnz, onz));
    }
  }
  PetscCall(MatSeqSBAIJSetPreallocation(J, nc, 0, dnz));
  PetscCall(MatMPISBAIJSetPreallocation(J, nc, 0, dnz, 0, onz));
  MatPreallocateEnd(dnz, onz);

  PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    /* zero block of values reused for every insertion */
    PetscCall(PetscCalloc1(col * col * nc * nc, &values));
    for (i = xs; i < xs + nx; i++) {
      istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
      iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
      for (j = ys; j < ys + ny; j++) {
        jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
        jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
        slot   = i - gxs + gnx * (j - gys);

        /* Find block columns in block row */
        cnt = 0;
        for (ii = istart; ii < iend + 1; ii++) {
          for (jj = jstart; jj < jend + 1; jj++) {
            if (st == DMDA_STENCIL_BOX || !ii || !jj) cols[cnt++] = slot + ii + gnx * jj;
          }
        }
        PetscCall(L2GFilterUpperTriangular(ltog, &slot, &cnt, cols));
        /* indices are global after filtering, hence MatSetValuesBlocked (not ...Local) */
        PetscCall(MatSetValuesBlocked(J, 1, &slot, cnt, cols, values, INSERT_VALUES));
      }
    }
    PetscCall(PetscFree(values));
    /* do not copy values to GPU since they are all zero and not yet needed there */
    PetscCall(MatBindToCPU(J, PETSC_TRUE));
    PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatBindToCPU(J, PETSC_FALSE));
    PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  }
  PetscCall(PetscFree(cols));
  PetscFunctionReturn(0);
}
2032 
/*
  Preallocates a 3d DMDA SBAIJ (symmetric block) matrix; only the upper triangle is
  stored, so candidate columns are filtered through L2GFilterUpperTriangular().
  Unless da->prealloc_only, explicit zero blocks are inserted at every retained location.
*/
PetscErrorCode DMCreateMatrix_DA_3d_MPISBAIJ(DM da, Mat J)
{
  PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
  PetscInt               m, n, dim, s, *cols, k, nc, col, cnt, p, *dnz, *onz;
  PetscInt               istart, iend, jstart, jend, kstart, kend, zs, nz, gzs, gnz, ii, jj, kk;
  MPI_Comm               comm;
  PetscScalar           *values;
  DMBoundaryType         bx, by, bz;
  DMDAStencilType        st;
  ISLocalToGlobalMapping ltog;

  PetscFunctionBegin;
  /*
     nc - number of components per grid point
     col - number of colors needed in one direction for single component problem
  */
  PetscCall(DMDAGetInfo(da, &dim, &m, &n, &p, NULL, NULL, NULL, &nc, &s, &bx, &by, &bz, &st));
  col = 2 * s + 1;

  PetscCall(DMDAGetCorners(da, &xs, &ys, &zs, &nx, &ny, &nz));
  PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, &gzs, &gnx, &gny, &gnz));
  PetscCall(PetscObjectGetComm((PetscObject)da, &comm));

  /* create the matrix */
  PetscCall(PetscMalloc1(col * col * col, &cols));

  PetscCall(DMGetLocalToGlobalMapping(da, &ltog));

  /* determine the matrix preallocation information */
  MatPreallocateBegin(comm, nx * ny * nz, nx * ny * nz, dnz, onz);
  for (i = xs; i < xs + nx; i++) {
    /* with a periodic boundary the full stencil always fits; otherwise clip it at the domain edge */
    istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
    iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
    for (j = ys; j < ys + ny; j++) {
      jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
      jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
      for (k = zs; k < zs + nz; k++) {
        kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
        kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));

        slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs); /* local (ghosted) index */

        /* Find block columns in block row */
        cnt = 0;
        for (ii = istart; ii < iend + 1; ii++) {
          for (jj = jstart; jj < jend + 1; jj++) {
            for (kk = kstart; kk < kend + 1; kk++) {
              if ((st == DMDA_STENCIL_BOX) || (!ii && !jj) || (!jj && !kk) || (!ii && !kk)) cols[cnt++] = slot + ii + gnx * jj + gnx * gny * kk;
            }
          }
        }
        /* maps slot/cols to global indices and drops the lower-triangular columns */
        PetscCall(L2GFilterUpperTriangular(ltog, &slot, &cnt, cols));
        PetscCall(MatPreallocateSymmetricSetBlock(slot, cnt, cols, dnz, onz));
      }
    }
  }
  PetscCall(MatSeqSBAIJSetPreallocation(J, nc, 0, dnz));
  PetscCall(MatMPISBAIJSetPreallocation(J, nc, 0, dnz, 0, onz));
  MatPreallocateEnd(dnz, onz);

  PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    /* zero block of values reused for every insertion */
    PetscCall(PetscCalloc1(col * col * col * nc * nc, &values));
    for (i = xs; i < xs + nx; i++) {
      istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
      iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
      for (j = ys; j < ys + ny; j++) {
        jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
        jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
        for (k = zs; k < zs + nz; k++) {
          kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
          kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));

          slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);

          cnt = 0;
          for (ii = istart; ii < iend + 1; ii++) {
            for (jj = jstart; jj < jend + 1; jj++) {
              for (kk = kstart; kk < kend + 1; kk++) {
                if ((st == DMDA_STENCIL_BOX) || (!ii && !jj) || (!jj && !kk) || (!ii && !kk)) cols[cnt++] = slot + ii + gnx * jj + gnx * gny * kk;
              }
            }
          }
          PetscCall(L2GFilterUpperTriangular(ltog, &slot, &cnt, cols));
          /* indices are global after filtering, hence MatSetValuesBlocked (not ...Local) */
          PetscCall(MatSetValuesBlocked(J, 1, &slot, cnt, cols, values, INSERT_VALUES));
        }
      }
    }
    PetscCall(PetscFree(values));
    /* do not copy values to GPU since they are all zero and not yet needed there */
    PetscCall(MatBindToCPU(J, PETSC_TRUE));
    PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatBindToCPU(J, PETSC_FALSE));
    PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  }
  PetscCall(PetscFree(cols));
  PetscFunctionReturn(0);
}
2138 
2139 /* ---------------------------------------------------------------------------------*/
2140 
/*
   DMCreateMatrix_DA_3d_MPIAIJ_Fill - Preallocates (and, unless da->prealloc_only,
   zero-fills and assembles) an AIJ matrix for a 3d DMDA, honoring the per-component
   fill patterns stored in dd->dfill (diagonal block) and dd->ofill (off-diagonal
   blocks) so that only the couplings actually present in the operator are allocated.

   Input Parameters:
.  da - the 3d DMDA supplying grid sizes, stencil type/width, and boundary types
.  J  - the matrix whose nonzero structure is set

   Notes:
   dfill/ofill use the modified CSR layout built by DMDASetBlockFills_Private():
   entry l .. l+1 of the array brackets the list of coupled components for
   component l, and the column (component) indices follow after position nc.
*/
PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ_Fill(DM da, Mat J)
{
  PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
  PetscInt               m, n, dim, s, *cols, k, nc, row, col, cnt, maxcnt = 0, l, p, *dnz, *onz;
  PetscInt               istart, iend, jstart, jend, kstart, kend, zs, nz, gzs, gnz, ii, jj, kk, M, N, P;
  DM_DA                 *dd = (DM_DA *)da->data;
  PetscInt               ifill_col, *dfill = dd->dfill, *ofill = dd->ofill;
  MPI_Comm               comm;
  PetscScalar           *values;
  DMBoundaryType         bx, by, bz;
  ISLocalToGlobalMapping ltog;
  DMDAStencilType        st;
  PetscBool              removedups = PETSC_FALSE;

  PetscFunctionBegin;
  /*
         nc  - number of components per grid point
         col - number of colors needed in one direction for single component problem
  */
  PetscCall(DMDAGetInfo(da, &dim, &m, &n, &p, &M, &N, &P, &nc, &s, &bx, &by, &bz, &st));
  col = 2 * s + 1;
  /* Periodic directions must have a grid size divisible by 2*s+1; otherwise the
     coloring produced elsewhere for this matrix would be inefficient/incorrect. */
  PetscCheck(bx != DM_BOUNDARY_PERIODIC || (m % col) == 0, PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "For coloring efficiency ensure number of grid points in X is divisible\n\
                 by 2*stencil_width + 1\n");
  PetscCheck(by != DM_BOUNDARY_PERIODIC || (n % col) == 0, PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "For coloring efficiency ensure number of grid points in Y is divisible\n\
                 by 2*stencil_width + 1\n");
  PetscCheck(bz != DM_BOUNDARY_PERIODIC || (p % col) == 0, PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "For coloring efficiency ensure number of grid points in Z is divisible\n\
                 by 2*stencil_width + 1\n");

  /*
       With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times
       because of "wrapping" around the end of the domain hitting an entry already counted in the other direction.
  */
  if (M == 1 && 2 * s >= m) removedups = PETSC_TRUE;
  if (N == 1 && 2 * s >= n) removedups = PETSC_TRUE;
  if (P == 1 && 2 * s >= p) removedups = PETSC_TRUE;

  PetscCall(DMDAGetCorners(da, &xs, &ys, &zs, &nx, &ny, &nz));
  PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, &gzs, &gnx, &gny, &gnz));
  PetscCall(PetscObjectGetComm((PetscObject)da, &comm));

  /* scratch column-index buffer; one row touches at most (2s+1)^3 stencil points
     with up to nc coupled components each */
  PetscCall(PetscMalloc1(col * col * col * nc, &cols));
  PetscCall(DMGetLocalToGlobalMapping(da, &ltog));

  /* determine the matrix preallocation information */
  MatPreallocateBegin(comm, nc * nx * ny * nz, nc * nx * ny * nz, dnz, onz);

  PetscCall(MatSetBlockSize(J, nc));
  /* first pass: count diagonal/off-diagonal nonzeros per row (dnz/onz) */
  for (i = xs; i < xs + nx; i++) {
    /* stencil extent in x, clipped at physical boundaries unless periodic */
    istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
    iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
    for (j = ys; j < ys + ny; j++) {
      jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
      jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
      for (k = zs; k < zs + nz; k++) {
        kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
        kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));

        /* local (ghosted) index of grid point (i,j,k) */
        slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);

        /* one matrix row per component l at this grid point */
        for (l = 0; l < nc; l++) {
          cnt = 0;
          for (ii = istart; ii < iend + 1; ii++) {
            for (jj = jstart; jj < jend + 1; jj++) {
              for (kk = kstart; kk < kend + 1; kk++) {
                if (ii || jj || kk) {
                  /* neighbor point: couple only the components listed in ofill for l */
                  if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/
                    for (ifill_col = ofill[l]; ifill_col < ofill[l + 1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc * (slot + ii + gnx * jj + gnx * gny * kk);
                  }
                } else {
                  /* the point itself: use dfill if given, otherwise full nc x nc coupling */
                  if (dfill) {
                    for (ifill_col = dfill[l]; ifill_col < dfill[l + 1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc * (slot + ii + gnx * jj + gnx * gny * kk);
                  } else {
                    for (ifill_col = 0; ifill_col < nc; ifill_col++) cols[cnt++] = ifill_col + nc * (slot + ii + gnx * jj + gnx * gny * kk);
                  }
                }
              }
            }
          }
          row    = l + nc * (slot);
          /* track the longest row so the zero-fill buffer below is large enough */
          maxcnt = PetscMax(maxcnt, cnt);
          if (removedups) PetscCall(MatPreallocateSetLocalRemoveDups(ltog, 1, &row, ltog, cnt, cols, dnz, onz));
          else PetscCall(MatPreallocateSetLocal(ltog, 1, &row, ltog, cnt, cols, dnz, onz));
        }
      }
    }
  }
  PetscCall(MatSeqAIJSetPreallocation(J, 0, dnz));
  PetscCall(MatMPIAIJSetPreallocation(J, 0, dnz, 0, onz));
  MatPreallocateEnd(dnz, onz);
  PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    /* second pass: insert explicit zeros in every preallocated location so the
       nonzero pattern is locked in before assembly; loop structure mirrors pass 1 */
    PetscCall(PetscCalloc1(maxcnt, &values));
    for (i = xs; i < xs + nx; i++) {
      istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
      iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
      for (j = ys; j < ys + ny; j++) {
        jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
        jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
        for (k = zs; k < zs + nz; k++) {
          kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
          kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));

          slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);

          for (l = 0; l < nc; l++) {
            cnt = 0;
            for (ii = istart; ii < iend + 1; ii++) {
              for (jj = jstart; jj < jend + 1; jj++) {
                for (kk = kstart; kk < kend + 1; kk++) {
                  if (ii || jj || kk) {
                    if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/
                      for (ifill_col = ofill[l]; ifill_col < ofill[l + 1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc * (slot + ii + gnx * jj + gnx * gny * kk);
                    }
                  } else {
                    if (dfill) {
                      for (ifill_col = dfill[l]; ifill_col < dfill[l + 1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc * (slot + ii + gnx * jj + gnx * gny * kk);
                    } else {
                      for (ifill_col = 0; ifill_col < nc; ifill_col++) cols[cnt++] = ifill_col + nc * (slot + ii + gnx * jj + gnx * gny * kk);
                    }
                  }
                }
              }
            }
            row = l + nc * (slot);
            PetscCall(MatSetValuesLocal(J, 1, &row, cnt, cols, values, INSERT_VALUES));
          }
        }
      }
    }
    PetscCall(PetscFree(values));
    /* do not copy values to GPU since they are all zero and not yet needed there */
    PetscCall(MatBindToCPU(J, PETSC_TRUE));
    PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatBindToCPU(J, PETSC_FALSE));
    /* any insertion outside the preallocated pattern is now an error */
    PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  }
  PetscCall(PetscFree(cols));
  PetscFunctionReturn(0);
}
2288