xref: /petsc/src/dm/impls/da/fdda.c (revision ade515a38977e0aa84f687ce726ba7df67fbcdec)
1 
2 #include <petsc/private/dmdaimpl.h> /*I      "petscdmda.h"     I*/
3 #include <petscmat.h>
4 #include <petscbt.h>
5 
6 extern PetscErrorCode DMCreateColoring_DA_1d_MPIAIJ(DM, ISColoringType, ISColoring *);
7 extern PetscErrorCode DMCreateColoring_DA_2d_MPIAIJ(DM, ISColoringType, ISColoring *);
8 extern PetscErrorCode DMCreateColoring_DA_2d_5pt_MPIAIJ(DM, ISColoringType, ISColoring *);
9 extern PetscErrorCode DMCreateColoring_DA_3d_MPIAIJ(DM, ISColoringType, ISColoring *);
10 
/*
   For a ghost index i that may be negative or greater than or equal to the
  upper bound m, map it into the range 0:m-1 using periodicity.

  All macro parameters and the full expansion are parenthesized so that
  expression arguments (e.g. SetInRange(i - 1, m)) expand correctly.
  NOTE: each argument may be evaluated more than once, so do not pass
  arguments with side effects (e.g. i++).
*/
#define SetInRange(i, m) (((i) < 0) ? (m) + (i) : (((i) >= (m)) ? (i) - (m) : (i)))
16 
17 static PetscErrorCode DMDASetBlockFills_Private(const PetscInt *dfill, PetscInt w, PetscInt **rfill)
18 {
19   PetscInt i, j, nz, *fill;
20 
21   PetscFunctionBegin;
22   if (!dfill) PetscFunctionReturn(PETSC_SUCCESS);
23 
24   /* count number nonzeros */
25   nz = 0;
26   for (i = 0; i < w; i++) {
27     for (j = 0; j < w; j++) {
28       if (dfill[w * i + j]) nz++;
29     }
30   }
31   PetscCall(PetscMalloc1(nz + w + 1, &fill));
32   /* construct modified CSR storage of nonzero structure */
33   /*  fill[0 -- w] marks starts of each row of column indices (and end of last row)
34    so fill[1] - fill[0] gives number of nonzeros in first row etc */
35   nz = w + 1;
36   for (i = 0; i < w; i++) {
37     fill[i] = nz;
38     for (j = 0; j < w; j++) {
39       if (dfill[w * i + j]) {
40         fill[nz] = j;
41         nz++;
42       }
43     }
44   }
45   fill[w] = nz;
46 
47   *rfill = fill;
48   PetscFunctionReturn(PETSC_SUCCESS);
49 }
50 
51 static PetscErrorCode DMDASetBlockFillsSparse_Private(const PetscInt *dfillsparse, PetscInt w, PetscInt **rfill)
52 {
53   PetscInt nz;
54 
55   PetscFunctionBegin;
56   if (!dfillsparse) PetscFunctionReturn(PETSC_SUCCESS);
57 
58   /* Determine number of non-zeros */
59   nz = (dfillsparse[w] - w - 1);
60 
61   /* Allocate space for our copy of the given sparse matrix representation. */
62   PetscCall(PetscMalloc1(nz + w + 1, rfill));
63   PetscCall(PetscArraycpy(*rfill, dfillsparse, nz + w + 1));
64   PetscFunctionReturn(PETSC_SUCCESS);
65 }
66 
67 static PetscErrorCode DMDASetBlockFills_Private2(DM_DA *dd)
68 {
69   PetscInt i, k, cnt = 1;
70 
71   PetscFunctionBegin;
72 
73   /* ofillcount tracks the columns of ofill that have any nonzero in thems; the value in each location is the number of
74    columns to the left with any nonzeros in them plus 1 */
75   PetscCall(PetscCalloc1(dd->w, &dd->ofillcols));
76   for (i = 0; i < dd->w; i++) {
77     for (k = dd->ofill[i]; k < dd->ofill[i + 1]; k++) dd->ofillcols[dd->ofill[k]] = 1;
78   }
79   for (i = 0; i < dd->w; i++) {
80     if (dd->ofillcols[i]) dd->ofillcols[i] = cnt++;
81   }
82   PetscFunctionReturn(PETSC_SUCCESS);
83 }
84 
85 /*@
86     DMDASetBlockFills - Sets the fill pattern in each block for a multi-component problem
87     of the matrix returned by `DMCreateMatrix()`.
88 
89     Logically Collective on da
90 
91     Input Parameters:
92 +   da - the distributed array
93 .   dfill - the fill pattern in the diagonal block (may be NULL, means use dense block)
94 -   ofill - the fill pattern in the off-diagonal blocks
95 
96     Level: developer
97 
98     Notes:
99     This only makes sense when you are doing multicomponent problems but using the
100        `MATMPIAIJ` matrix format
101 
102            The format for dfill and ofill is a 2 dimensional dof by dof matrix with 1 entries
103        representing coupling and 0 entries for missing coupling. For example
104 .vb
105             dfill[9] = {1, 0, 0,
106                         1, 1, 0,
107                         0, 1, 1}
108 .ve
109        means that row 0 is coupled with only itself in the diagonal block, row 1 is coupled with
110        itself and row 0 (in the diagonal block) and row 2 is coupled with itself and row 1 (in the
111        diagonal block).
112 
     `DMDASetGetMatrix()` allows you to provide general code for those more complicated nonzero patterns than
     can be represented in the dfill, ofill format
115 
116    Contributed by Glenn Hammond
117 
118 .seealso: `DM`, `DMDA`, `DMCreateMatrix()`, `DMDASetGetMatrix()`, `DMSetMatrixPreallocateOnly()`
119 @*/
120 PetscErrorCode DMDASetBlockFills(DM da, const PetscInt *dfill, const PetscInt *ofill)
121 {
122   DM_DA *dd = (DM_DA *)da->data;
123 
124   PetscFunctionBegin;
125   /* save the given dfill and ofill information */
126   PetscCall(DMDASetBlockFills_Private(dfill, dd->w, &dd->dfill));
127   PetscCall(DMDASetBlockFills_Private(ofill, dd->w, &dd->ofill));
128 
129   /* count nonzeros in ofill columns */
130   PetscCall(DMDASetBlockFills_Private2(dd));
131   PetscFunctionReturn(PETSC_SUCCESS);
132 }
133 
134 /*@
135     DMDASetBlockFillsSparse - Sets the fill pattern in each block for a multi-component problem
136     of the matrix returned by `DMCreateMatrix()`, using sparse representations
137     of fill patterns.
138 
139     Logically Collective on da
140 
141     Input Parameters:
142 +   da - the distributed array
143 .   dfill - the sparse fill pattern in the diagonal block (may be NULL, means use dense block)
144 -   ofill - the sparse fill pattern in the off-diagonal blocks
145 
146     Level: developer
147 
148     Notes:
149     This only makes sense when you are doing multicomponent problems but using the
150        `MATMPIAIJ` matrix format
151 
152            The format for dfill and ofill is a sparse representation of a
153            dof-by-dof matrix with 1 entries representing coupling and 0 entries
154            for missing coupling.  The sparse representation is a 1 dimensional
155            array of length nz + dof + 1, where nz is the number of non-zeros in
156            the matrix.  The first dof entries in the array give the
157            starting array indices of each row's items in the rest of the array,
158            the dof+1st item contains the value nz + dof + 1 (i.e. the entire length of the array)
159            and the remaining nz items give the column indices of each of
160            the 1s within the logical 2D matrix.  Each row's items within
161            the array are the column indices of the 1s within that row
162            of the 2D matrix.  PETSc developers may recognize that this is the
163            same format as that computed by the `DMDASetBlockFills_Private()`
164            function from a dense 2D matrix representation.
165 
     `DMDASetGetMatrix()` allows you to provide general code for those more complicated nonzero patterns than
     can be represented in the dfill, ofill format
168 
169    Contributed by Philip C. Roth
170 
171 .seealso: `DM`, `DMDA`, `DMDASetBlockFills()`, `DMCreateMatrix()`, `DMDASetGetMatrix()`, `DMSetMatrixPreallocateOnly()`
172 @*/
173 PetscErrorCode DMDASetBlockFillsSparse(DM da, const PetscInt *dfillsparse, const PetscInt *ofillsparse)
174 {
175   DM_DA *dd = (DM_DA *)da->data;
176 
177   PetscFunctionBegin;
178   /* save the given dfill and ofill information */
179   PetscCall(DMDASetBlockFillsSparse_Private(dfillsparse, dd->w, &dd->dfill));
180   PetscCall(DMDASetBlockFillsSparse_Private(ofillsparse, dd->w, &dd->ofill));
181 
182   /* count nonzeros in ofill columns */
183   PetscCall(DMDASetBlockFills_Private2(dd));
184   PetscFunctionReturn(PETSC_SUCCESS);
185 }
186 
/*
   Entry point for DMCreateColoring() on a DMDA: validates/normalizes the
   requested coloring type, temporarily switches to block units for BAIJ
   matrix types, and dispatches to the dimension-specific worker.
*/
PetscErrorCode DMCreateColoring_DA(DM da, ISColoringType ctype, ISColoring *coloring)
{
  PetscInt       dim, m, n, p, nc;
  DMBoundaryType bx, by, bz;
  MPI_Comm       comm;
  PetscMPIInt    size;
  PetscBool      isBAIJ;
  DM_DA         *dd = (DM_DA *)da->data;

  PetscFunctionBegin;
  /*
     Layout of the local piece of the global grid (picture used throughout this file):

                                  m
          ------------------------------------------------------
         |                                                     |
         |                                                     |
         |               ----------------------                |
         |               |                    |                |
      n  |           yn  |                    |                |
         |               |                    |                |
         |               .---------------------                |
         |             (xs,ys)     xn                          |
         |            .                                        |
         |         (gxs,gys)                                   |
         |                                                     |
          -----------------------------------------------------
  */

  /*
         nc - number of components per grid point
         col - number of colors needed in one direction for single component problem
  */
  PetscCall(DMDAGetInfo(da, &dim, NULL, NULL, NULL, &m, &n, &p, &nc, NULL, &bx, &by, &bz, NULL));

  PetscCall(PetscObjectGetComm((PetscObject)da, &comm));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  if (ctype == IS_COLORING_LOCAL) {
    /* on one process a ghosted (local) coloring is equivalent to a global one */
    if (size == 1) {
      ctype = IS_COLORING_GLOBAL;
    } else if (dim > 1) {
      /* a periodic direction of extent 1 places both ends of the domain on the same process */
      if ((m == 1 && bx == DM_BOUNDARY_PERIODIC) || (n == 1 && by == DM_BOUNDARY_PERIODIC) || (p == 1 && bz == DM_BOUNDARY_PERIODIC)) {
        SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "IS_COLORING_LOCAL cannot be used for periodic boundary condition having both ends of the domain  on the same process");
      }
    }
  }

  /* Tell the DMDA it has 1 degree of freedom per grid point so that the coloring for BAIJ
     matrices is for the blocks, not the individual matrix elements  */
  PetscCall(PetscStrbeginswith(da->mattype, MATBAIJ, &isBAIJ));
  if (!isBAIJ) PetscCall(PetscStrbeginswith(da->mattype, MATMPIBAIJ, &isBAIJ));
  if (!isBAIJ) PetscCall(PetscStrbeginswith(da->mattype, MATSEQBAIJ, &isBAIJ));
  if (isBAIJ) {
    /* temporarily rescale the (ghosted) x-ownership ranges to block units; undone below */
    dd->w  = 1;
    dd->xs = dd->xs / nc;
    dd->xe = dd->xe / nc;
    dd->Xs = dd->Xs / nc;
    dd->Xe = dd->Xe / nc;
  }

  /*
     We do not provide a getcoloring function in the DMDA operations because
   the basic DMDA does not know about matrices. We think of DMDA as being
   more low-level than matrices.
  */
  if (dim == 1) PetscCall(DMCreateColoring_DA_1d_MPIAIJ(da, ctype, coloring));
  else if (dim == 2) PetscCall(DMCreateColoring_DA_2d_MPIAIJ(da, ctype, coloring));
  else if (dim == 3) PetscCall(DMCreateColoring_DA_3d_MPIAIJ(da, ctype, coloring));
  else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "Not done for %" PetscInt_FMT " dimension, send us mail petsc-maint@mcs.anl.gov for code", dim);
  if (isBAIJ) {
    /* restore the point-wise (non-block) dof and ownership ranges changed above */
    dd->w  = nc;
    dd->xs = dd->xs * nc;
    dd->xe = dd->xe * nc;
    dd->Xs = dd->Xs * nc;
    dd->Xe = dd->Xe * nc;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
264 
265 /* ---------------------------------------------------------------------------------*/
266 
/*
   Coloring for a 2d DMDA: color = component + nc*((i mod col) + col*(j mod col))
 with col = 2*s + 1, so any two grid points that can appear in the same stencil
 get distinct colors.  Results are cached on the DMDA (dd->localcoloring /
 dd->ghostedcoloring) and reused on subsequent calls.
*/
PetscErrorCode DMCreateColoring_DA_2d_MPIAIJ(DM da, ISColoringType ctype, ISColoring *coloring)
{
  PetscInt         xs, ys, nx, ny, i, j, ii, gxs, gys, gnx, gny, m, n, M, N, dim, s, k, nc, col;
  PetscInt         ncolors = 0;
  MPI_Comm         comm;
  DMBoundaryType   bx, by;
  DMDAStencilType  st;
  ISColoringValue *colors;
  DM_DA           *dd = (DM_DA *)da->data;

  PetscFunctionBegin;
  /*
         nc - number of components per grid point
         col - number of colors needed in one direction for single component problem
  */
  PetscCall(DMDAGetInfo(da, &dim, &m, &n, NULL, &M, &N, NULL, &nc, &s, &bx, &by, NULL, &st));
  col = 2 * s + 1;
  PetscCall(DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL));
  PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL));
  PetscCall(PetscObjectGetComm((PetscObject)da, &comm));

  /* special case as taught to us by Paul Hovland */
  if (st == DMDA_STENCIL_STAR && s == 1) {
    /* a width-1 star (5-point) stencil needs only 5 colors per component */
    PetscCall(DMCreateColoring_DA_2d_5pt_MPIAIJ(da, ctype, coloring));
  } else {
    if (ctype == IS_COLORING_GLOBAL) {
      /* color only the locally owned grid points */
      if (!dd->localcoloring) {
        PetscCall(PetscMalloc1(nc * nx * ny, &colors));
        ii = 0;
        for (j = ys; j < ys + ny; j++) {
          for (i = xs; i < xs + nx; i++) {
            for (k = 0; k < nc; k++) colors[ii++] = k + nc * ((i % col) + col * (j % col));
          }
        }
        /* total number of colors: nc*col*col */
        ncolors = nc + nc * (col - 1 + col * (col - 1));
        PetscCall(ISColoringCreate(comm, ncolors, nc * nx * ny, colors, PETSC_OWN_POINTER, &dd->localcoloring));
      }
      *coloring = dd->localcoloring;
    } else if (ctype == IS_COLORING_LOCAL) {
      /* color the ghosted (local) grid points */
      if (!dd->ghostedcoloring) {
        PetscCall(PetscMalloc1(nc * gnx * gny, &colors));
        ii = 0;
        for (j = gys; j < gys + gny; j++) {
          for (i = gxs; i < gxs + gnx; i++) {
            for (k = 0; k < nc; k++) {
              /* the complicated stuff is to handle periodic boundaries */
              colors[ii++] = k + nc * ((SetInRange(i, m) % col) + col * (SetInRange(j, n) % col));
            }
          }
        }
        ncolors = nc + nc * (col - 1 + col * (col - 1));
        PetscCall(ISColoringCreate(comm, ncolors, nc * gnx * gny, colors, PETSC_OWN_POINTER, &dd->ghostedcoloring));
        /* PetscIntView(ncolors,(PetscInt*)colors,0); */

        PetscCall(ISColoringSetType(dd->ghostedcoloring, IS_COLORING_LOCAL));
      }
      *coloring = dd->ghostedcoloring;
    } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_ARG_WRONG, "Unknown ISColoringType %d", (int)ctype);
  }
  /* the caller receives its own reference; the cached coloring keeps one */
  PetscCall(ISColoringReference(*coloring));
  PetscFunctionReturn(PETSC_SUCCESS);
}
330 
331 /* ---------------------------------------------------------------------------------*/
332 
/*
   Coloring for a 3d DMDA: color = component + nc*((i mod col) + col*(j mod col)
 + col*col*(k mod col)) with col = 2*s + 1.  Results are cached on the DMDA
 (dd->localcoloring / dd->ghostedcoloring) and reused on subsequent calls.
*/
PetscErrorCode DMCreateColoring_DA_3d_MPIAIJ(DM da, ISColoringType ctype, ISColoring *coloring)
{
  PetscInt         xs, ys, nx, ny, i, j, gxs, gys, gnx, gny, m, n, p, dim, s, k, nc, col, zs, gzs, ii, l, nz, gnz, M, N, P;
  PetscInt         ncolors;
  MPI_Comm         comm;
  DMBoundaryType   bx, by, bz;
  DMDAStencilType  st;
  ISColoringValue *colors;
  DM_DA           *dd = (DM_DA *)da->data;

  PetscFunctionBegin;
  /*
         nc - number of components per grid point
         col - number of colors needed in one direction for single component problem
  */
  PetscCall(DMDAGetInfo(da, &dim, &m, &n, &p, &M, &N, &P, &nc, &s, &bx, &by, &bz, &st));
  col = 2 * s + 1;
  PetscCall(DMDAGetCorners(da, &xs, &ys, &zs, &nx, &ny, &nz));
  PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, &gzs, &gnx, &gny, &gnz));
  PetscCall(PetscObjectGetComm((PetscObject)da, &comm));

  /* create the coloring */
  if (ctype == IS_COLORING_GLOBAL) {
    /* color only the locally owned grid points */
    if (!dd->localcoloring) {
      PetscCall(PetscMalloc1(nc * nx * ny * nz, &colors));
      ii = 0;
      for (k = zs; k < zs + nz; k++) {
        for (j = ys; j < ys + ny; j++) {
          for (i = xs; i < xs + nx; i++) {
            for (l = 0; l < nc; l++) colors[ii++] = l + nc * ((i % col) + col * (j % col) + col * col * (k % col));
          }
        }
      }
      /* total number of colors: nc*col*col*col */
      ncolors = nc + nc * (col - 1 + col * (col - 1) + col * col * (col - 1));
      PetscCall(ISColoringCreate(comm, ncolors, nc * nx * ny * nz, colors, PETSC_OWN_POINTER, &dd->localcoloring));
    }
    *coloring = dd->localcoloring;
  } else if (ctype == IS_COLORING_LOCAL) {
    /* color the ghosted (local) grid points */
    if (!dd->ghostedcoloring) {
      PetscCall(PetscMalloc1(nc * gnx * gny * gnz, &colors));
      ii = 0;
      for (k = gzs; k < gzs + gnz; k++) {
        for (j = gys; j < gys + gny; j++) {
          for (i = gxs; i < gxs + gnx; i++) {
            for (l = 0; l < nc; l++) {
              /* the complicated stuff is to handle periodic boundaries */
              colors[ii++] = l + nc * ((SetInRange(i, m) % col) + col * (SetInRange(j, n) % col) + col * col * (SetInRange(k, p) % col));
            }
          }
        }
      }
      ncolors = nc + nc * (col - 1 + col * (col - 1) + col * col * (col - 1));
      PetscCall(ISColoringCreate(comm, ncolors, nc * gnx * gny * gnz, colors, PETSC_OWN_POINTER, &dd->ghostedcoloring));
      PetscCall(ISColoringSetType(dd->ghostedcoloring, IS_COLORING_LOCAL));
    }
    *coloring = dd->ghostedcoloring;
  } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_ARG_WRONG, "Unknown ISColoringType %d", (int)ctype);
  /* the caller receives its own reference; the cached coloring keeps one */
  PetscCall(ISColoringReference(*coloring));
  PetscFunctionReturn(PETSC_SUCCESS);
}
393 
394 /* ---------------------------------------------------------------------------------*/
395 
/*
   Coloring for a 1d DMDA: color = component + nc*(i mod col) with col = 2*s + 1.
 When DMDASetBlockFills() information is available (dd->ofillcols), components
 with no off-diagonal coupling can reuse their base colors, reducing the color
 count to nc + 2*s*tc.  Results are cached on the DMDA and reused.
*/
PetscErrorCode DMCreateColoring_DA_1d_MPIAIJ(DM da, ISColoringType ctype, ISColoring *coloring)
{
  PetscInt         xs, nx, i, i1, gxs, gnx, l, m, M, dim, s, nc, col;
  PetscInt         ncolors;
  MPI_Comm         comm;
  DMBoundaryType   bx;
  ISColoringValue *colors;
  DM_DA           *dd = (DM_DA *)da->data;

  PetscFunctionBegin;
  /*
         nc - number of components per grid point
         col - number of colors needed in one direction for single component problem
  */
  PetscCall(DMDAGetInfo(da, &dim, &m, NULL, NULL, &M, NULL, NULL, &nc, &s, &bx, NULL, NULL, NULL));
  col = 2 * s + 1;
  PetscCall(DMDAGetCorners(da, &xs, NULL, NULL, &nx, NULL, NULL));
  PetscCall(DMDAGetGhostCorners(da, &gxs, NULL, NULL, &gnx, NULL, NULL));
  PetscCall(PetscObjectGetComm((PetscObject)da, &comm));

  /* create the coloring */
  if (ctype == IS_COLORING_GLOBAL) {
    if (!dd->localcoloring) {
      PetscCall(PetscMalloc1(nc * nx, &colors));
      if (dd->ofillcols) {
        /* tc = number of components with any off-diagonal coupling (nonzero ofill column) */
        PetscInt tc = 0;
        for (i = 0; i < nc; i++) tc += (PetscInt)(dd->ofillcols[i] > 0);
        i1 = 0;
        for (i = xs; i < xs + nx; i++) {
          for (l = 0; l < nc; l++) {
            if (dd->ofillcols[l] && (i % col)) {
              /* coupled component at a non-base offset: give it a unique color
                 group indexed by its 1-based rank dd->ofillcols[l] */
              colors[i1++] = nc - 1 + tc * ((i % col) - 1) + dd->ofillcols[l];
            } else {
              /* uncoupled component (or base offset): reuse the component's own color */
              colors[i1++] = l;
            }
          }
        }
        ncolors = nc + 2 * s * tc;
      } else {
        i1 = 0;
        for (i = xs; i < xs + nx; i++) {
          for (l = 0; l < nc; l++) colors[i1++] = l + nc * (i % col);
        }
        ncolors = nc + nc * (col - 1);
      }
      PetscCall(ISColoringCreate(comm, ncolors, nc * nx, colors, PETSC_OWN_POINTER, &dd->localcoloring));
    }
    *coloring = dd->localcoloring;
  } else if (ctype == IS_COLORING_LOCAL) {
    /* color the ghosted (local) grid points */
    if (!dd->ghostedcoloring) {
      PetscCall(PetscMalloc1(nc * gnx, &colors));
      i1 = 0;
      for (i = gxs; i < gxs + gnx; i++) {
        for (l = 0; l < nc; l++) {
          /* the complicated stuff is to handle periodic boundaries */
          colors[i1++] = l + nc * (SetInRange(i, m) % col);
        }
      }
      ncolors = nc + nc * (col - 1);
      PetscCall(ISColoringCreate(comm, ncolors, nc * gnx, colors, PETSC_OWN_POINTER, &dd->ghostedcoloring));
      PetscCall(ISColoringSetType(dd->ghostedcoloring, IS_COLORING_LOCAL));
    }
    *coloring = dd->ghostedcoloring;
  } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_ARG_WRONG, "Unknown ISColoringType %d", (int)ctype);
  /* the caller receives its own reference; the cached coloring keeps one */
  PetscCall(ISColoringReference(*coloring));
  PetscFunctionReturn(PETSC_SUCCESS);
}
463 
/*
   Coloring for the special case of a 2d star stencil with width 1 (5-point
 stencil): only 5 colors per component are needed, assigned by (3*j + i) mod 5.
 Unlike its siblings, this routine does not take an extra reference on the
 returned coloring; its caller DMCreateColoring_DA_2d_MPIAIJ() does that.
*/
PetscErrorCode DMCreateColoring_DA_2d_5pt_MPIAIJ(DM da, ISColoringType ctype, ISColoring *coloring)
{
  PetscInt         xs, ys, nx, ny, i, j, ii, gxs, gys, gnx, gny, m, n, dim, s, k, nc;
  PetscInt         ncolors;
  MPI_Comm         comm;
  DMBoundaryType   bx, by;
  ISColoringValue *colors;
  DM_DA           *dd = (DM_DA *)da->data;

  PetscFunctionBegin;
  /*
         nc - number of components per grid point
         col - number of colors needed in one direction for single component problem
  */
  PetscCall(DMDAGetInfo(da, &dim, &m, &n, NULL, NULL, NULL, NULL, &nc, &s, &bx, &by, NULL, NULL));
  PetscCall(DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL));
  PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL));
  PetscCall(PetscObjectGetComm((PetscObject)da, &comm));
  /* create the coloring */
  if (ctype == IS_COLORING_GLOBAL) {
    /* color only the locally owned grid points */
    if (!dd->localcoloring) {
      PetscCall(PetscMalloc1(nc * nx * ny, &colors));
      ii = 0;
      for (j = ys; j < ys + ny; j++) {
        for (i = xs; i < xs + nx; i++) {
          for (k = 0; k < nc; k++) colors[ii++] = k + nc * ((3 * j + i) % 5);
        }
      }
      ncolors = 5 * nc;
      PetscCall(ISColoringCreate(comm, ncolors, nc * nx * ny, colors, PETSC_OWN_POINTER, &dd->localcoloring));
    }
    *coloring = dd->localcoloring;
  } else if (ctype == IS_COLORING_LOCAL) {
    /* color the ghosted (local) grid points */
    if (!dd->ghostedcoloring) {
      PetscCall(PetscMalloc1(nc * gnx * gny, &colors));
      ii = 0;
      for (j = gys; j < gys + gny; j++) {
        for (i = gxs; i < gxs + gnx; i++) {
          /* SetInRange() wraps ghost indices into the domain for periodic boundaries */
          for (k = 0; k < nc; k++) colors[ii++] = k + nc * ((3 * SetInRange(j, n) + SetInRange(i, m)) % 5);
        }
      }
      ncolors = 5 * nc;
      PetscCall(ISColoringCreate(comm, ncolors, nc * gnx * gny, colors, PETSC_OWN_POINTER, &dd->ghostedcoloring));
      PetscCall(ISColoringSetType(dd->ghostedcoloring, IS_COLORING_LOCAL));
    }
    *coloring = dd->ghostedcoloring;
  } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_ARG_WRONG, "Unknown ISColoringType %d", (int)ctype);
  PetscFunctionReturn(PETSC_SUCCESS);
}
513 
514 /* =========================================================================== */
515 extern PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ(DM, Mat);
516 extern PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ_Fill(DM, Mat);
517 extern PetscErrorCode DMCreateMatrix_DA_1d_SeqAIJ_NoPreallocation(DM, Mat);
518 extern PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ(DM, Mat);
519 extern PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ_Fill(DM, Mat);
520 extern PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ(DM, Mat);
521 extern PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ_Fill(DM, Mat);
522 extern PetscErrorCode DMCreateMatrix_DA_2d_MPIBAIJ(DM, Mat);
523 extern PetscErrorCode DMCreateMatrix_DA_3d_MPIBAIJ(DM, Mat);
524 extern PetscErrorCode DMCreateMatrix_DA_2d_MPISBAIJ(DM, Mat);
525 extern PetscErrorCode DMCreateMatrix_DA_3d_MPISBAIJ(DM, Mat);
526 extern PetscErrorCode DMCreateMatrix_DA_2d_MPISELL(DM, Mat);
527 extern PetscErrorCode DMCreateMatrix_DA_3d_MPISELL(DM, Mat);
528 extern PetscErrorCode DMCreateMatrix_DA_IS(DM, Mat);
529 
530 /*@C
531    MatSetupDM - Sets the `DMDA` that is to be used by the HYPRE_StructMatrix PETSc matrix
532 
533    Logically Collective on mat
534 
535    Input Parameters:
536 +  mat - the matrix
537 -  da - the da
538 
539    Level: intermediate
540 
541 .seealso: `Mat`, `MatSetUp()`
542 @*/
PetscErrorCode MatSetupDM(Mat mat, DM da)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat, MAT_CLASSID, 1);
  PetscValidHeaderSpecificType(da, DM_CLASSID, 2, DMDA);
  /* dispatch to the implementation composed on the matrix under the name
     "MatSetupDM_C"; PetscTryMethod makes this a no-op for matrix types
     that do not provide one */
  PetscTryMethod(mat, "MatSetupDM_C", (Mat, DM), (mat, da));
  PetscFunctionReturn(PETSC_SUCCESS);
}
551 
/*
   Views a matrix created from a DMDA in the DMDA's natural ordering: builds a
 permuted copy via a submatrix extraction and views that copy with the original
 matrix's prefix and name.
*/
PetscErrorCode MatView_MPI_DA(Mat A, PetscViewer viewer)
{
  DM                da;
  const char       *prefix;
  Mat               Anatural;
  AO                ao;
  PetscInt          rstart, rend, *petsc, i;
  IS                is;
  MPI_Comm          comm;
  PetscViewerFormat format;

  PetscFunctionBegin;
  /* Check whether we are just printing info, in which case MatView() already viewed everything we wanted to view */
  PetscCall(PetscViewerGetFormat(viewer, &format));
  if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) PetscFunctionReturn(PETSC_SUCCESS);

  PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
  PetscCall(MatGetDM(A, &da));
  PetscCheck(da, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Matrix not generated from a DMDA");

  /* build the IS mapping this process's rows from application (natural) to PETSc ordering */
  PetscCall(DMDAGetAO(da, &ao));
  PetscCall(MatGetOwnershipRange(A, &rstart, &rend));
  PetscCall(PetscMalloc1(rend - rstart, &petsc));
  for (i = rstart; i < rend; i++) petsc[i - rstart] = i;
  PetscCall(AOApplicationToPetsc(ao, rend - rstart, petsc));
  /* the IS takes ownership of petsc (PETSC_OWN_POINTER); do not free it here */
  PetscCall(ISCreateGeneral(comm, rend - rstart, petsc, PETSC_OWN_POINTER, &is));

  /* call viewer on natural ordering */
  PetscCall(MatCreateSubMatrix(A, is, is, MAT_INITIAL_MATRIX, &Anatural));
  PetscCall(ISDestroy(&is));
  PetscCall(PetscObjectGetOptionsPrefix((PetscObject)A, &prefix));
  PetscCall(PetscObjectSetOptionsPrefix((PetscObject)Anatural, prefix));
  PetscCall(PetscObjectSetName((PetscObject)Anatural, ((PetscObject)A)->name));
  /* temporarily suppress Anatural's own class/name header so output appears to be A's */
  ((PetscObject)Anatural)->donotPetscObjectPrintClassNamePrefixType = PETSC_TRUE;
  PetscCall(MatView(Anatural, viewer));
  ((PetscObject)Anatural)->donotPetscObjectPrintClassNamePrefixType = PETSC_FALSE;
  PetscCall(MatDestroy(&Anatural));
  PetscFunctionReturn(PETSC_SUCCESS);
}
591 
/*
   Loads a matrix created from a DMDA: the file stores the matrix in the DMDA's
 natural ordering, so after reading, the matrix is permuted into the PETSc
 (application) ordering and the result replaces A in place via MatHeaderReplace().
*/
PetscErrorCode MatLoad_MPI_DA(Mat A, PetscViewer viewer)
{
  DM       da;
  Mat      Anatural, Aapp;
  AO       ao;
  PetscInt rstart, rend, *app, i, m, n, M, N;
  IS       is;
  MPI_Comm comm;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
  PetscCall(MatGetDM(A, &da));
  PetscCheck(da, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Matrix not generated from a DMDA");

  /* Load the matrix in natural ordering, with the same type and layout as A */
  PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &Anatural));
  PetscCall(MatSetType(Anatural, ((PetscObject)A)->type_name));
  PetscCall(MatGetSize(A, &M, &N));
  PetscCall(MatGetLocalSize(A, &m, &n));
  PetscCall(MatSetSizes(Anatural, m, n, M, N));
  PetscCall(MatLoad(Anatural, viewer));

  /* Map natural ordering to application ordering and create IS */
  PetscCall(DMDAGetAO(da, &ao));
  PetscCall(MatGetOwnershipRange(Anatural, &rstart, &rend));
  PetscCall(PetscMalloc1(rend - rstart, &app));
  for (i = rstart; i < rend; i++) app[i - rstart] = i;
  PetscCall(AOPetscToApplication(ao, rend - rstart, app));
  /* the IS takes ownership of app (PETSC_OWN_POINTER); do not free it here */
  PetscCall(ISCreateGeneral(comm, rend - rstart, app, PETSC_OWN_POINTER, &is));

  /* Do permutation and replace header */
  PetscCall(MatCreateSubMatrix(Anatural, is, is, MAT_INITIAL_MATRIX, &Aapp));
  PetscCall(MatHeaderReplace(A, &Aapp));
  PetscCall(ISDestroy(&is));
  PetscCall(MatDestroy(&Anatural));
  PetscFunctionReturn(PETSC_SUCCESS);
}
629 
630 PetscErrorCode DMCreateMatrix_DA(DM da, Mat *J)
631 {
632   PetscInt    dim, dof, nx, ny, nz, dims[3], starts[3], M, N, P;
633   Mat         A;
634   MPI_Comm    comm;
635   MatType     Atype;
636   MatType     mtype;
637   PetscMPIInt size;
638   DM_DA      *dd    = (DM_DA *)da->data;
639   void (*aij)(void) = NULL, (*baij)(void) = NULL, (*sbaij)(void) = NULL, (*sell)(void) = NULL, (*is)(void) = NULL;
640 
641   PetscFunctionBegin;
642   PetscCall(MatInitializePackage());
643   mtype = da->mattype;
644 
645   /*
646                                   m
647           ------------------------------------------------------
648          |                                                     |
649          |                                                     |
650          |               ----------------------                |
651          |               |                    |                |
652       n  |           ny  |                    |                |
653          |               |                    |                |
654          |               .---------------------                |
655          |             (xs,ys)     nx                          |
656          |            .                                        |
657          |         (gxs,gys)                                   |
658          |                                                     |
659           -----------------------------------------------------
660   */
661 
662   /*
663          nc - number of components per grid point
664          col - number of colors needed in one direction for single component problem
665   */
666   M   = dd->M;
667   N   = dd->N;
668   P   = dd->P;
669   dim = da->dim;
670   dof = dd->w;
671   /* PetscCall(DMDAGetInfo(da,&dim,&M,&N,&P,NULL,NULL,NULL,&dof,NULL,NULL,NULL,NULL,NULL)); */
672   PetscCall(DMDAGetCorners(da, NULL, NULL, NULL, &nx, &ny, &nz));
673   PetscCall(PetscObjectGetComm((PetscObject)da, &comm));
674   PetscCall(MatCreate(comm, &A));
675   PetscCall(MatSetSizes(A, dof * nx * ny * nz, dof * nx * ny * nz, dof * M * N * P, dof * M * N * P));
676   PetscCall(MatSetType(A, mtype));
677   PetscCall(MatSetFromOptions(A));
678   if (dof * nx * ny * nz < da->bind_below) {
679     PetscCall(MatSetBindingPropagates(A, PETSC_TRUE));
680     PetscCall(MatBindToCPU(A, PETSC_TRUE));
681   }
682   PetscCall(MatSetDM(A, da));
683   if (da->structure_only) PetscCall(MatSetOption(A, MAT_STRUCTURE_ONLY, PETSC_TRUE));
684   PetscCall(MatGetType(A, &Atype));
685   /*
686      We do not provide a getmatrix function in the DMDA operations because
687    the basic DMDA does not know about matrices. We think of DMDA as being more
688    more low-level than matrices. This is kind of cheating but, cause sometimes
689    we think of DMDA has higher level than matrices.
690 
691      We could switch based on Atype (or mtype), but we do not since the
692    specialized setting routines depend only on the particular preallocation
693    details of the matrix, not the type itself.
694   */
695   PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatMPIAIJSetPreallocation_C", &aij));
696   if (!aij) PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatSeqAIJSetPreallocation_C", &aij));
697   if (!aij) {
698     PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatMPIBAIJSetPreallocation_C", &baij));
699     if (!baij) PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatSeqBAIJSetPreallocation_C", &baij));
700     if (!baij) {
701       PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatMPISBAIJSetPreallocation_C", &sbaij));
702       if (!sbaij) PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatSeqSBAIJSetPreallocation_C", &sbaij));
703       if (!sbaij) {
704         PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatMPISELLSetPreallocation_C", &sell));
705         if (!sell) PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatSeqSELLSetPreallocation_C", &sell));
706       }
707       if (!sell) PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatISSetPreallocation_C", &is));
708     }
709   }
710   if (aij) {
711     if (dim == 1) {
712       if (dd->ofill) {
713         PetscCall(DMCreateMatrix_DA_1d_MPIAIJ_Fill(da, A));
714       } else {
715         DMBoundaryType bx;
716         PetscMPIInt    size;
717         PetscCall(DMDAGetInfo(da, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, &bx, NULL, NULL, NULL));
718         PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)da), &size));
719         if (size == 1 && bx == DM_BOUNDARY_NONE) {
720           PetscCall(DMCreateMatrix_DA_1d_SeqAIJ_NoPreallocation(da, A));
721         } else {
722           PetscCall(DMCreateMatrix_DA_1d_MPIAIJ(da, A));
723         }
724       }
725     } else if (dim == 2) {
726       if (dd->ofill) {
727         PetscCall(DMCreateMatrix_DA_2d_MPIAIJ_Fill(da, A));
728       } else {
729         PetscCall(DMCreateMatrix_DA_2d_MPIAIJ(da, A));
730       }
731     } else if (dim == 3) {
732       if (dd->ofill) {
733         PetscCall(DMCreateMatrix_DA_3d_MPIAIJ_Fill(da, A));
734       } else {
735         PetscCall(DMCreateMatrix_DA_3d_MPIAIJ(da, A));
736       }
737     }
738   } else if (baij) {
739     if (dim == 2) {
740       PetscCall(DMCreateMatrix_DA_2d_MPIBAIJ(da, A));
741     } else if (dim == 3) {
742       PetscCall(DMCreateMatrix_DA_3d_MPIBAIJ(da, A));
743     } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "Not implemented for %" PetscInt_FMT " dimension and Matrix Type: %s in %" PetscInt_FMT " dimension! Send mail to petsc-maint@mcs.anl.gov for code", dim, Atype, dim);
744   } else if (sbaij) {
745     if (dim == 2) {
746       PetscCall(DMCreateMatrix_DA_2d_MPISBAIJ(da, A));
747     } else if (dim == 3) {
748       PetscCall(DMCreateMatrix_DA_3d_MPISBAIJ(da, A));
749     } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "Not implemented for %" PetscInt_FMT " dimension and Matrix Type: %s in %" PetscInt_FMT " dimension! Send mail to petsc-maint@mcs.anl.gov for code", dim, Atype, dim);
750   } else if (sell) {
751     if (dim == 2) {
752       PetscCall(DMCreateMatrix_DA_2d_MPISELL(da, A));
753     } else if (dim == 3) {
754       PetscCall(DMCreateMatrix_DA_3d_MPISELL(da, A));
755     } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "Not implemented for %" PetscInt_FMT " dimension and Matrix Type: %s in %" PetscInt_FMT " dimension! Send mail to petsc-maint@mcs.anl.gov for code", dim, Atype, dim);
756   } else if (is) {
757     PetscCall(DMCreateMatrix_DA_IS(da, A));
758   } else {
759     ISLocalToGlobalMapping ltog;
760 
761     PetscCall(MatSetBlockSize(A, dof));
762     PetscCall(MatSetUp(A));
763     PetscCall(DMGetLocalToGlobalMapping(da, &ltog));
764     PetscCall(MatSetLocalToGlobalMapping(A, ltog, ltog));
765   }
766   PetscCall(DMDAGetGhostCorners(da, &starts[0], &starts[1], &starts[2], &dims[0], &dims[1], &dims[2]));
767   PetscCall(MatSetStencil(A, dim, dims, starts, dof));
768   PetscCall(MatSetDM(A, da));
769   PetscCallMPI(MPI_Comm_size(comm, &size));
770   if (size > 1) {
771     /* change viewer to display matrix in natural ordering */
772     PetscCall(MatSetOperation(A, MATOP_VIEW, (void (*)(void))MatView_MPI_DA));
773     PetscCall(MatSetOperation(A, MATOP_LOAD, (void (*)(void))MatLoad_MPI_DA));
774   }
775   *J = A;
776   PetscFunctionReturn(PETSC_SUCCESS);
777 }
778 
779 /* ---------------------------------------------------------------------------------*/
780 PETSC_EXTERN PetscErrorCode MatISSetPreallocation_IS(Mat, PetscInt, const PetscInt[], PetscInt, const PetscInt[]);
781 
782 PetscErrorCode DMCreateMatrix_DA_IS(DM dm, Mat J)
783 {
784   DM_DA                 *da = (DM_DA *)dm->data;
785   Mat                    lJ, P;
786   ISLocalToGlobalMapping ltog;
787   IS                     is;
788   PetscBT                bt;
789   const PetscInt        *e_loc, *idx;
790   PetscInt               i, nel, nen, nv, dof, *gidx, n, N;
791 
792   /* The l2g map of DMDA has all ghosted nodes, and e_loc is a subset of all the local nodes (including the ghosted)
793      We need to filter out the local indices that are not represented through the DMDAGetElements decomposition */
794   PetscFunctionBegin;
795   dof = da->w;
796   PetscCall(MatSetBlockSize(J, dof));
797   PetscCall(DMGetLocalToGlobalMapping(dm, &ltog));
798 
799   /* flag local elements indices in local DMDA numbering */
800   PetscCall(ISLocalToGlobalMappingGetSize(ltog, &nv));
801   PetscCall(PetscBTCreate(nv / dof, &bt));
802   PetscCall(DMDAGetElements(dm, &nel, &nen, &e_loc)); /* this will throw an error if the stencil type is not DMDA_STENCIL_BOX */
803   for (i = 0; i < nel * nen; i++) PetscCall(PetscBTSet(bt, e_loc[i]));
804 
805   /* filter out (set to -1) the global indices not used by the local elements */
806   PetscCall(PetscMalloc1(nv / dof, &gidx));
807   PetscCall(ISLocalToGlobalMappingGetBlockIndices(ltog, &idx));
808   PetscCall(PetscArraycpy(gidx, idx, nv / dof));
809   PetscCall(ISLocalToGlobalMappingRestoreBlockIndices(ltog, &idx));
810   for (i = 0; i < nv / dof; i++)
811     if (!PetscBTLookup(bt, i)) gidx[i] = -1;
812   PetscCall(PetscBTDestroy(&bt));
813   PetscCall(ISCreateBlock(PetscObjectComm((PetscObject)dm), dof, nv / dof, gidx, PETSC_OWN_POINTER, &is));
814   PetscCall(ISLocalToGlobalMappingCreateIS(is, &ltog));
815   PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));
816   PetscCall(ISLocalToGlobalMappingDestroy(&ltog));
817   PetscCall(ISDestroy(&is));
818 
819   /* Preallocation */
820   if (dm->prealloc_skip) {
821     PetscCall(MatSetUp(J));
822   } else {
823     PetscCall(MatISGetLocalMat(J, &lJ));
824     PetscCall(MatGetLocalToGlobalMapping(lJ, &ltog, NULL));
825     PetscCall(MatCreate(PetscObjectComm((PetscObject)lJ), &P));
826     PetscCall(MatSetType(P, MATPREALLOCATOR));
827     PetscCall(MatSetLocalToGlobalMapping(P, ltog, ltog));
828     PetscCall(MatGetSize(lJ, &N, NULL));
829     PetscCall(MatGetLocalSize(lJ, &n, NULL));
830     PetscCall(MatSetSizes(P, n, n, N, N));
831     PetscCall(MatSetBlockSize(P, dof));
832     PetscCall(MatSetUp(P));
833     for (i = 0; i < nel; i++) PetscCall(MatSetValuesBlockedLocal(P, nen, e_loc + i * nen, nen, e_loc + i * nen, NULL, INSERT_VALUES));
834     PetscCall(MatPreallocatorPreallocate(P, (PetscBool)!da->prealloc_only, lJ));
835     PetscCall(MatISRestoreLocalMat(J, &lJ));
836     PetscCall(DMDARestoreElements(dm, &nel, &nen, &e_loc));
837     PetscCall(MatDestroy(&P));
838 
839     PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
840     PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
841   }
842   PetscFunctionReturn(PETSC_SUCCESS);
843 }
844 
/*
  DMCreateMatrix_DA_2d_MPISELL - Preallocates a SELL-format matrix for a 2d DMDA using the
  stencil width and type stored in the DMDA; unless da->prealloc_only is set it also inserts
  explicit zeros at every potentially nonzero location so later MatSetValues() calls cannot
  trigger new allocations (MAT_NEW_NONZERO_LOCATION_ERR is set at the end).

  Every component at a grid point is coupled to all nc components of each neighboring point
  inside the stencil (no dfill/ofill sparsification in this variant).
*/
PetscErrorCode DMCreateMatrix_DA_2d_MPISELL(DM da, Mat J)
{
  PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny, m, n, dim, s, *cols = NULL, k, nc, *rows = NULL, col, cnt, l, p;
  PetscInt               lstart, lend, pstart, pend, *dnz, *onz;
  MPI_Comm               comm;
  PetscScalar           *values;
  DMBoundaryType         bx, by;
  ISLocalToGlobalMapping ltog;
  DMDAStencilType        st;

  PetscFunctionBegin;
  /*
         nc - number of components per grid point
         col - number of colors needed in one direction for single component problem
  */
  PetscCall(DMDAGetInfo(da, &dim, &m, &n, NULL, NULL, NULL, NULL, &nc, &s, &bx, &by, NULL, &st));
  col = 2 * s + 1; /* number of stencil points in one direction */
  PetscCall(DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL));
  PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL));
  PetscCall(PetscObjectGetComm((PetscObject)da, &comm));

  /* rows holds the nc row indices of a grid point; cols is sized for a full box stencil */
  PetscCall(PetscMalloc2(nc, &rows, col * col * nc * nc, &cols));
  PetscCall(DMGetLocalToGlobalMapping(da, &ltog));

  PetscCall(MatSetBlockSize(J, nc));
  /* determine the matrix preallocation information */
  MatPreallocateBegin(comm, nc * nx * ny, nc * nx * ny, dnz, onz);
  for (i = xs; i < xs + nx; i++) {
    /* clip the stencil range at a physical boundary unless the domain is periodic in x */
    pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
    pend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));

    for (j = ys; j < ys + ny; j++) {
      slot = i - gxs + gnx * (j - gys); /* local (ghosted) index of grid point (i,j) */

      lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
      lend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));

      cnt = 0;
      for (k = 0; k < nc; k++) {
        for (l = lstart; l < lend + 1; l++) {
          for (p = pstart; p < pend + 1; p++) {
            if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star have either l = 0 or p = 0 */
              cols[cnt++] = k + nc * (slot + gnx * l + p);
            }
          }
        }
        rows[k] = k + nc * (slot);
      }
      PetscCall(MatPreallocateSetLocal(ltog, nc, rows, ltog, cnt, cols, dnz, onz));
    }
  }
  PetscCall(MatSetBlockSize(J, nc));
  /* only one of the next two calls takes effect, depending on the actual (Seq or MPI) type of J */
  PetscCall(MatSeqSELLSetPreallocation(J, 0, dnz));
  PetscCall(MatMPISELLSetPreallocation(J, 0, dnz, 0, onz));
  MatPreallocateEnd(dnz, onz);

  PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    /* insert explicit zeros in the full stencil pattern, one grid point at a time */
    PetscCall(PetscCalloc1(col * col * nc * nc, &values));
    for (i = xs; i < xs + nx; i++) {
      pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
      pend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));

      for (j = ys; j < ys + ny; j++) {
        slot = i - gxs + gnx * (j - gys);

        lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
        lend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));

        cnt = 0;
        for (k = 0; k < nc; k++) {
          for (l = lstart; l < lend + 1; l++) {
            for (p = pstart; p < pend + 1; p++) {
              if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star have either l = 0 or p = 0 */
                cols[cnt++] = k + nc * (slot + gnx * l + p);
              }
            }
          }
          rows[k] = k + nc * (slot);
        }
        PetscCall(MatSetValuesLocal(J, nc, rows, cnt, cols, values, INSERT_VALUES));
      }
    }
    PetscCall(PetscFree(values));
    /* do not copy values to GPU since they are all zero and not yet needed there */
    PetscCall(MatBindToCPU(J, PETSC_TRUE));
    PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatBindToCPU(J, PETSC_FALSE));
    PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  }
  PetscCall(PetscFree2(rows, cols));
  PetscFunctionReturn(PETSC_SUCCESS);
}
945 
946 PetscErrorCode DMCreateMatrix_DA_3d_MPISELL(DM da, Mat J)
947 {
948   PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
949   PetscInt               m, n, dim, s, *cols = NULL, k, nc, *rows = NULL, col, cnt, l, p, *dnz = NULL, *onz = NULL;
950   PetscInt               istart, iend, jstart, jend, kstart, kend, zs, nz, gzs, gnz, ii, jj, kk, M, N, P;
951   MPI_Comm               comm;
952   PetscScalar           *values;
953   DMBoundaryType         bx, by, bz;
954   ISLocalToGlobalMapping ltog;
955   DMDAStencilType        st;
956 
957   PetscFunctionBegin;
958   /*
959          nc - number of components per grid point
960          col - number of colors needed in one direction for single component problem
961   */
962   PetscCall(DMDAGetInfo(da, &dim, &m, &n, &p, &M, &N, &P, &nc, &s, &bx, &by, &bz, &st));
963   col = 2 * s + 1;
964   PetscCall(DMDAGetCorners(da, &xs, &ys, &zs, &nx, &ny, &nz));
965   PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, &gzs, &gnx, &gny, &gnz));
966   PetscCall(PetscObjectGetComm((PetscObject)da, &comm));
967 
968   PetscCall(PetscMalloc2(nc, &rows, col * col * col * nc * nc, &cols));
969   PetscCall(DMGetLocalToGlobalMapping(da, &ltog));
970 
971   PetscCall(MatSetBlockSize(J, nc));
972   /* determine the matrix preallocation information */
973   MatPreallocateBegin(comm, nc * nx * ny * nz, nc * nx * ny * nz, dnz, onz);
974   for (i = xs; i < xs + nx; i++) {
975     istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
976     iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
977     for (j = ys; j < ys + ny; j++) {
978       jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
979       jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
980       for (k = zs; k < zs + nz; k++) {
981         kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
982         kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));
983 
984         slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);
985 
986         cnt = 0;
987         for (l = 0; l < nc; l++) {
988           for (ii = istart; ii < iend + 1; ii++) {
989             for (jj = jstart; jj < jend + 1; jj++) {
990               for (kk = kstart; kk < kend + 1; kk++) {
991                 if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/
992                   cols[cnt++] = l + nc * (slot + ii + gnx * jj + gnx * gny * kk);
993                 }
994               }
995             }
996           }
997           rows[l] = l + nc * (slot);
998         }
999         PetscCall(MatPreallocateSetLocal(ltog, nc, rows, ltog, cnt, cols, dnz, onz));
1000       }
1001     }
1002   }
1003   PetscCall(MatSetBlockSize(J, nc));
1004   PetscCall(MatSeqSELLSetPreallocation(J, 0, dnz));
1005   PetscCall(MatMPISELLSetPreallocation(J, 0, dnz, 0, onz));
1006   MatPreallocateEnd(dnz, onz);
1007   PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));
1008 
1009   /*
1010     For each node in the grid: we get the neighbors in the local (on processor ordering
1011     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1012     PETSc ordering.
1013   */
1014   if (!da->prealloc_only) {
1015     PetscCall(PetscCalloc1(col * col * col * nc * nc * nc, &values));
1016     for (i = xs; i < xs + nx; i++) {
1017       istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
1018       iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
1019       for (j = ys; j < ys + ny; j++) {
1020         jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
1021         jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
1022         for (k = zs; k < zs + nz; k++) {
1023           kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
1024           kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));
1025 
1026           slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);
1027 
1028           cnt = 0;
1029           for (l = 0; l < nc; l++) {
1030             for (ii = istart; ii < iend + 1; ii++) {
1031               for (jj = jstart; jj < jend + 1; jj++) {
1032                 for (kk = kstart; kk < kend + 1; kk++) {
1033                   if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/
1034                     cols[cnt++] = l + nc * (slot + ii + gnx * jj + gnx * gny * kk);
1035                   }
1036                 }
1037               }
1038             }
1039             rows[l] = l + nc * (slot);
1040           }
1041           PetscCall(MatSetValuesLocal(J, nc, rows, cnt, cols, values, INSERT_VALUES));
1042         }
1043       }
1044     }
1045     PetscCall(PetscFree(values));
1046     /* do not copy values to GPU since they are all zero and not yet needed there */
1047     PetscCall(MatBindToCPU(J, PETSC_TRUE));
1048     PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
1049     PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
1050     PetscCall(MatBindToCPU(J, PETSC_FALSE));
1051     PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
1052   }
1053   PetscCall(PetscFree2(rows, cols));
1054   PetscFunctionReturn(PETSC_SUCCESS);
1055 }
1056 
/*
  DMCreateMatrix_DA_2d_MPIAIJ - Preallocates an AIJ-format matrix for a 2d DMDA using the
  stencil width and type stored in the DMDA; unless da->prealloc_only is set it also inserts
  (value-less) entries at every potentially nonzero location so later MatSetValues() calls
  cannot trigger new allocations.

  Every component at a grid point is coupled to all nc components of each neighboring point
  inside the stencil (no dfill/ofill sparsification; see DMCreateMatrix_DA_2d_MPIAIJ_Fill
  for that case).
*/
PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ(DM da, Mat J)
{
  PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny, m, n, dim, s, *cols = NULL, k, nc, *rows = NULL, col, cnt, l, p, M, N;
  PetscInt               lstart, lend, pstart, pend, *dnz, *onz;
  MPI_Comm               comm;
  DMBoundaryType         bx, by;
  ISLocalToGlobalMapping ltog, mltog;
  DMDAStencilType        st;
  PetscBool              removedups = PETSC_FALSE, alreadyboundtocpu = PETSC_TRUE;

  PetscFunctionBegin;
  /*
         nc - number of components per grid point
         col - number of colors needed in one direction for single component problem
  */
  PetscCall(DMDAGetInfo(da, &dim, &m, &n, NULL, &M, &N, NULL, &nc, &s, &bx, &by, NULL, &st));
  /* columns below are generated in sorted order per row, which MAT_SORTED_FULL exploits */
  if (bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE) PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_TRUE));
  col = 2 * s + 1;
  /*
       With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times
       because of "wrapping" around the end of the domain hitting an entry already counted in the other direction.
  */
  if (M == 1 && 2 * s >= m) removedups = PETSC_TRUE;
  if (N == 1 && 2 * s >= n) removedups = PETSC_TRUE;
  PetscCall(DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL));
  PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL));
  PetscCall(PetscObjectGetComm((PetscObject)da, &comm));

  /* rows holds the nc row indices of a grid point; cols is sized for a full box stencil */
  PetscCall(PetscMalloc2(nc, &rows, col * col * nc * nc, &cols));
  PetscCall(DMGetLocalToGlobalMapping(da, &ltog));

  PetscCall(MatSetBlockSize(J, nc));
  /* determine the matrix preallocation information */
  MatPreallocateBegin(comm, nc * nx * ny, nc * nx * ny, dnz, onz);
  for (i = xs; i < xs + nx; i++) {
    /* clip the stencil range at a physical boundary unless the domain is periodic in x */
    pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
    pend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));

    for (j = ys; j < ys + ny; j++) {
      slot = i - gxs + gnx * (j - gys); /* local (ghosted) index of grid point (i,j) */

      lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
      lend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));

      cnt = 0;
      for (k = 0; k < nc; k++) {
        for (l = lstart; l < lend + 1; l++) {
          for (p = pstart; p < pend + 1; p++) {
            if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star have either l = 0 or p = 0 */
              cols[cnt++] = k + nc * (slot + gnx * l + p);
            }
          }
        }
        rows[k] = k + nc * (slot);
      }
      /* removedups collapses columns counted twice due to periodic wrap-around in a skinny dimension */
      if (removedups) PetscCall(MatPreallocateSetLocalRemoveDups(ltog, nc, rows, ltog, cnt, cols, dnz, onz));
      else PetscCall(MatPreallocateSetLocal(ltog, nc, rows, ltog, cnt, cols, dnz, onz));
    }
  }
  PetscCall(MatSetBlockSize(J, nc));
  /* only one of the next two calls takes effect, depending on the actual (Seq or MPI) type of J */
  PetscCall(MatSeqAIJSetPreallocation(J, 0, dnz));
  PetscCall(MatMPIAIJSetPreallocation(J, 0, dnz, 0, onz));
  MatPreallocateEnd(dnz, onz);
  /* do not overwrite a local-to-global mapping the matrix already has */
  PetscCall(MatGetLocalToGlobalMapping(J, &mltog, NULL));
  if (!mltog) PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    for (i = xs; i < xs + nx; i++) {
      pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
      pend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));

      for (j = ys; j < ys + ny; j++) {
        slot = i - gxs + gnx * (j - gys);

        lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
        lend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));

        cnt = 0;
        for (l = lstart; l < lend + 1; l++) {
          for (p = pstart; p < pend + 1; p++) {
            if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star have either l = 0 or p = 0 */
              /* the nc component columns of a stencil point are consecutive; generate them in order */
              cols[cnt++] = nc * (slot + gnx * l + p);
              for (k = 1; k < nc; k++) {
                cols[cnt] = 1 + cols[cnt - 1];
                cnt++;
              }
            }
          }
        }
        for (k = 0; k < nc; k++) rows[k] = k + nc * (slot);
        PetscCall(MatSetValuesLocal(J, nc, rows, cnt, cols, NULL, INSERT_VALUES));
      }
    }
    /* do not copy values to GPU since they are all zero and not yet needed there */
    PetscCall(MatBoundToCPU(J, &alreadyboundtocpu));
    PetscCall(MatBindToCPU(J, PETSC_TRUE));
    PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
    /* only unbind if the caller had not bound the matrix to the CPU themselves */
    if (!alreadyboundtocpu) PetscCall(MatBindToCPU(J, PETSC_FALSE));
    PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
    if (bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE) PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_FALSE));
  }
  PetscCall(PetscFree2(rows, cols));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1167 
/*
  DMCreateMatrix_DA_2d_MPIAIJ_Fill - Preallocates an AIJ-format matrix for a 2d DMDA when the
  user supplied block fill information (dd->dfill for the diagonal stencil point, dd->ofill for
  off-diagonal points, both in the modified-CSR form built by DMDASetBlockFills_Private), so
  only the requested component couplings are allocated.

  Unless da->prealloc_only is set, (value-less) entries are also inserted at every allocated
  location so later MatSetValues() calls cannot trigger new allocations.
*/
PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ_Fill(DM da, Mat J)
{
  PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
  PetscInt               m, n, dim, s, *cols, k, nc, row, col, cnt, maxcnt = 0, l, p, M, N;
  PetscInt               lstart, lend, pstart, pend, *dnz, *onz;
  DM_DA                 *dd = (DM_DA *)da->data;
  PetscInt               ifill_col, *ofill = dd->ofill, *dfill = dd->dfill;
  MPI_Comm               comm;
  DMBoundaryType         bx, by;
  ISLocalToGlobalMapping ltog;
  DMDAStencilType        st;
  PetscBool              removedups = PETSC_FALSE;

  PetscFunctionBegin;
  /*
         nc - number of components per grid point
         col - number of colors needed in one direction for single component problem
  */
  PetscCall(DMDAGetInfo(da, &dim, &m, &n, NULL, &M, &N, NULL, &nc, &s, &bx, &by, NULL, &st));
  col = 2 * s + 1;
  /*
       With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times
       because of "wrapping" around the end of the domain hitting an entry already counted in the other direction.
  */
  if (M == 1 && 2 * s >= m) removedups = PETSC_TRUE;
  if (N == 1 && 2 * s >= n) removedups = PETSC_TRUE;
  PetscCall(DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL));
  PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL));
  PetscCall(PetscObjectGetComm((PetscObject)da, &comm));

  /* one row is assembled at a time, with at most nc columns per stencil point */
  PetscCall(PetscMalloc1(col * col * nc, &cols));
  PetscCall(DMGetLocalToGlobalMapping(da, &ltog));

  PetscCall(MatSetBlockSize(J, nc));
  /* determine the matrix preallocation information */
  MatPreallocateBegin(comm, nc * nx * ny, nc * nx * ny, dnz, onz);
  for (i = xs; i < xs + nx; i++) {
    /* clip the stencil range at a physical boundary unless the domain is periodic in x */
    pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
    pend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));

    for (j = ys; j < ys + ny; j++) {
      slot = i - gxs + gnx * (j - gys); /* local (ghosted) index of grid point (i,j) */

      lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
      lend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));

      for (k = 0; k < nc; k++) {
        cnt = 0;
        for (l = lstart; l < lend + 1; l++) {
          for (p = pstart; p < pend + 1; p++) {
            if (l || p) {
              /* off-diagonal stencil point: couple only the components listed in ofill for row-component k */
              if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star */
                for (ifill_col = ofill[k]; ifill_col < ofill[k + 1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc * (slot + gnx * l + p);
              }
            } else {
              /* diagonal stencil point: use dfill if given, otherwise couple all nc components */
              if (dfill) {
                for (ifill_col = dfill[k]; ifill_col < dfill[k + 1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc * (slot + gnx * l + p);
              } else {
                for (ifill_col = 0; ifill_col < nc; ifill_col++) cols[cnt++] = ifill_col + nc * (slot + gnx * l + p);
              }
            }
          }
        }
        row    = k + nc * (slot);
        maxcnt = PetscMax(maxcnt, cnt);
        /* removedups collapses columns counted twice due to periodic wrap-around in a skinny dimension */
        if (removedups) PetscCall(MatPreallocateSetLocalRemoveDups(ltog, 1, &row, ltog, cnt, cols, dnz, onz));
        else PetscCall(MatPreallocateSetLocal(ltog, 1, &row, ltog, cnt, cols, dnz, onz));
      }
    }
  }
  /* only one of the next two calls takes effect, depending on the actual (Seq or MPI) type of J */
  PetscCall(MatSeqAIJSetPreallocation(J, 0, dnz));
  PetscCall(MatMPIAIJSetPreallocation(J, 0, dnz, 0, onz));
  MatPreallocateEnd(dnz, onz);
  PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    for (i = xs; i < xs + nx; i++) {
      pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
      pend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));

      for (j = ys; j < ys + ny; j++) {
        slot = i - gxs + gnx * (j - gys);

        lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
        lend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));

        for (k = 0; k < nc; k++) {
          cnt = 0;
          for (l = lstart; l < lend + 1; l++) {
            for (p = pstart; p < pend + 1; p++) {
              if (l || p) {
                if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star */
                  for (ifill_col = ofill[k]; ifill_col < ofill[k + 1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc * (slot + gnx * l + p);
                }
              } else {
                if (dfill) {
                  for (ifill_col = dfill[k]; ifill_col < dfill[k + 1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc * (slot + gnx * l + p);
                } else {
                  for (ifill_col = 0; ifill_col < nc; ifill_col++) cols[cnt++] = ifill_col + nc * (slot + gnx * l + p);
                }
              }
            }
          }
          row = k + nc * (slot);
          PetscCall(MatSetValuesLocal(J, 1, &row, cnt, cols, NULL, INSERT_VALUES));
        }
      }
    }
    /* do not copy values to GPU since they are all zero and not yet needed there */
    PetscCall(MatBindToCPU(J, PETSC_TRUE));
    PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatBindToCPU(J, PETSC_FALSE));
    PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  }
  PetscCall(PetscFree(cols));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1291 
1292 /* ---------------------------------------------------------------------------------*/
1293 
1294 PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ(DM da, Mat J)
1295 {
1296   PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
1297   PetscInt               m, n, dim, s, *cols = NULL, k, nc, *rows = NULL, col, cnt, l, p, *dnz = NULL, *onz = NULL;
1298   PetscInt               istart, iend, jstart, jend, kstart, kend, zs, nz, gzs, gnz, ii, jj, kk, M, N, P;
1299   MPI_Comm               comm;
1300   DMBoundaryType         bx, by, bz;
1301   ISLocalToGlobalMapping ltog, mltog;
1302   DMDAStencilType        st;
1303   PetscBool              removedups = PETSC_FALSE;
1304 
1305   PetscFunctionBegin;
1306   /*
1307          nc - number of components per grid point
1308          col - number of colors needed in one direction for single component problem
1309   */
1310   PetscCall(DMDAGetInfo(da, &dim, &m, &n, &p, &M, &N, &P, &nc, &s, &bx, &by, &bz, &st));
1311   if (bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE && bz == DM_BOUNDARY_NONE) PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_TRUE));
1312   col = 2 * s + 1;
1313 
1314   /*
1315        With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times
1316        because of "wrapping" around the end of the domain hitting an entry already counted in the other direction.
1317   */
1318   if (M == 1 && 2 * s >= m) removedups = PETSC_TRUE;
1319   if (N == 1 && 2 * s >= n) removedups = PETSC_TRUE;
1320   if (P == 1 && 2 * s >= p) removedups = PETSC_TRUE;
1321 
1322   PetscCall(DMDAGetCorners(da, &xs, &ys, &zs, &nx, &ny, &nz));
1323   PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, &gzs, &gnx, &gny, &gnz));
1324   PetscCall(PetscObjectGetComm((PetscObject)da, &comm));
1325 
1326   PetscCall(PetscMalloc2(nc, &rows, col * col * col * nc * nc, &cols));
1327   PetscCall(DMGetLocalToGlobalMapping(da, &ltog));
1328 
1329   PetscCall(MatSetBlockSize(J, nc));
1330   /* determine the matrix preallocation information */
1331   MatPreallocateBegin(comm, nc * nx * ny * nz, nc * nx * ny * nz, dnz, onz);
1332   for (i = xs; i < xs + nx; i++) {
1333     istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
1334     iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
1335     for (j = ys; j < ys + ny; j++) {
1336       jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
1337       jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
1338       for (k = zs; k < zs + nz; k++) {
1339         kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
1340         kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));
1341 
1342         slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);
1343 
1344         cnt = 0;
1345         for (l = 0; l < nc; l++) {
1346           for (ii = istart; ii < iend + 1; ii++) {
1347             for (jj = jstart; jj < jend + 1; jj++) {
1348               for (kk = kstart; kk < kend + 1; kk++) {
1349                 if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/
1350                   cols[cnt++] = l + nc * (slot + ii + gnx * jj + gnx * gny * kk);
1351                 }
1352               }
1353             }
1354           }
1355           rows[l] = l + nc * (slot);
1356         }
1357         if (removedups) PetscCall(MatPreallocateSetLocalRemoveDups(ltog, nc, rows, ltog, cnt, cols, dnz, onz));
1358         else PetscCall(MatPreallocateSetLocal(ltog, nc, rows, ltog, cnt, cols, dnz, onz));
1359       }
1360     }
1361   }
1362   PetscCall(MatSetBlockSize(J, nc));
1363   PetscCall(MatSeqAIJSetPreallocation(J, 0, dnz));
1364   PetscCall(MatMPIAIJSetPreallocation(J, 0, dnz, 0, onz));
1365   MatPreallocateEnd(dnz, onz);
1366   PetscCall(MatGetLocalToGlobalMapping(J, &mltog, NULL));
1367   if (!mltog) PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));
1368 
1369   /*
1370     For each node in the grid: we get the neighbors in the local (on processor ordering
1371     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1372     PETSc ordering.
1373   */
1374   if (!da->prealloc_only) {
1375     for (i = xs; i < xs + nx; i++) {
1376       istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
1377       iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
1378       for (j = ys; j < ys + ny; j++) {
1379         jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
1380         jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
1381         for (k = zs; k < zs + nz; k++) {
1382           kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
1383           kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));
1384 
1385           slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);
1386 
1387           cnt = 0;
1388           for (kk = kstart; kk < kend + 1; kk++) {
1389             for (jj = jstart; jj < jend + 1; jj++) {
1390               for (ii = istart; ii < iend + 1; ii++) {
1391                 if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/
1392                   cols[cnt++] = nc * (slot + ii + gnx * jj + gnx * gny * kk);
1393                   for (l = 1; l < nc; l++) {
1394                     cols[cnt] = 1 + cols[cnt - 1];
1395                     cnt++;
1396                   }
1397                 }
1398               }
1399             }
1400           }
1401           rows[0] = nc * (slot);
1402           for (l = 1; l < nc; l++) rows[l] = 1 + rows[l - 1];
1403           PetscCall(MatSetValuesLocal(J, nc, rows, cnt, cols, NULL, INSERT_VALUES));
1404         }
1405       }
1406     }
1407     /* do not copy values to GPU since they are all zero and not yet needed there */
1408     PetscCall(MatBindToCPU(J, PETSC_TRUE));
1409     PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
1410     PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
1411     if (bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE && bz == DM_BOUNDARY_NONE) PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_FALSE));
1412     PetscCall(MatBindToCPU(J, PETSC_FALSE));
1413     PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
1414   }
1415   PetscCall(PetscFree2(rows, cols));
1416   PetscFunctionReturn(PETSC_SUCCESS);
1417 }
1418 
1419 /* ---------------------------------------------------------------------------------*/
1420 
1421 PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ_Fill(DM da, Mat J)
1422 {
1423   DM_DA                 *dd = (DM_DA *)da->data;
1424   PetscInt               xs, nx, i, j, gxs, gnx, row, k, l;
1425   PetscInt               m, dim, s, *cols = NULL, nc, cnt, maxcnt = 0, *ocols;
1426   PetscInt              *ofill = dd->ofill, *dfill = dd->dfill;
1427   DMBoundaryType         bx;
1428   ISLocalToGlobalMapping ltog;
1429   PetscMPIInt            rank, size;
1430 
1431   PetscFunctionBegin;
1432   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)da), &rank));
1433   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)da), &size));
1434 
1435   /*
1436          nc - number of components per grid point
1437   */
1438   PetscCall(DMDAGetInfo(da, &dim, &m, NULL, NULL, NULL, NULL, NULL, &nc, &s, &bx, NULL, NULL, NULL));
1439   PetscCheck(s <= 1, PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "Matrix creation for 1d not implemented correctly for stencil width larger than 1");
1440   PetscCall(DMDAGetCorners(da, &xs, NULL, NULL, &nx, NULL, NULL));
1441   PetscCall(DMDAGetGhostCorners(da, &gxs, NULL, NULL, &gnx, NULL, NULL));
1442 
1443   PetscCall(MatSetBlockSize(J, nc));
1444   PetscCall(PetscCalloc2(nx * nc, &cols, nx * nc, &ocols));
1445 
1446   /*
1447         note should be smaller for first and last process with no periodic
1448         does not handle dfill
1449   */
1450   cnt = 0;
1451   /* coupling with process to the left */
1452   for (i = 0; i < s; i++) {
1453     for (j = 0; j < nc; j++) {
1454       ocols[cnt] = ((rank == 0) ? 0 : (s - i) * (ofill[j + 1] - ofill[j]));
1455       cols[cnt]  = dfill[j + 1] - dfill[j] + (s + i) * (ofill[j + 1] - ofill[j]);
1456       if (rank == 0 && (dd->bx == DM_BOUNDARY_PERIODIC)) {
1457         if (size > 1) ocols[cnt] += (s - i) * (ofill[j + 1] - ofill[j]);
1458         else cols[cnt] += (s - i) * (ofill[j + 1] - ofill[j]);
1459       }
1460       maxcnt = PetscMax(maxcnt, ocols[cnt] + cols[cnt]);
1461       cnt++;
1462     }
1463   }
1464   for (i = s; i < nx - s; i++) {
1465     for (j = 0; j < nc; j++) {
1466       cols[cnt] = dfill[j + 1] - dfill[j] + 2 * s * (ofill[j + 1] - ofill[j]);
1467       maxcnt    = PetscMax(maxcnt, ocols[cnt] + cols[cnt]);
1468       cnt++;
1469     }
1470   }
1471   /* coupling with process to the right */
1472   for (i = nx - s; i < nx; i++) {
1473     for (j = 0; j < nc; j++) {
1474       ocols[cnt] = ((rank == (size - 1)) ? 0 : (i - nx + s + 1) * (ofill[j + 1] - ofill[j]));
1475       cols[cnt]  = dfill[j + 1] - dfill[j] + (s + nx - i - 1) * (ofill[j + 1] - ofill[j]);
1476       if ((rank == size - 1) && (dd->bx == DM_BOUNDARY_PERIODIC)) {
1477         if (size > 1) ocols[cnt] += (i - nx + s + 1) * (ofill[j + 1] - ofill[j]);
1478         else cols[cnt] += (i - nx + s + 1) * (ofill[j + 1] - ofill[j]);
1479       }
1480       maxcnt = PetscMax(maxcnt, ocols[cnt] + cols[cnt]);
1481       cnt++;
1482     }
1483   }
1484 
1485   PetscCall(MatSeqAIJSetPreallocation(J, 0, cols));
1486   PetscCall(MatMPIAIJSetPreallocation(J, 0, cols, 0, ocols));
1487   PetscCall(PetscFree2(cols, ocols));
1488 
1489   PetscCall(DMGetLocalToGlobalMapping(da, &ltog));
1490   PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));
1491 
1492   /*
1493     For each node in the grid: we get the neighbors in the local (on processor ordering
1494     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1495     PETSc ordering.
1496   */
1497   if (!da->prealloc_only) {
1498     PetscCall(PetscMalloc1(maxcnt, &cols));
1499     row = xs * nc;
1500     /* coupling with process to the left */
1501     for (i = xs; i < xs + s; i++) {
1502       for (j = 0; j < nc; j++) {
1503         cnt = 0;
1504         if (rank) {
1505           for (l = 0; l < s; l++) {
1506             for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i - s + l) * nc + ofill[k];
1507           }
1508         }
1509         if (rank == 0 && (dd->bx == DM_BOUNDARY_PERIODIC)) {
1510           for (l = 0; l < s; l++) {
1511             for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (m + i - s - l) * nc + ofill[k];
1512           }
1513         }
1514         if (dfill) {
1515           for (k = dfill[j]; k < dfill[j + 1]; k++) cols[cnt++] = i * nc + dfill[k];
1516         } else {
1517           for (k = 0; k < nc; k++) cols[cnt++] = i * nc + k;
1518         }
1519         for (l = 0; l < s; l++) {
1520           for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i + s - l) * nc + ofill[k];
1521         }
1522         PetscCall(MatSetValues(J, 1, &row, cnt, cols, NULL, INSERT_VALUES));
1523         row++;
1524       }
1525     }
1526     for (i = xs + s; i < xs + nx - s; i++) {
1527       for (j = 0; j < nc; j++) {
1528         cnt = 0;
1529         for (l = 0; l < s; l++) {
1530           for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i - s + l) * nc + ofill[k];
1531         }
1532         if (dfill) {
1533           for (k = dfill[j]; k < dfill[j + 1]; k++) cols[cnt++] = i * nc + dfill[k];
1534         } else {
1535           for (k = 0; k < nc; k++) cols[cnt++] = i * nc + k;
1536         }
1537         for (l = 0; l < s; l++) {
1538           for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i + s - l) * nc + ofill[k];
1539         }
1540         PetscCall(MatSetValues(J, 1, &row, cnt, cols, NULL, INSERT_VALUES));
1541         row++;
1542       }
1543     }
1544     /* coupling with process to the right */
1545     for (i = xs + nx - s; i < xs + nx; i++) {
1546       for (j = 0; j < nc; j++) {
1547         cnt = 0;
1548         for (l = 0; l < s; l++) {
1549           for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i - s + l) * nc + ofill[k];
1550         }
1551         if (dfill) {
1552           for (k = dfill[j]; k < dfill[j + 1]; k++) cols[cnt++] = i * nc + dfill[k];
1553         } else {
1554           for (k = 0; k < nc; k++) cols[cnt++] = i * nc + k;
1555         }
1556         if (rank < size - 1) {
1557           for (l = 0; l < s; l++) {
1558             for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i + s - l) * nc + ofill[k];
1559           }
1560         }
1561         if ((rank == size - 1) && (dd->bx == DM_BOUNDARY_PERIODIC)) {
1562           for (l = 0; l < s; l++) {
1563             for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i - s - l - m + 2) * nc + ofill[k];
1564           }
1565         }
1566         PetscCall(MatSetValues(J, 1, &row, cnt, cols, NULL, INSERT_VALUES));
1567         row++;
1568       }
1569     }
1570     PetscCall(PetscFree(cols));
1571     /* do not copy values to GPU since they are all zero and not yet needed there */
1572     PetscCall(MatBindToCPU(J, PETSC_TRUE));
1573     PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
1574     PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
1575     PetscCall(MatBindToCPU(J, PETSC_FALSE));
1576     PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
1577   }
1578   PetscFunctionReturn(PETSC_SUCCESS);
1579 }
1580 
1581 /* ---------------------------------------------------------------------------------*/
1582 
1583 PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ(DM da, Mat J)
1584 {
1585   PetscInt               xs, nx, i, i1, slot, gxs, gnx;
1586   PetscInt               m, dim, s, *cols = NULL, nc, *rows = NULL, col, cnt, l;
1587   PetscInt               istart, iend;
1588   DMBoundaryType         bx;
1589   ISLocalToGlobalMapping ltog, mltog;
1590 
1591   PetscFunctionBegin;
1592   /*
1593          nc - number of components per grid point
1594          col - number of colors needed in one direction for single component problem
1595   */
1596   PetscCall(DMDAGetInfo(da, &dim, &m, NULL, NULL, NULL, NULL, NULL, &nc, &s, &bx, NULL, NULL, NULL));
1597   if (bx == DM_BOUNDARY_NONE) PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_TRUE));
1598   col = 2 * s + 1;
1599 
1600   PetscCall(DMDAGetCorners(da, &xs, NULL, NULL, &nx, NULL, NULL));
1601   PetscCall(DMDAGetGhostCorners(da, &gxs, NULL, NULL, &gnx, NULL, NULL));
1602 
1603   PetscCall(MatSetBlockSize(J, nc));
1604   PetscCall(MatSeqAIJSetPreallocation(J, col * nc, NULL));
1605   PetscCall(MatMPIAIJSetPreallocation(J, col * nc, NULL, col * nc, NULL));
1606 
1607   PetscCall(DMGetLocalToGlobalMapping(da, &ltog));
1608   PetscCall(MatGetLocalToGlobalMapping(J, &mltog, NULL));
1609   if (!mltog) PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));
1610 
1611   /*
1612     For each node in the grid: we get the neighbors in the local (on processor ordering
1613     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1614     PETSc ordering.
1615   */
1616   if (!da->prealloc_only) {
1617     PetscCall(PetscMalloc2(nc, &rows, col * nc * nc, &cols));
1618     for (i = xs; i < xs + nx; i++) {
1619       istart = PetscMax(-s, gxs - i);
1620       iend   = PetscMin(s, gxs + gnx - i - 1);
1621       slot   = i - gxs;
1622 
1623       cnt = 0;
1624       for (i1 = istart; i1 < iend + 1; i1++) {
1625         cols[cnt++] = nc * (slot + i1);
1626         for (l = 1; l < nc; l++) {
1627           cols[cnt] = 1 + cols[cnt - 1];
1628           cnt++;
1629         }
1630       }
1631       rows[0] = nc * (slot);
1632       for (l = 1; l < nc; l++) rows[l] = 1 + rows[l - 1];
1633       PetscCall(MatSetValuesLocal(J, nc, rows, cnt, cols, NULL, INSERT_VALUES));
1634     }
1635     /* do not copy values to GPU since they are all zero and not yet needed there */
1636     PetscCall(MatBindToCPU(J, PETSC_TRUE));
1637     PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
1638     PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
1639     if (bx == DM_BOUNDARY_NONE) PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_FALSE));
1640     PetscCall(MatBindToCPU(J, PETSC_FALSE));
1641     PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
1642     PetscCall(PetscFree2(rows, cols));
1643   }
1644   PetscFunctionReturn(PETSC_SUCCESS);
1645 }
1646 
1647 /* ---------------------------------------------------------------------------------*/
1648 
1649 PetscErrorCode DMCreateMatrix_DA_1d_SeqAIJ_NoPreallocation(DM da, Mat J)
1650 {
1651   PetscInt               xs, nx, i, i1, slot, gxs, gnx;
1652   PetscInt               m, dim, s, *cols = NULL, nc, *rows = NULL, col, cnt, l;
1653   PetscInt               istart, iend;
1654   DMBoundaryType         bx;
1655   ISLocalToGlobalMapping ltog, mltog;
1656 
1657   PetscFunctionBegin;
1658   /*
1659          nc - number of components per grid point
1660          col - number of colors needed in one direction for single component problem
1661   */
1662   PetscCall(DMDAGetInfo(da, &dim, &m, NULL, NULL, NULL, NULL, NULL, &nc, &s, &bx, NULL, NULL, NULL));
1663   col = 2 * s + 1;
1664 
1665   PetscCall(DMDAGetCorners(da, &xs, NULL, NULL, &nx, NULL, NULL));
1666   PetscCall(DMDAGetGhostCorners(da, &gxs, NULL, NULL, &gnx, NULL, NULL));
1667 
1668   PetscCall(MatSetBlockSize(J, nc));
1669   PetscCall(MatSeqAIJSetTotalPreallocation(J, nx * nc * col * nc));
1670 
1671   PetscCall(DMGetLocalToGlobalMapping(da, &ltog));
1672   PetscCall(MatGetLocalToGlobalMapping(J, &mltog, NULL));
1673   if (!mltog) PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));
1674 
1675   /*
1676     For each node in the grid: we get the neighbors in the local (on processor ordering
1677     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1678     PETSc ordering.
1679   */
1680   if (!da->prealloc_only) {
1681     PetscCall(PetscMalloc2(nc, &rows, col * nc * nc, &cols));
1682     for (i = xs; i < xs + nx; i++) {
1683       istart = PetscMax(-s, gxs - i);
1684       iend   = PetscMin(s, gxs + gnx - i - 1);
1685       slot   = i - gxs;
1686 
1687       cnt = 0;
1688       for (i1 = istart; i1 < iend + 1; i1++) {
1689         cols[cnt++] = nc * (slot + i1);
1690         for (l = 1; l < nc; l++) {
1691           cols[cnt] = 1 + cols[cnt - 1];
1692           cnt++;
1693         }
1694       }
1695       rows[0] = nc * (slot);
1696       for (l = 1; l < nc; l++) rows[l] = 1 + rows[l - 1];
1697       PetscCall(MatSetValuesLocal(J, nc, rows, cnt, cols, NULL, INSERT_VALUES));
1698     }
1699     /* do not copy values to GPU since they are all zero and not yet needed there */
1700     PetscCall(MatBindToCPU(J, PETSC_TRUE));
1701     PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
1702     PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
1703     if (bx == DM_BOUNDARY_NONE) PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_FALSE));
1704     PetscCall(MatBindToCPU(J, PETSC_FALSE));
1705     PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
1706     PetscCall(PetscFree2(rows, cols));
1707   }
1708   PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_FALSE));
1709   PetscFunctionReturn(PETSC_SUCCESS);
1710 }
1711 
1712 PetscErrorCode DMCreateMatrix_DA_2d_MPIBAIJ(DM da, Mat J)
1713 {
1714   PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
1715   PetscInt               m, n, dim, s, *cols, nc, col, cnt, *dnz, *onz;
1716   PetscInt               istart, iend, jstart, jend, ii, jj;
1717   MPI_Comm               comm;
1718   PetscScalar           *values;
1719   DMBoundaryType         bx, by;
1720   DMDAStencilType        st;
1721   ISLocalToGlobalMapping ltog;
1722 
1723   PetscFunctionBegin;
1724   /*
1725      nc - number of components per grid point
1726      col - number of colors needed in one direction for single component problem
1727   */
1728   PetscCall(DMDAGetInfo(da, &dim, &m, &n, NULL, NULL, NULL, NULL, &nc, &s, &bx, &by, NULL, &st));
1729   col = 2 * s + 1;
1730 
1731   PetscCall(DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL));
1732   PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL));
1733   PetscCall(PetscObjectGetComm((PetscObject)da, &comm));
1734 
1735   PetscCall(PetscMalloc1(col * col * nc * nc, &cols));
1736 
1737   PetscCall(DMGetLocalToGlobalMapping(da, &ltog));
1738 
1739   /* determine the matrix preallocation information */
1740   MatPreallocateBegin(comm, nx * ny, nx * ny, dnz, onz);
1741   for (i = xs; i < xs + nx; i++) {
1742     istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
1743     iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
1744     for (j = ys; j < ys + ny; j++) {
1745       jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
1746       jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
1747       slot   = i - gxs + gnx * (j - gys);
1748 
1749       /* Find block columns in block row */
1750       cnt = 0;
1751       for (ii = istart; ii < iend + 1; ii++) {
1752         for (jj = jstart; jj < jend + 1; jj++) {
1753           if (st == DMDA_STENCIL_BOX || !ii || !jj) { /* BOX or on the STAR */
1754             cols[cnt++] = slot + ii + gnx * jj;
1755           }
1756         }
1757       }
1758       PetscCall(MatPreallocateSetLocalBlock(ltog, 1, &slot, ltog, cnt, cols, dnz, onz));
1759     }
1760   }
1761   PetscCall(MatSeqBAIJSetPreallocation(J, nc, 0, dnz));
1762   PetscCall(MatMPIBAIJSetPreallocation(J, nc, 0, dnz, 0, onz));
1763   MatPreallocateEnd(dnz, onz);
1764 
1765   PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));
1766 
1767   /*
1768     For each node in the grid: we get the neighbors in the local (on processor ordering
1769     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1770     PETSc ordering.
1771   */
1772   if (!da->prealloc_only) {
1773     PetscCall(PetscCalloc1(col * col * nc * nc, &values));
1774     for (i = xs; i < xs + nx; i++) {
1775       istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
1776       iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
1777       for (j = ys; j < ys + ny; j++) {
1778         jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
1779         jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
1780         slot   = i - gxs + gnx * (j - gys);
1781         cnt    = 0;
1782         for (ii = istart; ii < iend + 1; ii++) {
1783           for (jj = jstart; jj < jend + 1; jj++) {
1784             if (st == DMDA_STENCIL_BOX || !ii || !jj) { /* BOX or on the STAR */
1785               cols[cnt++] = slot + ii + gnx * jj;
1786             }
1787           }
1788         }
1789         PetscCall(MatSetValuesBlockedLocal(J, 1, &slot, cnt, cols, values, INSERT_VALUES));
1790       }
1791     }
1792     PetscCall(PetscFree(values));
1793     /* do not copy values to GPU since they are all zero and not yet needed there */
1794     PetscCall(MatBindToCPU(J, PETSC_TRUE));
1795     PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
1796     PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
1797     PetscCall(MatBindToCPU(J, PETSC_FALSE));
1798     PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
1799   }
1800   PetscCall(PetscFree(cols));
1801   PetscFunctionReturn(PETSC_SUCCESS);
1802 }
1803 
/*
  DMCreateMatrix_DA_3d_MPIBAIJ - Preallocates a BAIJ matrix (block size nc) for a 3d
  DMDA and, unless da->prealloc_only, inserts zero blocks in every stencil location
  so the nonzero pattern is fully established.
*/
PetscErrorCode DMCreateMatrix_DA_3d_MPIBAIJ(DM da, Mat J)
{
  PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
  PetscInt               m, n, dim, s, *cols, k, nc, col, cnt, p, *dnz, *onz;
  PetscInt               istart, iend, jstart, jend, kstart, kend, zs, nz, gzs, gnz, ii, jj, kk;
  MPI_Comm               comm;
  PetscScalar           *values;
  DMBoundaryType         bx, by, bz;
  DMDAStencilType        st;
  ISLocalToGlobalMapping ltog;

  PetscFunctionBegin;
  /*
         nc - number of components per grid point
         col - number of colors needed in one direction for single component problem
  */
  PetscCall(DMDAGetInfo(da, &dim, &m, &n, &p, NULL, NULL, NULL, &nc, &s, &bx, &by, &bz, &st));
  col = 2 * s + 1;

  PetscCall(DMDAGetCorners(da, &xs, &ys, &zs, &nx, &ny, &nz));
  PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, &gzs, &gnx, &gny, &gnz));
  PetscCall(PetscObjectGetComm((PetscObject)da, &comm));

  /* cols holds block-column indices for one block row: at most col^3 stencil points */
  PetscCall(PetscMalloc1(col * col * col, &cols));

  PetscCall(DMGetLocalToGlobalMapping(da, &ltog));

  /* determine the matrix preallocation information */
  MatPreallocateBegin(comm, nx * ny * nz, nx * ny * nz, dnz, onz);
  for (i = xs; i < xs + nx; i++) {
    /* stencil extent in each direction, clipped at physical boundaries unless periodic */
    istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
    iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
    for (j = ys; j < ys + ny; j++) {
      jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
      jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
      for (k = zs; k < zs + nz; k++) {
        kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
        kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));

        /* local (ghosted) block index of this grid point */
        slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);

        /* Find block columns in block row */
        cnt = 0;
        for (ii = istart; ii < iend + 1; ii++) {
          for (jj = jstart; jj < jend + 1; jj++) {
            for (kk = kstart; kk < kend + 1; kk++) {
              if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/
                cols[cnt++] = slot + ii + gnx * jj + gnx * gny * kk;
              }
            }
          }
        }
        PetscCall(MatPreallocateSetLocalBlock(ltog, 1, &slot, ltog, cnt, cols, dnz, onz));
      }
    }
  }
  PetscCall(MatSeqBAIJSetPreallocation(J, nc, 0, dnz));
  PetscCall(MatMPIBAIJSetPreallocation(J, nc, 0, dnz, 0, onz));
  MatPreallocateEnd(dnz, onz);

  PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    /* one zero block of nc*nc values per stencil point, up to col^3 points */
    PetscCall(PetscCalloc1(col * col * col * nc * nc, &values));
    for (i = xs; i < xs + nx; i++) {
      istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
      iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
      for (j = ys; j < ys + ny; j++) {
        jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
        jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
        for (k = zs; k < zs + nz; k++) {
          kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
          kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));

          slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);

          /* same stencil walk as the preallocation pass above */
          cnt = 0;
          for (ii = istart; ii < iend + 1; ii++) {
            for (jj = jstart; jj < jend + 1; jj++) {
              for (kk = kstart; kk < kend + 1; kk++) {
                if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/
                  cols[cnt++] = slot + ii + gnx * jj + gnx * gny * kk;
                }
              }
            }
          }
          PetscCall(MatSetValuesBlockedLocal(J, 1, &slot, cnt, cols, values, INSERT_VALUES));
        }
      }
    }
    PetscCall(PetscFree(values));
    /* do not copy values to GPU since they are all zero and not yet needed there */
    PetscCall(MatBindToCPU(J, PETSC_TRUE));
    PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatBindToCPU(J, PETSC_FALSE));
    PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  }
  PetscCall(PetscFree(cols));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1910 
1911 /*
1912   This helper is for of SBAIJ preallocation, to discard the lower-triangular values which are difficult to
1913   identify in the local ordering with periodic domain.
1914 */
1915 static PetscErrorCode L2GFilterUpperTriangular(ISLocalToGlobalMapping ltog, PetscInt *row, PetscInt *cnt, PetscInt col[])
1916 {
1917   PetscInt i, n;
1918 
1919   PetscFunctionBegin;
1920   PetscCall(ISLocalToGlobalMappingApplyBlock(ltog, 1, row, row));
1921   PetscCall(ISLocalToGlobalMappingApplyBlock(ltog, *cnt, col, col));
1922   for (i = 0, n = 0; i < *cnt; i++) {
1923     if (col[i] >= *row) col[n++] = col[i];
1924   }
1925   *cnt = n;
1926   PetscFunctionReturn(PETSC_SUCCESS);
1927 }
1928 
1929 PetscErrorCode DMCreateMatrix_DA_2d_MPISBAIJ(DM da, Mat J)
1930 {
1931   PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
1932   PetscInt               m, n, dim, s, *cols, nc, col, cnt, *dnz, *onz;
1933   PetscInt               istart, iend, jstart, jend, ii, jj;
1934   MPI_Comm               comm;
1935   PetscScalar           *values;
1936   DMBoundaryType         bx, by;
1937   DMDAStencilType        st;
1938   ISLocalToGlobalMapping ltog;
1939 
1940   PetscFunctionBegin;
1941   /*
1942      nc - number of components per grid point
1943      col - number of colors needed in one direction for single component problem
1944   */
1945   PetscCall(DMDAGetInfo(da, &dim, &m, &n, NULL, NULL, NULL, NULL, &nc, &s, &bx, &by, NULL, &st));
1946   col = 2 * s + 1;
1947 
1948   PetscCall(DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL));
1949   PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL));
1950   PetscCall(PetscObjectGetComm((PetscObject)da, &comm));
1951 
1952   PetscCall(PetscMalloc1(col * col * nc * nc, &cols));
1953 
1954   PetscCall(DMGetLocalToGlobalMapping(da, &ltog));
1955 
1956   /* determine the matrix preallocation information */
1957   MatPreallocateBegin(comm, nx * ny, nx * ny, dnz, onz);
1958   for (i = xs; i < xs + nx; i++) {
1959     istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
1960     iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
1961     for (j = ys; j < ys + ny; j++) {
1962       jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
1963       jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
1964       slot   = i - gxs + gnx * (j - gys);
1965 
1966       /* Find block columns in block row */
1967       cnt = 0;
1968       for (ii = istart; ii < iend + 1; ii++) {
1969         for (jj = jstart; jj < jend + 1; jj++) {
1970           if (st == DMDA_STENCIL_BOX || !ii || !jj) cols[cnt++] = slot + ii + gnx * jj;
1971         }
1972       }
1973       PetscCall(L2GFilterUpperTriangular(ltog, &slot, &cnt, cols));
1974       PetscCall(MatPreallocateSymmetricSetBlock(slot, cnt, cols, dnz, onz));
1975     }
1976   }
1977   PetscCall(MatSeqSBAIJSetPreallocation(J, nc, 0, dnz));
1978   PetscCall(MatMPISBAIJSetPreallocation(J, nc, 0, dnz, 0, onz));
1979   MatPreallocateEnd(dnz, onz);
1980 
1981   PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));
1982 
1983   /*
1984     For each node in the grid: we get the neighbors in the local (on processor ordering
1985     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1986     PETSc ordering.
1987   */
1988   if (!da->prealloc_only) {
1989     PetscCall(PetscCalloc1(col * col * nc * nc, &values));
1990     for (i = xs; i < xs + nx; i++) {
1991       istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
1992       iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
1993       for (j = ys; j < ys + ny; j++) {
1994         jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
1995         jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
1996         slot   = i - gxs + gnx * (j - gys);
1997 
1998         /* Find block columns in block row */
1999         cnt = 0;
2000         for (ii = istart; ii < iend + 1; ii++) {
2001           for (jj = jstart; jj < jend + 1; jj++) {
2002             if (st == DMDA_STENCIL_BOX || !ii || !jj) cols[cnt++] = slot + ii + gnx * jj;
2003           }
2004         }
2005         PetscCall(L2GFilterUpperTriangular(ltog, &slot, &cnt, cols));
2006         PetscCall(MatSetValuesBlocked(J, 1, &slot, cnt, cols, values, INSERT_VALUES));
2007       }
2008     }
2009     PetscCall(PetscFree(values));
2010     /* do not copy values to GPU since they are all zero and not yet needed there */
2011     PetscCall(MatBindToCPU(J, PETSC_TRUE));
2012     PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
2013     PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
2014     PetscCall(MatBindToCPU(J, PETSC_FALSE));
2015     PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
2016   }
2017   PetscCall(PetscFree(cols));
2018   PetscFunctionReturn(PETSC_SUCCESS);
2019 }
2020 
/*
  DMCreateMatrix_DA_3d_MPISBAIJ - Preallocates a symmetric SBAIJ matrix (block size nc)
  for a 3d DMDA; only the upper triangle is kept (see L2GFilterUpperTriangular), so the
  filtered indices are global and insertion uses MatSetValuesBlocked(), not the Local
  variant.
*/
PetscErrorCode DMCreateMatrix_DA_3d_MPISBAIJ(DM da, Mat J)
{
  PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
  PetscInt               m, n, dim, s, *cols, k, nc, col, cnt, p, *dnz, *onz;
  PetscInt               istart, iend, jstart, jend, kstart, kend, zs, nz, gzs, gnz, ii, jj, kk;
  MPI_Comm               comm;
  PetscScalar           *values;
  DMBoundaryType         bx, by, bz;
  DMDAStencilType        st;
  ISLocalToGlobalMapping ltog;

  PetscFunctionBegin;
  /*
     nc - number of components per grid point
     col - number of colors needed in one direction for single component problem
  */
  PetscCall(DMDAGetInfo(da, &dim, &m, &n, &p, NULL, NULL, NULL, &nc, &s, &bx, &by, &bz, &st));
  col = 2 * s + 1;

  PetscCall(DMDAGetCorners(da, &xs, &ys, &zs, &nx, &ny, &nz));
  PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, &gzs, &gnx, &gny, &gnz));
  PetscCall(PetscObjectGetComm((PetscObject)da, &comm));

  /* create the matrix */
  /* cols holds block-column indices for one block row: at most col^3 stencil points */
  PetscCall(PetscMalloc1(col * col * col, &cols));

  PetscCall(DMGetLocalToGlobalMapping(da, &ltog));

  /* determine the matrix preallocation information */
  MatPreallocateBegin(comm, nx * ny * nz, nx * ny * nz, dnz, onz);
  for (i = xs; i < xs + nx; i++) {
    /* stencil extent in each direction, clipped at physical boundaries unless periodic */
    istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
    iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
    for (j = ys; j < ys + ny; j++) {
      jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
      jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
      for (k = zs; k < zs + nz; k++) {
        kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
        kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));

        /* local (ghosted) block index of this grid point */
        slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);

        /* Find block columns in block row */
        cnt = 0;
        for (ii = istart; ii < iend + 1; ii++) {
          for (jj = jstart; jj < jend + 1; jj++) {
            for (kk = kstart; kk < kend + 1; kk++) {
              if ((st == DMDA_STENCIL_BOX) || (!ii && !jj) || (!jj && !kk) || (!ii && !kk)) cols[cnt++] = slot + ii + gnx * jj + gnx * gny * kk;
            }
          }
        }
        /* maps slot and cols to global indices and drops the lower triangle */
        PetscCall(L2GFilterUpperTriangular(ltog, &slot, &cnt, cols));
        PetscCall(MatPreallocateSymmetricSetBlock(slot, cnt, cols, dnz, onz));
      }
    }
  }
  PetscCall(MatSeqSBAIJSetPreallocation(J, nc, 0, dnz));
  PetscCall(MatMPISBAIJSetPreallocation(J, nc, 0, dnz, 0, onz));
  MatPreallocateEnd(dnz, onz);

  PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    /* one zero block of nc*nc values per stencil point, up to col^3 points */
    PetscCall(PetscCalloc1(col * col * col * nc * nc, &values));
    for (i = xs; i < xs + nx; i++) {
      istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
      iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
      for (j = ys; j < ys + ny; j++) {
        jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
        jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
        for (k = zs; k < zs + nz; k++) {
          kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
          kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));

          slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);

          /* same stencil walk as the preallocation pass above */
          cnt = 0;
          for (ii = istart; ii < iend + 1; ii++) {
            for (jj = jstart; jj < jend + 1; jj++) {
              for (kk = kstart; kk < kend + 1; kk++) {
                if ((st == DMDA_STENCIL_BOX) || (!ii && !jj) || (!jj && !kk) || (!ii && !kk)) cols[cnt++] = slot + ii + gnx * jj + gnx * gny * kk;
              }
            }
          }
          PetscCall(L2GFilterUpperTriangular(ltog, &slot, &cnt, cols));
          PetscCall(MatSetValuesBlocked(J, 1, &slot, cnt, cols, values, INSERT_VALUES));
        }
      }
    }
    PetscCall(PetscFree(values));
    /* do not copy values to GPU since they are all zero and not yet needed there */
    PetscCall(MatBindToCPU(J, PETSC_TRUE));
    PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatBindToCPU(J, PETSC_FALSE));
    PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  }
  PetscCall(PetscFree(cols));
  PetscFunctionReturn(PETSC_SUCCESS);
}
2126 
2127 /* ---------------------------------------------------------------------------------*/
2128 
2129 PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ_Fill(DM da, Mat J)
2130 {
2131   PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
2132   PetscInt               m, n, dim, s, *cols, k, nc, row, col, cnt, maxcnt = 0, l, p, *dnz, *onz;
2133   PetscInt               istart, iend, jstart, jend, kstart, kend, zs, nz, gzs, gnz, ii, jj, kk, M, N, P;
2134   DM_DA                 *dd = (DM_DA *)da->data;
2135   PetscInt               ifill_col, *dfill = dd->dfill, *ofill = dd->ofill;
2136   MPI_Comm               comm;
2137   PetscScalar           *values;
2138   DMBoundaryType         bx, by, bz;
2139   ISLocalToGlobalMapping ltog;
2140   DMDAStencilType        st;
2141   PetscBool              removedups = PETSC_FALSE;
2142 
2143   PetscFunctionBegin;
2144   /*
2145          nc - number of components per grid point
2146          col - number of colors needed in one direction for single component problem
2147   */
2148   PetscCall(DMDAGetInfo(da, &dim, &m, &n, &p, &M, &N, &P, &nc, &s, &bx, &by, &bz, &st));
2149   col = 2 * s + 1;
2150   PetscCheck(bx != DM_BOUNDARY_PERIODIC || (m % col) == 0, PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "For coloring efficiency ensure number of grid points in X is divisible\n\
2151                  by 2*stencil_width + 1\n");
2152   PetscCheck(by != DM_BOUNDARY_PERIODIC || (n % col) == 0, PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "For coloring efficiency ensure number of grid points in Y is divisible\n\
2153                  by 2*stencil_width + 1\n");
2154   PetscCheck(bz != DM_BOUNDARY_PERIODIC || (p % col) == 0, PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "For coloring efficiency ensure number of grid points in Z is divisible\n\
2155                  by 2*stencil_width + 1\n");
2156 
2157   /*
2158        With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times
2159        because of "wrapping" around the end of the domain hitting an entry already counted in the other direction.
2160   */
2161   if (M == 1 && 2 * s >= m) removedups = PETSC_TRUE;
2162   if (N == 1 && 2 * s >= n) removedups = PETSC_TRUE;
2163   if (P == 1 && 2 * s >= p) removedups = PETSC_TRUE;
2164 
2165   PetscCall(DMDAGetCorners(da, &xs, &ys, &zs, &nx, &ny, &nz));
2166   PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, &gzs, &gnx, &gny, &gnz));
2167   PetscCall(PetscObjectGetComm((PetscObject)da, &comm));
2168 
2169   PetscCall(PetscMalloc1(col * col * col * nc, &cols));
2170   PetscCall(DMGetLocalToGlobalMapping(da, &ltog));
2171 
2172   /* determine the matrix preallocation information */
2173   MatPreallocateBegin(comm, nc * nx * ny * nz, nc * nx * ny * nz, dnz, onz);
2174 
2175   PetscCall(MatSetBlockSize(J, nc));
2176   for (i = xs; i < xs + nx; i++) {
2177     istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
2178     iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
2179     for (j = ys; j < ys + ny; j++) {
2180       jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
2181       jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
2182       for (k = zs; k < zs + nz; k++) {
2183         kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
2184         kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));
2185 
2186         slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);
2187 
2188         for (l = 0; l < nc; l++) {
2189           cnt = 0;
2190           for (ii = istart; ii < iend + 1; ii++) {
2191             for (jj = jstart; jj < jend + 1; jj++) {
2192               for (kk = kstart; kk < kend + 1; kk++) {
2193                 if (ii || jj || kk) {
2194                   if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/
2195                     for (ifill_col = ofill[l]; ifill_col < ofill[l + 1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc * (slot + ii + gnx * jj + gnx * gny * kk);
2196                   }
2197                 } else {
2198                   if (dfill) {
2199                     for (ifill_col = dfill[l]; ifill_col < dfill[l + 1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc * (slot + ii + gnx * jj + gnx * gny * kk);
2200                   } else {
2201                     for (ifill_col = 0; ifill_col < nc; ifill_col++) cols[cnt++] = ifill_col + nc * (slot + ii + gnx * jj + gnx * gny * kk);
2202                   }
2203                 }
2204               }
2205             }
2206           }
2207           row    = l + nc * (slot);
2208           maxcnt = PetscMax(maxcnt, cnt);
2209           if (removedups) PetscCall(MatPreallocateSetLocalRemoveDups(ltog, 1, &row, ltog, cnt, cols, dnz, onz));
2210           else PetscCall(MatPreallocateSetLocal(ltog, 1, &row, ltog, cnt, cols, dnz, onz));
2211         }
2212       }
2213     }
2214   }
2215   PetscCall(MatSeqAIJSetPreallocation(J, 0, dnz));
2216   PetscCall(MatMPIAIJSetPreallocation(J, 0, dnz, 0, onz));
2217   MatPreallocateEnd(dnz, onz);
2218   PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));
2219 
2220   /*
2221     For each node in the grid: we get the neighbors in the local (on processor ordering
2222     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
2223     PETSc ordering.
2224   */
2225   if (!da->prealloc_only) {
2226     PetscCall(PetscCalloc1(maxcnt, &values));
2227     for (i = xs; i < xs + nx; i++) {
2228       istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
2229       iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
2230       for (j = ys; j < ys + ny; j++) {
2231         jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
2232         jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
2233         for (k = zs; k < zs + nz; k++) {
2234           kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
2235           kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));
2236 
2237           slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);
2238 
2239           for (l = 0; l < nc; l++) {
2240             cnt = 0;
2241             for (ii = istart; ii < iend + 1; ii++) {
2242               for (jj = jstart; jj < jend + 1; jj++) {
2243                 for (kk = kstart; kk < kend + 1; kk++) {
2244                   if (ii || jj || kk) {
2245                     if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/
2246                       for (ifill_col = ofill[l]; ifill_col < ofill[l + 1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc * (slot + ii + gnx * jj + gnx * gny * kk);
2247                     }
2248                   } else {
2249                     if (dfill) {
2250                       for (ifill_col = dfill[l]; ifill_col < dfill[l + 1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc * (slot + ii + gnx * jj + gnx * gny * kk);
2251                     } else {
2252                       for (ifill_col = 0; ifill_col < nc; ifill_col++) cols[cnt++] = ifill_col + nc * (slot + ii + gnx * jj + gnx * gny * kk);
2253                     }
2254                   }
2255                 }
2256               }
2257             }
2258             row = l + nc * (slot);
2259             PetscCall(MatSetValuesLocal(J, 1, &row, cnt, cols, values, INSERT_VALUES));
2260           }
2261         }
2262       }
2263     }
2264     PetscCall(PetscFree(values));
2265     /* do not copy values to GPU since they are all zero and not yet needed there */
2266     PetscCall(MatBindToCPU(J, PETSC_TRUE));
2267     PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
2268     PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
2269     PetscCall(MatBindToCPU(J, PETSC_FALSE));
2270     PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
2271   }
2272   PetscCall(PetscFree(cols));
2273   PetscFunctionReturn(PETSC_SUCCESS);
2274 }
2275