xref: /petsc/src/dm/impls/da/fdda.c (revision f236b2ad18af65eafb1995156e06d20e80c6f49d)
1 
2 #include <petsc/private/dmdaimpl.h> /*I      "petscdmda.h"     I*/
3 #include <petscmat.h>
4 #include <petscbt.h>
5 
6 extern PetscErrorCode DMCreateColoring_DA_1d_MPIAIJ(DM, ISColoringType, ISColoring *);
7 extern PetscErrorCode DMCreateColoring_DA_2d_MPIAIJ(DM, ISColoringType, ISColoring *);
8 extern PetscErrorCode DMCreateColoring_DA_2d_5pt_MPIAIJ(DM, ISColoringType, ISColoring *);
9 extern PetscErrorCode DMCreateColoring_DA_3d_MPIAIJ(DM, ISColoringType, ISColoring *);
10 
/*
   For a ghost index i that may be negative or >= m, map it back into the
   0:m-1 range using periodicity. Each argument is fully parenthesized so the
   macro expands correctly even when called with compound expressions
   (e.g. SetInRange(i - 1, m)); the previous form evaluated bare i/m tokens.
   Note the arguments are still evaluated more than once, so do not pass
   expressions with side effects.
*/
#define SetInRange(i, m) (((i) < 0) ? (m) + (i) : (((i) >= (m)) ? (i) - (m) : (i)))
16 
17 static PetscErrorCode DMDASetBlockFills_Private(const PetscInt *dfill, PetscInt w, PetscInt **rfill)
18 {
19   PetscInt i, j, nz, *fill;
20 
21   PetscFunctionBegin;
22   if (!dfill) PetscFunctionReturn(PETSC_SUCCESS);
23 
24   /* count number nonzeros */
25   nz = 0;
26   for (i = 0; i < w; i++) {
27     for (j = 0; j < w; j++) {
28       if (dfill[w * i + j]) nz++;
29     }
30   }
31   PetscCall(PetscMalloc1(nz + w + 1, &fill));
32   /* construct modified CSR storage of nonzero structure */
33   /*  fill[0 -- w] marks starts of each row of column indices (and end of last row)
34    so fill[1] - fill[0] gives number of nonzeros in first row etc */
35   nz = w + 1;
36   for (i = 0; i < w; i++) {
37     fill[i] = nz;
38     for (j = 0; j < w; j++) {
39       if (dfill[w * i + j]) {
40         fill[nz] = j;
41         nz++;
42       }
43     }
44   }
45   fill[w] = nz;
46 
47   *rfill = fill;
48   PetscFunctionReturn(PETSC_SUCCESS);
49 }
50 
51 static PetscErrorCode DMDASetBlockFillsSparse_Private(const PetscInt *dfillsparse, PetscInt w, PetscInt **rfill)
52 {
53   PetscInt nz;
54 
55   PetscFunctionBegin;
56   if (!dfillsparse) PetscFunctionReturn(PETSC_SUCCESS);
57 
58   /* Determine number of non-zeros */
59   nz = (dfillsparse[w] - w - 1);
60 
61   /* Allocate space for our copy of the given sparse matrix representation. */
62   PetscCall(PetscMalloc1(nz + w + 1, rfill));
63   PetscCall(PetscArraycpy(*rfill, dfillsparse, nz + w + 1));
64   PetscFunctionReturn(PETSC_SUCCESS);
65 }
66 
/*
   Shared post-processing for DMDASetBlockFills()/DMDASetBlockFillsSparse():
   builds dd->ofillcols from the already-stored dd->ofill pattern.
*/
static PetscErrorCode DMDASetBlockFills_Private2(DM_DA *dd)
{
  PetscInt i, k, cnt = 1;

  PetscFunctionBegin;

  /* ofillcols tracks the columns of ofill that have any nonzero in them; the value in each location is the number of
   columns to the left with any nonzeros in them plus 1 */
  PetscCall(PetscCalloc1(dd->w, &dd->ofillcols));
  /* pass 1: mark every column that appears in some row of the ofill pattern */
  for (i = 0; i < dd->w; i++) {
    for (k = dd->ofill[i]; k < dd->ofill[i + 1]; k++) dd->ofillcols[dd->ofill[k]] = 1;
  }
  /* pass 2: replace each mark with its 1-based rank among the marked columns */
  for (i = 0; i < dd->w; i++) {
    if (dd->ofillcols[i]) dd->ofillcols[i] = cnt++;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
84 
/*@
    DMDASetBlockFills - Sets the fill pattern in each block for a multi-component problem
    of the matrix returned by `DMCreateMatrix()`.

    Logically Collective

    Input Parameters:
+   da - the distributed array
.   dfill - the fill pattern in the diagonal block (may be NULL, means use dense block)
-   ofill - the fill pattern in the off-diagonal blocks

    Level: developer

    Notes:
    This only makes sense when you are doing multicomponent problems but using the
       `MATMPIAIJ` matrix format

           The format for dfill and ofill is a 2 dimensional dof by dof matrix with 1 entries
       representing coupling and 0 entries for missing coupling. For example
.vb
            dfill[9] = {1, 0, 0,
                        1, 1, 0,
                        0, 1, 1}
.ve
       means that row 0 is coupled with only itself in the diagonal block, row 1 is coupled with
       itself and row 0 (in the diagonal block) and row 2 is coupled with itself and row 1 (in the
       diagonal block).

     `DMDASetGetMatrix()` allows you to provide general code for those more complicated nonzero patterns than
     can be represented in the dfill, ofill format

   Contributed by Glenn Hammond

.seealso: `DM`, `DMDA`, `DMCreateMatrix()`, `DMDASetGetMatrix()`, `DMSetMatrixPreallocateOnly()`
@*/
PetscErrorCode DMDASetBlockFills(DM da, const PetscInt *dfill, const PetscInt *ofill)
{
  DM_DA *dd = (DM_DA *)da->data;

  PetscFunctionBegin;
  /* save the given dfill and ofill information (converted to sparse CSR form) */
  PetscCall(DMDASetBlockFills_Private(dfill, dd->w, &dd->dfill));
  PetscCall(DMDASetBlockFills_Private(ofill, dd->w, &dd->ofill));

  /* count nonzeros in ofill columns */
  PetscCall(DMDASetBlockFills_Private2(dd));
  PetscFunctionReturn(PETSC_SUCCESS);
}
133 
/*@
    DMDASetBlockFillsSparse - Sets the fill pattern in each block for a multi-component problem
    of the matrix returned by `DMCreateMatrix()`, using sparse representations
    of fill patterns.

    Logically Collective

    Input Parameters:
+   da - the distributed array
.   dfill - the sparse fill pattern in the diagonal block (may be `NULL`, means use dense block)
-   ofill - the sparse fill pattern in the off-diagonal blocks

    Level: developer

    Notes:
    This only makes sense when you are doing multicomponent problems but using the
       `MATMPIAIJ` matrix format

           The format for `dfill` and `ofill` is a sparse representation of a
           dof-by-dof matrix with 1 entries representing coupling and 0 entries
           for missing coupling.  The sparse representation is a 1 dimensional
           array of length nz + dof + 1, where nz is the number of non-zeros in
           the matrix.  The first dof entries in the array give the
           starting array indices of each row's items in the rest of the array,
           the dof+1st item contains the value nz + dof + 1 (i.e. the entire length of the array)
           and the remaining nz items give the column indices of each of
           the 1s within the logical 2D matrix.  Each row's items within
           the array are the column indices of the 1s within that row
           of the 2D matrix.  PETSc developers may recognize that this is the
           same format as that computed by the `DMDASetBlockFills_Private()`
           function from a dense 2D matrix representation.

     `DMDASetGetMatrix()` allows you to provide general code for those more complicated nonzero patterns than
     can be represented in the `dfill`, `ofill` format

   Contributed by Philip C. Roth

.seealso: `DM`, `DMDA`, `DMDASetBlockFills()`, `DMCreateMatrix()`, `DMDASetGetMatrix()`, `DMSetMatrixPreallocateOnly()`
@*/
PetscErrorCode DMDASetBlockFillsSparse(DM da, const PetscInt *dfillsparse, const PetscInt *ofillsparse)
{
  DM_DA *dd = (DM_DA *)da->data;

  PetscFunctionBegin;
  /* save copies of the given (already sparse) dfill and ofill information */
  PetscCall(DMDASetBlockFillsSparse_Private(dfillsparse, dd->w, &dd->dfill));
  PetscCall(DMDASetBlockFillsSparse_Private(ofillsparse, dd->w, &dd->ofill));

  /* count nonzeros in ofill columns */
  PetscCall(DMDASetBlockFills_Private2(dd));
  PetscFunctionReturn(PETSC_SUCCESS);
}
186 
/*
   DMCreateColoring_DA - Creates an ISColoring for a DMDA, dispatching to the
   dimension-specific routines below. For BAIJ matrix types the DMDA is
   temporarily rescaled to 1 dof so the coloring is per block rather than per
   matrix element.
*/
PetscErrorCode DMCreateColoring_DA(DM da, ISColoringType ctype, ISColoring *coloring)
{
  PetscInt       dim, m, n, p, nc;
  DMBoundaryType bx, by, bz;
  MPI_Comm       comm;
  PetscMPIInt    size;
  PetscBool      isBAIJ;
  DM_DA         *dd = (DM_DA *)da->data;

  PetscFunctionBegin;
  /*
                                  m
          ------------------------------------------------------
         |                                                     |
         |                                                     |
         |               ----------------------                |
         |               |                    |                |
      n  |           yn  |                    |                |
         |               |                    |                |
         |               .---------------------                |
         |             (xs,ys)     xn                          |
         |            .                                        |
         |         (gxs,gys)                                   |
         |                                                     |
          -----------------------------------------------------
  */

  /*
         nc - number of components per grid point
         col - number of colors needed in one direction for single component problem

  */
  PetscCall(DMDAGetInfo(da, &dim, NULL, NULL, NULL, &m, &n, &p, &nc, NULL, &bx, &by, &bz, NULL));

  PetscCall(PetscObjectGetComm((PetscObject)da, &comm));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  if (ctype == IS_COLORING_LOCAL) {
    if (size == 1) {
      /* on one process the local (ghosted) coloring is the same as the global one */
      ctype = IS_COLORING_GLOBAL;
    } else {
      /* a periodic direction with a single process block in it would alias both ends of
         the domain onto the same process, which the local coloring cannot represent */
      PetscCheck((dim == 1) || !((m == 1 && bx == DM_BOUNDARY_PERIODIC) || (n == 1 && by == DM_BOUNDARY_PERIODIC) || (p == 1 && bz == DM_BOUNDARY_PERIODIC)), PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "IS_COLORING_LOCAL cannot be used for periodic boundary condition having both ends of the domain on the same process");
    }
  }

  /* Tell the DMDA it has 1 degree of freedom per grid point so that the coloring for BAIJ
     matrices is for the blocks, not the individual matrix elements  */
  PetscCall(PetscStrbeginswith(da->mattype, MATBAIJ, &isBAIJ));
  if (!isBAIJ) PetscCall(PetscStrbeginswith(da->mattype, MATMPIBAIJ, &isBAIJ));
  if (!isBAIJ) PetscCall(PetscStrbeginswith(da->mattype, MATSEQBAIJ, &isBAIJ));
  if (isBAIJ) {
    /* temporarily scale the x-direction ownership ranges down by nc; restored below */
    dd->w  = 1;
    dd->xs = dd->xs / nc;
    dd->xe = dd->xe / nc;
    dd->Xs = dd->Xs / nc;
    dd->Xe = dd->Xe / nc;
  }

  /*
     We do not provide a getcoloring function in the DMDA operations because
   the basic DMDA does not know about matrices. We think of DMDA as being
   more low-level then matrices.
  */
  if (dim == 1) PetscCall(DMCreateColoring_DA_1d_MPIAIJ(da, ctype, coloring));
  else if (dim == 2) PetscCall(DMCreateColoring_DA_2d_MPIAIJ(da, ctype, coloring));
  else if (dim == 3) PetscCall(DMCreateColoring_DA_3d_MPIAIJ(da, ctype, coloring));
  else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "Not done for %" PetscInt_FMT " dimension, send us mail petsc-maint@mcs.anl.gov for code", dim);
  if (isBAIJ) {
    /* undo the temporary rescaling done above */
    dd->w  = nc;
    dd->xs = dd->xs * nc;
    dd->xe = dd->xe * nc;
    dd->Xs = dd->Xs * nc;
    dd->Xe = dd->Xe * nc;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
262 
/*
   DMCreateColoring_DA_2d_MPIAIJ - 2d coloring: each grid point gets a color
   determined by its (i mod col, j mod col) position within a col-by-col tile
   (col = 2*s + 1), times the number of components nc. Colorings are cached on
   the DM_DA and reference-counted on reuse.
*/
PetscErrorCode DMCreateColoring_DA_2d_MPIAIJ(DM da, ISColoringType ctype, ISColoring *coloring)
{
  PetscInt         xs, ys, nx, ny, i, j, ii, gxs, gys, gnx, gny, m, n, M, N, dim, s, k, nc, col;
  PetscInt         ncolors = 0;
  MPI_Comm         comm;
  DMBoundaryType   bx, by;
  DMDAStencilType  st;
  ISColoringValue *colors;
  DM_DA           *dd = (DM_DA *)da->data;

  PetscFunctionBegin;
  /*
         nc - number of components per grid point
         col - number of colors needed in one direction for single component problem

  */
  PetscCall(DMDAGetInfo(da, &dim, &m, &n, NULL, &M, &N, NULL, &nc, &s, &bx, &by, NULL, &st));
  col = 2 * s + 1;
  PetscCall(DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL));
  PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL));
  PetscCall(PetscObjectGetComm((PetscObject)da, &comm));

  /* special case as taught to us by Paul Hovland */
  if (st == DMDA_STENCIL_STAR && s == 1) {
    /* 5-point stencil only needs 5*nc colors instead of 9*nc */
    PetscCall(DMCreateColoring_DA_2d_5pt_MPIAIJ(da, ctype, coloring));
  } else {
    if (ctype == IS_COLORING_GLOBAL) {
      if (!dd->localcoloring) {
        PetscCall(PetscMalloc1(nc * nx * ny, &colors));
        ii = 0;
        /* color = component + nc * (tile position) over the owned (non-ghosted) region */
        for (j = ys; j < ys + ny; j++) {
          for (i = xs; i < xs + nx; i++) {
            for (k = 0; k < nc; k++) colors[ii++] = k + nc * ((i % col) + col * (j % col));
          }
        }
        ncolors = nc + nc * (col - 1 + col * (col - 1));
        /* ISColoring takes ownership of colors (PETSC_OWN_POINTER) */
        PetscCall(ISColoringCreate(comm, ncolors, nc * nx * ny, colors, PETSC_OWN_POINTER, &dd->localcoloring));
      }
      *coloring = dd->localcoloring;
    } else if (ctype == IS_COLORING_LOCAL) {
      if (!dd->ghostedcoloring) {
        PetscCall(PetscMalloc1(nc * gnx * gny, &colors));
        ii = 0;
        for (j = gys; j < gys + gny; j++) {
          for (i = gxs; i < gxs + gnx; i++) {
            for (k = 0; k < nc; k++) {
              /* the complicated stuff is to handle periodic boundaries */
              colors[ii++] = k + nc * ((SetInRange(i, m) % col) + col * (SetInRange(j, n) % col));
            }
          }
        }
        ncolors = nc + nc * (col - 1 + col * (col - 1));
        PetscCall(ISColoringCreate(comm, ncolors, nc * gnx * gny, colors, PETSC_OWN_POINTER, &dd->ghostedcoloring));
        /* PetscIntView(ncolors,(PetscInt*)colors,0); */

        PetscCall(ISColoringSetType(dd->ghostedcoloring, IS_COLORING_LOCAL));
      }
      *coloring = dd->ghostedcoloring;
    } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_ARG_WRONG, "Unknown ISColoringType %d", (int)ctype);
  }
  /* caller receives its own reference to the cached coloring */
  PetscCall(ISColoringReference(*coloring));
  PetscFunctionReturn(PETSC_SUCCESS);
}
326 
/*
   DMCreateColoring_DA_3d_MPIAIJ - 3d analogue of the 2d routine: colors are
   assigned by position within a col*col*col tile (col = 2*s + 1), times nc
   components. Periodic directions must have a grid size divisible by col so
   the tiling wraps consistently. Colorings are cached on the DM_DA.
*/
PetscErrorCode DMCreateColoring_DA_3d_MPIAIJ(DM da, ISColoringType ctype, ISColoring *coloring)
{
  PetscInt         xs, ys, nx, ny, i, j, gxs, gys, gnx, gny, m, n, p, dim, s, k, nc, col, zs, gzs, ii, l, nz, gnz, M, N, P;
  PetscInt         ncolors;
  MPI_Comm         comm;
  DMBoundaryType   bx, by, bz;
  DMDAStencilType  st;
  ISColoringValue *colors;
  DM_DA           *dd = (DM_DA *)da->data;

  PetscFunctionBegin;
  /*
         nc - number of components per grid point
         col - number of colors needed in one direction for single component problem
  */
  PetscCall(DMDAGetInfo(da, &dim, &m, &n, &p, &M, &N, &P, &nc, &s, &bx, &by, &bz, &st));
  col = 2 * s + 1;
  /* periodic wrap-around only lines up with the col-tiling when the grid size is divisible by col */
  PetscCheck(bx != DM_BOUNDARY_PERIODIC || (m % col) == 0, PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "For coloring efficiency ensure number of grid points in X is divisible\n\
                 by 2*stencil_width + 1\n");
  PetscCheck(by != DM_BOUNDARY_PERIODIC || (n % col) == 0, PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "For coloring efficiency ensure number of grid points in Y is divisible\n\
                 by 2*stencil_width + 1\n");
  PetscCheck(bz != DM_BOUNDARY_PERIODIC || (p % col) == 0, PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "For coloring efficiency ensure number of grid points in Z is divisible\n\
                 by 2*stencil_width + 1\n");

  PetscCall(DMDAGetCorners(da, &xs, &ys, &zs, &nx, &ny, &nz));
  PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, &gzs, &gnx, &gny, &gnz));
  PetscCall(PetscObjectGetComm((PetscObject)da, &comm));

  /* create the coloring */
  if (ctype == IS_COLORING_GLOBAL) {
    if (!dd->localcoloring) {
      PetscCall(PetscMalloc1(nc * nx * ny * nz, &colors));
      ii = 0;
      /* color = component + nc * (position within the col^3 tile) */
      for (k = zs; k < zs + nz; k++) {
        for (j = ys; j < ys + ny; j++) {
          for (i = xs; i < xs + nx; i++) {
            for (l = 0; l < nc; l++) colors[ii++] = l + nc * ((i % col) + col * (j % col) + col * col * (k % col));
          }
        }
      }
      ncolors = nc + nc * (col - 1 + col * (col - 1) + col * col * (col - 1));
      /* ISColoring takes ownership of colors (PETSC_OWN_POINTER) */
      PetscCall(ISColoringCreate(comm, ncolors, nc * nx * ny * nz, colors, PETSC_OWN_POINTER, &dd->localcoloring));
    }
    *coloring = dd->localcoloring;
  } else if (ctype == IS_COLORING_LOCAL) {
    if (!dd->ghostedcoloring) {
      PetscCall(PetscMalloc1(nc * gnx * gny * gnz, &colors));
      ii = 0;
      for (k = gzs; k < gzs + gnz; k++) {
        for (j = gys; j < gys + gny; j++) {
          for (i = gxs; i < gxs + gnx; i++) {
            for (l = 0; l < nc; l++) {
              /* the complicated stuff is to handle periodic boundaries */
              colors[ii++] = l + nc * ((SetInRange(i, m) % col) + col * (SetInRange(j, n) % col) + col * col * (SetInRange(k, p) % col));
            }
          }
        }
      }
      ncolors = nc + nc * (col - 1 + col * (col - 1) + col * col * (col - 1));
      PetscCall(ISColoringCreate(comm, ncolors, nc * gnx * gny * gnz, colors, PETSC_OWN_POINTER, &dd->ghostedcoloring));
      PetscCall(ISColoringSetType(dd->ghostedcoloring, IS_COLORING_LOCAL));
    }
    *coloring = dd->ghostedcoloring;
  } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_ARG_WRONG, "Unknown ISColoringType %d", (int)ctype);
  /* caller receives its own reference to the cached coloring */
  PetscCall(ISColoringReference(*coloring));
  PetscFunctionReturn(PETSC_SUCCESS);
}
394 
/*
   DMCreateColoring_DA_1d_MPIAIJ - 1d coloring. When an off-diagonal block
   fill pattern was supplied (dd->ofillcols set), the global coloring exploits
   it: only components that actually couple to neighboring grid points need
   extra colors, giving nc + 2*s*tc colors instead of nc*(2*s + 1).
*/
PetscErrorCode DMCreateColoring_DA_1d_MPIAIJ(DM da, ISColoringType ctype, ISColoring *coloring)
{
  PetscInt         xs, nx, i, i1, gxs, gnx, l, m, M, dim, s, nc, col;
  PetscInt         ncolors;
  MPI_Comm         comm;
  DMBoundaryType   bx;
  ISColoringValue *colors;
  DM_DA           *dd = (DM_DA *)da->data;

  PetscFunctionBegin;
  /*
         nc - number of components per grid point
         col - number of colors needed in one direction for single component problem
  */
  PetscCall(DMDAGetInfo(da, &dim, &m, NULL, NULL, &M, NULL, NULL, &nc, &s, &bx, NULL, NULL, NULL));
  col = 2 * s + 1;
  PetscCall(DMDAGetCorners(da, &xs, NULL, NULL, &nx, NULL, NULL));
  PetscCall(DMDAGetGhostCorners(da, &gxs, NULL, NULL, &gnx, NULL, NULL));
  PetscCall(PetscObjectGetComm((PetscObject)da, &comm));

  /* create the coloring */
  if (ctype == IS_COLORING_GLOBAL) {
    if (!dd->localcoloring) {
      PetscCall(PetscMalloc1(nc * nx, &colors));
      if (dd->ofillcols) {
        /* tc = number of components that couple to off-process/off-point blocks */
        PetscInt tc = 0;
        for (i = 0; i < nc; i++) tc += (PetscInt)(dd->ofillcols[i] > 0);
        i1 = 0;
        for (i = xs; i < xs + nx; i++) {
          for (l = 0; l < nc; l++) {
            if (dd->ofillcols[l] && (i % col)) {
              /* coupled component away from a tile origin: gets one of the extra colors */
              colors[i1++] = nc - 1 + tc * ((i % col) - 1) + dd->ofillcols[l];
            } else {
              /* uncoupled component (or tile origin): color is just the component index */
              colors[i1++] = l;
            }
          }
        }
        ncolors = nc + 2 * s * tc;
      } else {
        i1 = 0;
        for (i = xs; i < xs + nx; i++) {
          for (l = 0; l < nc; l++) colors[i1++] = l + nc * (i % col);
        }
        ncolors = nc + nc * (col - 1);
      }
      /* ISColoring takes ownership of colors (PETSC_OWN_POINTER) */
      PetscCall(ISColoringCreate(comm, ncolors, nc * nx, colors, PETSC_OWN_POINTER, &dd->localcoloring));
    }
    *coloring = dd->localcoloring;
  } else if (ctype == IS_COLORING_LOCAL) {
    if (!dd->ghostedcoloring) {
      PetscCall(PetscMalloc1(nc * gnx, &colors));
      i1 = 0;
      for (i = gxs; i < gxs + gnx; i++) {
        for (l = 0; l < nc; l++) {
          /* the complicated stuff is to handle periodic boundaries */
          colors[i1++] = l + nc * (SetInRange(i, m) % col);
        }
      }
      ncolors = nc + nc * (col - 1);
      PetscCall(ISColoringCreate(comm, ncolors, nc * gnx, colors, PETSC_OWN_POINTER, &dd->ghostedcoloring));
      PetscCall(ISColoringSetType(dd->ghostedcoloring, IS_COLORING_LOCAL));
    }
    *coloring = dd->ghostedcoloring;
  } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_ARG_WRONG, "Unknown ISColoringType %d", (int)ctype);
  /* caller receives its own reference to the cached coloring */
  PetscCall(ISColoringReference(*coloring));
  PetscFunctionReturn(PETSC_SUCCESS);
}
462 
/*
   DMCreateColoring_DA_2d_5pt_MPIAIJ - Special 5-color scheme for a 2d
   star stencil of width 1 (5-point stencil): color = (3*j + i) mod 5 per
   component, so only 5*nc colors are needed instead of 9*nc.
   Note: unlike the other routines here this one does NOT call
   ISColoringReference(); its caller DMCreateColoring_DA_2d_MPIAIJ() takes the
   reference after dispatching to it.
*/
PetscErrorCode DMCreateColoring_DA_2d_5pt_MPIAIJ(DM da, ISColoringType ctype, ISColoring *coloring)
{
  PetscInt         xs, ys, nx, ny, i, j, ii, gxs, gys, gnx, gny, m, n, dim, s, k, nc;
  PetscInt         ncolors;
  MPI_Comm         comm;
  DMBoundaryType   bx, by;
  ISColoringValue *colors;
  DM_DA           *dd = (DM_DA *)da->data;

  PetscFunctionBegin;
  /*
         nc - number of components per grid point
         col - number of colors needed in one direction for single component problem
  */
  PetscCall(DMDAGetInfo(da, &dim, &m, &n, NULL, NULL, NULL, NULL, &nc, &s, &bx, &by, NULL, NULL));
  PetscCall(DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL));
  PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL));
  PetscCall(PetscObjectGetComm((PetscObject)da, &comm));
  /* create the coloring */
  if (ctype == IS_COLORING_GLOBAL) {
    if (!dd->localcoloring) {
      PetscCall(PetscMalloc1(nc * nx * ny, &colors));
      ii = 0;
      /* (3*j + i) mod 5 separates the 5 points of the star stencil */
      for (j = ys; j < ys + ny; j++) {
        for (i = xs; i < xs + nx; i++) {
          for (k = 0; k < nc; k++) colors[ii++] = k + nc * ((3 * j + i) % 5);
        }
      }
      ncolors = 5 * nc;
      /* ISColoring takes ownership of colors (PETSC_OWN_POINTER) */
      PetscCall(ISColoringCreate(comm, ncolors, nc * nx * ny, colors, PETSC_OWN_POINTER, &dd->localcoloring));
    }
    *coloring = dd->localcoloring;
  } else if (ctype == IS_COLORING_LOCAL) {
    if (!dd->ghostedcoloring) {
      PetscCall(PetscMalloc1(nc * gnx * gny, &colors));
      ii = 0;
      for (j = gys; j < gys + gny; j++) {
        for (i = gxs; i < gxs + gnx; i++) {
          /* SetInRange maps ghost indices back into the periodic domain */
          for (k = 0; k < nc; k++) colors[ii++] = k + nc * ((3 * SetInRange(j, n) + SetInRange(i, m)) % 5);
        }
      }
      ncolors = 5 * nc;
      PetscCall(ISColoringCreate(comm, ncolors, nc * gnx * gny, colors, PETSC_OWN_POINTER, &dd->ghostedcoloring));
      PetscCall(ISColoringSetType(dd->ghostedcoloring, IS_COLORING_LOCAL));
    }
    *coloring = dd->ghostedcoloring;
  } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_ARG_WRONG, "Unknown ISColoringType %d", (int)ctype);
  PetscFunctionReturn(PETSC_SUCCESS);
}
512 
513 /* =========================================================================== */
514 extern PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ(DM, Mat);
515 extern PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ_Fill(DM, Mat);
516 extern PetscErrorCode DMCreateMatrix_DA_1d_SeqAIJ_NoPreallocation(DM, Mat);
517 extern PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ(DM, Mat);
518 extern PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ_Fill(DM, Mat);
519 extern PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ(DM, Mat);
520 extern PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ_Fill(DM, Mat);
521 extern PetscErrorCode DMCreateMatrix_DA_2d_MPIBAIJ(DM, Mat);
522 extern PetscErrorCode DMCreateMatrix_DA_3d_MPIBAIJ(DM, Mat);
523 extern PetscErrorCode DMCreateMatrix_DA_2d_MPISBAIJ(DM, Mat);
524 extern PetscErrorCode DMCreateMatrix_DA_3d_MPISBAIJ(DM, Mat);
525 extern PetscErrorCode DMCreateMatrix_DA_2d_MPISELL(DM, Mat);
526 extern PetscErrorCode DMCreateMatrix_DA_3d_MPISELL(DM, Mat);
527 extern PetscErrorCode DMCreateMatrix_DA_IS(DM, Mat);
528 
/*@C
   MatSetupDM - Sets the `DMDA` that is to be used by the HYPRE_StructMatrix PETSc matrix

   Logically Collective

   Input Parameters:
+  mat - the matrix
-  da - the da

   Level: intermediate

.seealso: `DMDA`, `Mat`, `MatSetUp()`
@*/
PetscErrorCode MatSetupDM(Mat mat, DM da)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat, MAT_CLASSID, 1);
  PetscValidHeaderSpecificType(da, DM_CLASSID, 2, DMDA);
  /* dispatch to the matrix type's implementation if it provides one; no-op otherwise */
  PetscTryMethod(mat, "MatSetupDM_C", (Mat, DM), (mat, da));
  PetscFunctionReturn(PETSC_SUCCESS);
}
550 
/*
   MatView_MPI_DA - Views a DMDA-generated matrix in the DMDA's natural
   ordering: builds the PETSc-ordering index set via the DMDA's AO, extracts
   the permuted submatrix, and views that instead of the original.
*/
PetscErrorCode MatView_MPI_DA(Mat A, PetscViewer viewer)
{
  DM                da;
  const char       *prefix;
  Mat               Anatural;
  AO                ao;
  PetscInt          rstart, rend, *petsc, i;
  IS                is;
  MPI_Comm          comm;
  PetscViewerFormat format;

  PetscFunctionBegin;
  /* Check whether we are just printing info, in which case MatView() already viewed everything we wanted to view */
  PetscCall(PetscViewerGetFormat(viewer, &format));
  if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) PetscFunctionReturn(PETSC_SUCCESS);

  PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
  PetscCall(MatGetDM(A, &da));
  PetscCheck(da, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Matrix not generated from a DMDA");

  /* map this process's row range from application (natural) to PETSc ordering */
  PetscCall(DMDAGetAO(da, &ao));
  PetscCall(MatGetOwnershipRange(A, &rstart, &rend));
  PetscCall(PetscMalloc1(rend - rstart, &petsc));
  for (i = rstart; i < rend; i++) petsc[i - rstart] = i;
  PetscCall(AOApplicationToPetsc(ao, rend - rstart, petsc));
  /* the IS takes ownership of petsc (PETSC_OWN_POINTER); do not free it here */
  PetscCall(ISCreateGeneral(comm, rend - rstart, petsc, PETSC_OWN_POINTER, &is));

  /* call viewer on natural ordering */
  PetscCall(MatCreateSubMatrix(A, is, is, MAT_INITIAL_MATRIX, &Anatural));
  PetscCall(ISDestroy(&is));
  /* make the permuted matrix view with the original matrix's prefix and name */
  PetscCall(PetscObjectGetOptionsPrefix((PetscObject)A, &prefix));
  PetscCall(PetscObjectSetOptionsPrefix((PetscObject)Anatural, prefix));
  PetscCall(PetscObjectSetName((PetscObject)Anatural, ((PetscObject)A)->name));
  /* temporarily suppress the class/name/prefix header so output looks like A itself */
  ((PetscObject)Anatural)->donotPetscObjectPrintClassNamePrefixType = PETSC_TRUE;
  PetscCall(MatView(Anatural, viewer));
  ((PetscObject)Anatural)->donotPetscObjectPrintClassNamePrefixType = PETSC_FALSE;
  PetscCall(MatDestroy(&Anatural));
  PetscFunctionReturn(PETSC_SUCCESS);
}
590 
/*
   MatLoad_MPI_DA - Loads a DMDA-generated matrix that was stored in natural
   ordering (the inverse of MatView_MPI_DA): loads into a temporary matrix,
   permutes it into the DMDA's application ordering via the AO, and replaces
   A's contents with the permuted matrix.
*/
PetscErrorCode MatLoad_MPI_DA(Mat A, PetscViewer viewer)
{
  DM       da;
  Mat      Anatural, Aapp;
  AO       ao;
  PetscInt rstart, rend, *app, i, m, n, M, N;
  IS       is;
  MPI_Comm comm;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
  PetscCall(MatGetDM(A, &da));
  PetscCheck(da, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Matrix not generated from a DMDA");

  /* Load the matrix in natural ordering */
  PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &Anatural));
  PetscCall(MatSetType(Anatural, ((PetscObject)A)->type_name));
  /* mirror A's sizes so the parallel layout of the loaded matrix matches */
  PetscCall(MatGetSize(A, &M, &N));
  PetscCall(MatGetLocalSize(A, &m, &n));
  PetscCall(MatSetSizes(Anatural, m, n, M, N));
  PetscCall(MatLoad(Anatural, viewer));

  /* Map natural ordering to application ordering and create IS */
  PetscCall(DMDAGetAO(da, &ao));
  PetscCall(MatGetOwnershipRange(Anatural, &rstart, &rend));
  PetscCall(PetscMalloc1(rend - rstart, &app));
  for (i = rstart; i < rend; i++) app[i - rstart] = i;
  PetscCall(AOPetscToApplication(ao, rend - rstart, app));
  /* the IS takes ownership of app (PETSC_OWN_POINTER); do not free it here */
  PetscCall(ISCreateGeneral(comm, rend - rstart, app, PETSC_OWN_POINTER, &is));

  /* Do permutation and replace header */
  PetscCall(MatCreateSubMatrix(Anatural, is, is, MAT_INITIAL_MATRIX, &Aapp));
  /* MatHeaderReplace moves Aapp's data into A and destroys Aapp */
  PetscCall(MatHeaderReplace(A, &Aapp));
  PetscCall(ISDestroy(&is));
  PetscCall(MatDestroy(&Anatural));
  PetscFunctionReturn(PETSC_SUCCESS);
}
628 
629 PetscErrorCode DMCreateMatrix_DA(DM da, Mat *J)
630 {
631   PetscInt    dim, dof, nx, ny, nz, dims[3], starts[3], M, N, P;
632   Mat         A;
633   MPI_Comm    comm;
634   MatType     Atype;
635   MatType     mtype;
636   PetscMPIInt size;
637   DM_DA      *dd    = (DM_DA *)da->data;
638   void (*aij)(void) = NULL, (*baij)(void) = NULL, (*sbaij)(void) = NULL, (*sell)(void) = NULL, (*is)(void) = NULL;
639 
640   PetscFunctionBegin;
641   PetscCall(MatInitializePackage());
642   mtype = da->mattype;
643 
644   /*
645                                   m
646           ------------------------------------------------------
647          |                                                     |
648          |                                                     |
649          |               ----------------------                |
650          |               |                    |                |
651       n  |           ny  |                    |                |
652          |               |                    |                |
653          |               .---------------------                |
654          |             (xs,ys)     nx                          |
655          |            .                                        |
656          |         (gxs,gys)                                   |
657          |                                                     |
658           -----------------------------------------------------
659   */
660 
661   /*
662          nc - number of components per grid point
663          col - number of colors needed in one direction for single component problem
664   */
665   M   = dd->M;
666   N   = dd->N;
667   P   = dd->P;
668   dim = da->dim;
669   dof = dd->w;
670   /* PetscCall(DMDAGetInfo(da,&dim,&M,&N,&P,NULL,NULL,NULL,&dof,NULL,NULL,NULL,NULL,NULL)); */
671   PetscCall(DMDAGetCorners(da, NULL, NULL, NULL, &nx, &ny, &nz));
672   PetscCall(PetscObjectGetComm((PetscObject)da, &comm));
673   PetscCall(MatCreate(comm, &A));
674   PetscCall(MatSetSizes(A, dof * nx * ny * nz, dof * nx * ny * nz, dof * M * N * P, dof * M * N * P));
675   PetscCall(MatSetType(A, mtype));
676   PetscCall(MatSetFromOptions(A));
677   if (dof * nx * ny * nz < da->bind_below) {
678     PetscCall(MatSetBindingPropagates(A, PETSC_TRUE));
679     PetscCall(MatBindToCPU(A, PETSC_TRUE));
680   }
681   PetscCall(MatSetDM(A, da));
682   if (da->structure_only) PetscCall(MatSetOption(A, MAT_STRUCTURE_ONLY, PETSC_TRUE));
683   PetscCall(MatGetType(A, &Atype));
684   /*
685      We do not provide a getmatrix function in the DMDA operations because
686    the basic DMDA does not know about matrices. We think of DMDA as being more
687    more low-level than matrices. This is kind of cheating but, cause sometimes
688    we think of DMDA has higher level than matrices.
689 
690      We could switch based on Atype (or mtype), but we do not since the
691    specialized setting routines depend only on the particular preallocation
692    details of the matrix, not the type itself.
693   */
694   PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatMPIAIJSetPreallocation_C", &aij));
695   if (!aij) PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatSeqAIJSetPreallocation_C", &aij));
696   if (!aij) {
697     PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatMPIBAIJSetPreallocation_C", &baij));
698     if (!baij) PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatSeqBAIJSetPreallocation_C", &baij));
699     if (!baij) {
700       PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatMPISBAIJSetPreallocation_C", &sbaij));
701       if (!sbaij) PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatSeqSBAIJSetPreallocation_C", &sbaij));
702       if (!sbaij) {
703         PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatMPISELLSetPreallocation_C", &sell));
704         if (!sell) PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatSeqSELLSetPreallocation_C", &sell));
705       }
706       if (!sell) PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatISSetPreallocation_C", &is));
707     }
708   }
709   if (aij) {
710     if (dim == 1) {
711       if (dd->ofill) {
712         PetscCall(DMCreateMatrix_DA_1d_MPIAIJ_Fill(da, A));
713       } else {
714         DMBoundaryType bx;
715         PetscMPIInt    size;
716         PetscCall(DMDAGetInfo(da, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, &bx, NULL, NULL, NULL));
717         PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)da), &size));
718         if (size == 1 && bx == DM_BOUNDARY_NONE) {
719           PetscCall(DMCreateMatrix_DA_1d_SeqAIJ_NoPreallocation(da, A));
720         } else {
721           PetscCall(DMCreateMatrix_DA_1d_MPIAIJ(da, A));
722         }
723       }
724     } else if (dim == 2) {
725       if (dd->ofill) {
726         PetscCall(DMCreateMatrix_DA_2d_MPIAIJ_Fill(da, A));
727       } else {
728         PetscCall(DMCreateMatrix_DA_2d_MPIAIJ(da, A));
729       }
730     } else if (dim == 3) {
731       if (dd->ofill) {
732         PetscCall(DMCreateMatrix_DA_3d_MPIAIJ_Fill(da, A));
733       } else {
734         PetscCall(DMCreateMatrix_DA_3d_MPIAIJ(da, A));
735       }
736     }
737   } else if (baij) {
738     if (dim == 2) {
739       PetscCall(DMCreateMatrix_DA_2d_MPIBAIJ(da, A));
740     } else if (dim == 3) {
741       PetscCall(DMCreateMatrix_DA_3d_MPIBAIJ(da, A));
742     } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "Not implemented for %" PetscInt_FMT " dimension and Matrix Type: %s in %" PetscInt_FMT " dimension! Send mail to petsc-maint@mcs.anl.gov for code", dim, Atype, dim);
743   } else if (sbaij) {
744     if (dim == 2) {
745       PetscCall(DMCreateMatrix_DA_2d_MPISBAIJ(da, A));
746     } else if (dim == 3) {
747       PetscCall(DMCreateMatrix_DA_3d_MPISBAIJ(da, A));
748     } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "Not implemented for %" PetscInt_FMT " dimension and Matrix Type: %s in %" PetscInt_FMT " dimension! Send mail to petsc-maint@mcs.anl.gov for code", dim, Atype, dim);
749   } else if (sell) {
750     if (dim == 2) {
751       PetscCall(DMCreateMatrix_DA_2d_MPISELL(da, A));
752     } else if (dim == 3) {
753       PetscCall(DMCreateMatrix_DA_3d_MPISELL(da, A));
754     } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "Not implemented for %" PetscInt_FMT " dimension and Matrix Type: %s in %" PetscInt_FMT " dimension! Send mail to petsc-maint@mcs.anl.gov for code", dim, Atype, dim);
755   } else if (is) {
756     PetscCall(DMCreateMatrix_DA_IS(da, A));
757   } else {
758     ISLocalToGlobalMapping ltog;
759 
760     PetscCall(MatSetBlockSize(A, dof));
761     PetscCall(MatSetUp(A));
762     PetscCall(DMGetLocalToGlobalMapping(da, &ltog));
763     PetscCall(MatSetLocalToGlobalMapping(A, ltog, ltog));
764   }
765   PetscCall(DMDAGetGhostCorners(da, &starts[0], &starts[1], &starts[2], &dims[0], &dims[1], &dims[2]));
766   PetscCall(MatSetStencil(A, dim, dims, starts, dof));
767   PetscCall(MatSetDM(A, da));
768   PetscCallMPI(MPI_Comm_size(comm, &size));
769   if (size > 1) {
770     /* change viewer to display matrix in natural ordering */
771     PetscCall(MatSetOperation(A, MATOP_VIEW, (void (*)(void))MatView_MPI_DA));
772     PetscCall(MatSetOperation(A, MATOP_LOAD, (void (*)(void))MatLoad_MPI_DA));
773   }
774   *J = A;
775   PetscFunctionReturn(PETSC_SUCCESS);
776 }
777 
778 /* ---------------------------------------------------------------------------------*/
779 PETSC_EXTERN PetscErrorCode MatISSetPreallocation_IS(Mat, PetscInt, const PetscInt[], PetscInt, const PetscInt[]);
780 
PetscErrorCode DMCreateMatrix_DA_IS(DM dm, Mat J)
{
  DM_DA                 *da = (DM_DA *)dm->data;
  Mat                    lJ, P;
  ISLocalToGlobalMapping ltog;
  IS                     is;
  PetscBT                bt;
  const PetscInt        *e_loc, *idx;
  PetscInt               i, nel, nen, nv, dof, *gidx, n, N;

  /* The l2g map of DMDA has all ghosted nodes, and e_loc is a subset of all the local nodes (including the ghosted)
     We need to filter out the local indices that are not represented through the DMDAGetElements decomposition */
  PetscFunctionBegin;
  dof = da->w;
  PetscCall(MatSetBlockSize(J, dof));
  PetscCall(DMGetLocalToGlobalMapping(dm, &ltog));

  /* flag local elements indices in local DMDA numbering */
  PetscCall(ISLocalToGlobalMappingGetSize(ltog, &nv));
  PetscCall(PetscBTCreate(nv / dof, &bt)); /* one bit per node (block of dof components), not per component */
  PetscCall(DMDAGetElements(dm, &nel, &nen, &e_loc)); /* this will throw an error if the stencil type is not DMDA_STENCIL_BOX */
  for (i = 0; i < nel * nen; i++) PetscCall(PetscBTSet(bt, e_loc[i]));

  /* filter out (set to -1) the global indices not used by the local elements */
  PetscCall(PetscMalloc1(nv / dof, &gidx));
  PetscCall(ISLocalToGlobalMappingGetBlockIndices(ltog, &idx));
  PetscCall(PetscArraycpy(gidx, idx, nv / dof));
  PetscCall(ISLocalToGlobalMappingRestoreBlockIndices(ltog, &idx));
  for (i = 0; i < nv / dof; i++)
    if (!PetscBTLookup(bt, i)) gidx[i] = -1; /* nodes not touched by any local element are flagged with -1 */
  PetscCall(PetscBTDestroy(&bt));
  /* build the filtered block map for J; gidx ownership passes to the IS (PETSC_OWN_POINTER) */
  PetscCall(ISCreateBlock(PetscObjectComm((PetscObject)dm), dof, nv / dof, gidx, PETSC_OWN_POINTER, &is));
  PetscCall(ISLocalToGlobalMappingCreateIS(is, &ltog)); /* note: ltog now holds a NEW mapping, destroyed below */
  PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));
  PetscCall(ISLocalToGlobalMappingDestroy(&ltog));
  PetscCall(ISDestroy(&is));

  /* Preallocation */
  if (dm->prealloc_skip) {
    PetscCall(MatSetUp(J));
  } else {
    /* stage the element connectivity through a MATPREALLOCATOR matrix P sized like the
       local matrix of J, then transfer the counted nonzero pattern onto lJ */
    PetscCall(MatISGetLocalMat(J, &lJ));
    PetscCall(MatGetLocalToGlobalMapping(lJ, &ltog, NULL));
    PetscCall(MatCreate(PetscObjectComm((PetscObject)lJ), &P));
    PetscCall(MatSetType(P, MATPREALLOCATOR));
    PetscCall(MatSetLocalToGlobalMapping(P, ltog, ltog));
    PetscCall(MatGetSize(lJ, &N, NULL));
    PetscCall(MatGetLocalSize(lJ, &n, NULL));
    PetscCall(MatSetSizes(P, n, n, N, N));
    PetscCall(MatSetBlockSize(P, dof));
    PetscCall(MatSetUp(P));
    /* one blocked insertion per element: full element-to-element coupling */
    for (i = 0; i < nel; i++) PetscCall(MatSetValuesBlockedLocal(P, nen, e_loc + i * nen, nen, e_loc + i * nen, NULL, INSERT_VALUES));
    PetscCall(MatPreallocatorPreallocate(P, (PetscBool)!da->prealloc_only, lJ));
    PetscCall(MatISRestoreLocalMat(J, &lJ));
    PetscCall(DMDARestoreElements(dm, &nel, &nen, &e_loc));
    PetscCall(MatDestroy(&P));

    PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
843 
/*
   DMCreateMatrix_DA_2d_MPISELL - Preallocates a SELL format matrix for a 2d DMDA
   from its stencil (box or star, width s) and boundary information, and, unless
   da->prealloc_only is set, inserts explicit zeros so the nonzero pattern is assembled.
*/
PetscErrorCode DMCreateMatrix_DA_2d_MPISELL(DM da, Mat J)
{
  PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny, m, n, dim, s, *cols = NULL, k, nc, *rows = NULL, col, cnt, l, p;
  PetscInt               lstart, lend, pstart, pend, *dnz, *onz;
  MPI_Comm               comm;
  PetscScalar           *values;
  DMBoundaryType         bx, by;
  ISLocalToGlobalMapping ltog;
  DMDAStencilType        st;

  PetscFunctionBegin;
  /*
         nc - number of components per grid point
         col - number of colors needed in one direction for single component problem
  */
  PetscCall(DMDAGetInfo(da, &dim, &m, &n, NULL, NULL, NULL, NULL, &nc, &s, &bx, &by, NULL, &st));
  col = 2 * s + 1;
  PetscCall(DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL));
  PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL));
  PetscCall(PetscObjectGetComm((PetscObject)da, &comm));

  /* rows: the nc dofs of one node; cols: at most a full box stencil (col*col nodes) times nc dofs per row block */
  PetscCall(PetscMalloc2(nc, &rows, col * col * nc * nc, &cols));
  PetscCall(DMGetLocalToGlobalMapping(da, &ltog));

  PetscCall(MatSetBlockSize(J, nc));
  /* determine the matrix preallocation information */
  MatPreallocateBegin(comm, nc * nx * ny, nc * nx * ny, dnz, onz);
  for (i = xs; i < xs + nx; i++) {
    /* stencil extent in x, clipped at the physical boundary unless periodic */
    pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
    pend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));

    for (j = ys; j < ys + ny; j++) {
      slot = i - gxs + gnx * (j - gys); /* local (ghosted) node index of (i,j) */

      /* stencil extent in y, clipped at the physical boundary unless periodic */
      lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
      lend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));

      cnt = 0;
      for (k = 0; k < nc; k++) {
        for (l = lstart; l < lend + 1; l++) {
          for (p = pstart; p < pend + 1; p++) {
            if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star have either l = 0 or p = 0 */
              cols[cnt++] = k + nc * (slot + gnx * l + p);
            }
          }
        }
        rows[k] = k + nc * (slot);
      }
      PetscCall(MatPreallocateSetLocal(ltog, nc, rows, ltog, cnt, cols, dnz, onz));
    }
  }
  PetscCall(MatSetBlockSize(J, nc));
  /* both Seq and MPI variants are called; the one matching the actual type of J performs the preallocation */
  PetscCall(MatSeqSELLSetPreallocation(J, 0, dnz));
  PetscCall(MatMPISELLSetPreallocation(J, 0, dnz, 0, onz));
  MatPreallocateEnd(dnz, onz);

  PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    /* insert explicit zeros so the full nonzero pattern is assembled into J */
    PetscCall(PetscCalloc1(col * col * nc * nc, &values));
    for (i = xs; i < xs + nx; i++) {
      pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
      pend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));

      for (j = ys; j < ys + ny; j++) {
        slot = i - gxs + gnx * (j - gys);

        lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
        lend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));

        cnt = 0;
        for (k = 0; k < nc; k++) {
          for (l = lstart; l < lend + 1; l++) {
            for (p = pstart; p < pend + 1; p++) {
              if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star have either l = 0 or p = 0 */
                cols[cnt++] = k + nc * (slot + gnx * l + p);
              }
            }
          }
          rows[k] = k + nc * (slot);
        }
        PetscCall(MatSetValuesLocal(J, nc, rows, cnt, cols, values, INSERT_VALUES));
      }
    }
    PetscCall(PetscFree(values));
    /* do not copy values to GPU since they are all zero and not yet needed there */
    PetscCall(MatBindToCPU(J, PETSC_TRUE));
    PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatBindToCPU(J, PETSC_FALSE));
    PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  }
  PetscCall(PetscFree2(rows, cols));
  PetscFunctionReturn(PETSC_SUCCESS);
}
944 
945 PetscErrorCode DMCreateMatrix_DA_3d_MPISELL(DM da, Mat J)
946 {
947   PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
948   PetscInt               m, n, dim, s, *cols = NULL, k, nc, *rows = NULL, col, cnt, l, p, *dnz = NULL, *onz = NULL;
949   PetscInt               istart, iend, jstart, jend, kstart, kend, zs, nz, gzs, gnz, ii, jj, kk, M, N, P;
950   MPI_Comm               comm;
951   PetscScalar           *values;
952   DMBoundaryType         bx, by, bz;
953   ISLocalToGlobalMapping ltog;
954   DMDAStencilType        st;
955 
956   PetscFunctionBegin;
957   /*
958          nc - number of components per grid point
959          col - number of colors needed in one direction for single component problem
960   */
961   PetscCall(DMDAGetInfo(da, &dim, &m, &n, &p, &M, &N, &P, &nc, &s, &bx, &by, &bz, &st));
962   col = 2 * s + 1;
963   PetscCall(DMDAGetCorners(da, &xs, &ys, &zs, &nx, &ny, &nz));
964   PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, &gzs, &gnx, &gny, &gnz));
965   PetscCall(PetscObjectGetComm((PetscObject)da, &comm));
966 
967   PetscCall(PetscMalloc2(nc, &rows, col * col * col * nc * nc, &cols));
968   PetscCall(DMGetLocalToGlobalMapping(da, &ltog));
969 
970   PetscCall(MatSetBlockSize(J, nc));
971   /* determine the matrix preallocation information */
972   MatPreallocateBegin(comm, nc * nx * ny * nz, nc * nx * ny * nz, dnz, onz);
973   for (i = xs; i < xs + nx; i++) {
974     istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
975     iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
976     for (j = ys; j < ys + ny; j++) {
977       jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
978       jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
979       for (k = zs; k < zs + nz; k++) {
980         kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
981         kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));
982 
983         slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);
984 
985         cnt = 0;
986         for (l = 0; l < nc; l++) {
987           for (ii = istart; ii < iend + 1; ii++) {
988             for (jj = jstart; jj < jend + 1; jj++) {
989               for (kk = kstart; kk < kend + 1; kk++) {
990                 if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/
991                   cols[cnt++] = l + nc * (slot + ii + gnx * jj + gnx * gny * kk);
992                 }
993               }
994             }
995           }
996           rows[l] = l + nc * (slot);
997         }
998         PetscCall(MatPreallocateSetLocal(ltog, nc, rows, ltog, cnt, cols, dnz, onz));
999       }
1000     }
1001   }
1002   PetscCall(MatSetBlockSize(J, nc));
1003   PetscCall(MatSeqSELLSetPreallocation(J, 0, dnz));
1004   PetscCall(MatMPISELLSetPreallocation(J, 0, dnz, 0, onz));
1005   MatPreallocateEnd(dnz, onz);
1006   PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));
1007 
1008   /*
1009     For each node in the grid: we get the neighbors in the local (on processor ordering
1010     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1011     PETSc ordering.
1012   */
1013   if (!da->prealloc_only) {
1014     PetscCall(PetscCalloc1(col * col * col * nc * nc * nc, &values));
1015     for (i = xs; i < xs + nx; i++) {
1016       istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
1017       iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
1018       for (j = ys; j < ys + ny; j++) {
1019         jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
1020         jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
1021         for (k = zs; k < zs + nz; k++) {
1022           kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
1023           kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));
1024 
1025           slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);
1026 
1027           cnt = 0;
1028           for (l = 0; l < nc; l++) {
1029             for (ii = istart; ii < iend + 1; ii++) {
1030               for (jj = jstart; jj < jend + 1; jj++) {
1031                 for (kk = kstart; kk < kend + 1; kk++) {
1032                   if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/
1033                     cols[cnt++] = l + nc * (slot + ii + gnx * jj + gnx * gny * kk);
1034                   }
1035                 }
1036               }
1037             }
1038             rows[l] = l + nc * (slot);
1039           }
1040           PetscCall(MatSetValuesLocal(J, nc, rows, cnt, cols, values, INSERT_VALUES));
1041         }
1042       }
1043     }
1044     PetscCall(PetscFree(values));
1045     /* do not copy values to GPU since they are all zero and not yet needed there */
1046     PetscCall(MatBindToCPU(J, PETSC_TRUE));
1047     PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
1048     PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
1049     PetscCall(MatBindToCPU(J, PETSC_FALSE));
1050     PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
1051   }
1052   PetscCall(PetscFree2(rows, cols));
1053   PetscFunctionReturn(PETSC_SUCCESS);
1054 }
1055 
/*
   DMCreateMatrix_DA_2d_MPIAIJ - Preallocates an AIJ format matrix for a 2d DMDA
   from its stencil (box or star, width s) and boundary information, and, unless
   da->prealloc_only is set, inserts the nonzero pattern (with NULL values).
*/
PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ(DM da, Mat J)
{
  PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny, m, n, dim, s, *cols = NULL, k, nc, *rows = NULL, col, cnt, l, p, M, N;
  PetscInt               lstart, lend, pstart, pend, *dnz, *onz;
  MPI_Comm               comm;
  DMBoundaryType         bx, by;
  ISLocalToGlobalMapping ltog, mltog;
  DMDAStencilType        st;
  PetscBool              removedups = PETSC_FALSE, alreadyboundtocpu = PETSC_TRUE;

  PetscFunctionBegin;
  /*
         nc - number of components per grid point
         col - number of colors needed in one direction for single component problem
  */
  PetscCall(DMDAGetInfo(da, &dim, &m, &n, NULL, &M, &N, NULL, &nc, &s, &bx, &by, NULL, &st));
  /* without periodicity each row's columns are generated in sorted order below, so tell the matrix */
  if (bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE) PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_TRUE));
  col = 2 * s + 1;
  /*
       With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times
       because of "wrapping" around the end of the domain hitting an entry already counted in the other direction.
  */
  if (M == 1 && 2 * s >= m) removedups = PETSC_TRUE;
  if (N == 1 && 2 * s >= n) removedups = PETSC_TRUE;
  PetscCall(DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL));
  PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL));
  PetscCall(PetscObjectGetComm((PetscObject)da, &comm));

  /* rows: the nc dofs of one node; cols: at most a full box stencil (col*col nodes) times nc dofs per row block */
  PetscCall(PetscMalloc2(nc, &rows, col * col * nc * nc, &cols));
  PetscCall(DMGetLocalToGlobalMapping(da, &ltog));

  PetscCall(MatSetBlockSize(J, nc));
  /* determine the matrix preallocation information */
  MatPreallocateBegin(comm, nc * nx * ny, nc * nx * ny, dnz, onz);
  for (i = xs; i < xs + nx; i++) {
    /* stencil extent in x, clipped at the physical boundary unless periodic */
    pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
    pend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));

    for (j = ys; j < ys + ny; j++) {
      slot = i - gxs + gnx * (j - gys); /* local (ghosted) node index of (i,j) */

      /* stencil extent in y, clipped at the physical boundary unless periodic */
      lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
      lend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));

      cnt = 0;
      for (k = 0; k < nc; k++) {
        for (l = lstart; l < lend + 1; l++) {
          for (p = pstart; p < pend + 1; p++) {
            if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star have either l = 0 or p = 0 */
              cols[cnt++] = k + nc * (slot + gnx * l + p);
            }
          }
        }
        rows[k] = k + nc * (slot);
      }
      if (removedups) PetscCall(MatPreallocateSetLocalRemoveDups(ltog, nc, rows, ltog, cnt, cols, dnz, onz));
      else PetscCall(MatPreallocateSetLocal(ltog, nc, rows, ltog, cnt, cols, dnz, onz));
    }
  }
  PetscCall(MatSetBlockSize(J, nc));
  /* both Seq and MPI variants are called; the one matching the actual type of J performs the preallocation */
  PetscCall(MatSeqAIJSetPreallocation(J, 0, dnz));
  PetscCall(MatMPIAIJSetPreallocation(J, 0, dnz, 0, onz));
  MatPreallocateEnd(dnz, onz);
  /* keep a mapping the matrix may already have (e.g. set by a caller); only install ours if none is present */
  PetscCall(MatGetLocalToGlobalMapping(J, &mltog, NULL));
  if (!mltog) PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    for (i = xs; i < xs + nx; i++) {
      pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
      pend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));

      for (j = ys; j < ys + ny; j++) {
        slot = i - gxs + gnx * (j - gys);

        lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
        lend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));

        cnt = 0;
        /* unlike the preallocation loop above, columns here are generated node-by-node with
           the nc consecutive dofs of each node emitted as first-dof index + increments */
        for (l = lstart; l < lend + 1; l++) {
          for (p = pstart; p < pend + 1; p++) {
            if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star have either l = 0 or p = 0 */
              cols[cnt++] = nc * (slot + gnx * l + p);
              for (k = 1; k < nc; k++) {
                cols[cnt] = 1 + cols[cnt - 1];
                cnt++;
              }
            }
          }
        }
        for (k = 0; k < nc; k++) rows[k] = k + nc * (slot);
        PetscCall(MatSetValuesLocal(J, nc, rows, cnt, cols, NULL, INSERT_VALUES));
      }
    }
    /* do not copy values to GPU since they are all zero and not yet needed there */
    PetscCall(MatBoundToCPU(J, &alreadyboundtocpu));
    PetscCall(MatBindToCPU(J, PETSC_TRUE));
    PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
    if (!alreadyboundtocpu) PetscCall(MatBindToCPU(J, PETSC_FALSE)); /* restore the previous binding state */
    PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
    if (bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE) PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_FALSE));
  }
  PetscCall(PetscFree2(rows, cols));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1166 
1167 PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ_Fill(DM da, Mat J)
1168 {
1169   PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
1170   PetscInt               m, n, dim, s, *cols, k, nc, row, col, cnt, maxcnt = 0, l, p, M, N;
1171   PetscInt               lstart, lend, pstart, pend, *dnz, *onz;
1172   DM_DA                 *dd = (DM_DA *)da->data;
1173   PetscInt               ifill_col, *ofill = dd->ofill, *dfill = dd->dfill;
1174   MPI_Comm               comm;
1175   DMBoundaryType         bx, by;
1176   ISLocalToGlobalMapping ltog;
1177   DMDAStencilType        st;
1178   PetscBool              removedups = PETSC_FALSE;
1179 
1180   PetscFunctionBegin;
1181   /*
1182          nc - number of components per grid point
1183          col - number of colors needed in one direction for single component problem
1184   */
1185   PetscCall(DMDAGetInfo(da, &dim, &m, &n, NULL, &M, &N, NULL, &nc, &s, &bx, &by, NULL, &st));
1186   col = 2 * s + 1;
1187   /*
1188        With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times
1189        because of "wrapping" around the end of the domain hitting an entry already counted in the other direction.
1190   */
1191   if (M == 1 && 2 * s >= m) removedups = PETSC_TRUE;
1192   if (N == 1 && 2 * s >= n) removedups = PETSC_TRUE;
1193   PetscCall(DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL));
1194   PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL));
1195   PetscCall(PetscObjectGetComm((PetscObject)da, &comm));
1196 
1197   PetscCall(PetscMalloc1(col * col * nc, &cols));
1198   PetscCall(DMGetLocalToGlobalMapping(da, &ltog));
1199 
1200   PetscCall(MatSetBlockSize(J, nc));
1201   /* determine the matrix preallocation information */
1202   MatPreallocateBegin(comm, nc * nx * ny, nc * nx * ny, dnz, onz);
1203   for (i = xs; i < xs + nx; i++) {
1204     pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
1205     pend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
1206 
1207     for (j = ys; j < ys + ny; j++) {
1208       slot = i - gxs + gnx * (j - gys);
1209 
1210       lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
1211       lend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
1212 
1213       for (k = 0; k < nc; k++) {
1214         cnt = 0;
1215         for (l = lstart; l < lend + 1; l++) {
1216           for (p = pstart; p < pend + 1; p++) {
1217             if (l || p) {
1218               if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star */
1219                 for (ifill_col = ofill[k]; ifill_col < ofill[k + 1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc * (slot + gnx * l + p);
1220               }
1221             } else {
1222               if (dfill) {
1223                 for (ifill_col = dfill[k]; ifill_col < dfill[k + 1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc * (slot + gnx * l + p);
1224               } else {
1225                 for (ifill_col = 0; ifill_col < nc; ifill_col++) cols[cnt++] = ifill_col + nc * (slot + gnx * l + p);
1226               }
1227             }
1228           }
1229         }
1230         row    = k + nc * (slot);
1231         maxcnt = PetscMax(maxcnt, cnt);
1232         if (removedups) PetscCall(MatPreallocateSetLocalRemoveDups(ltog, 1, &row, ltog, cnt, cols, dnz, onz));
1233         else PetscCall(MatPreallocateSetLocal(ltog, 1, &row, ltog, cnt, cols, dnz, onz));
1234       }
1235     }
1236   }
1237   PetscCall(MatSeqAIJSetPreallocation(J, 0, dnz));
1238   PetscCall(MatMPIAIJSetPreallocation(J, 0, dnz, 0, onz));
1239   MatPreallocateEnd(dnz, onz);
1240   PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));
1241 
1242   /*
1243     For each node in the grid: we get the neighbors in the local (on processor ordering
1244     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1245     PETSc ordering.
1246   */
1247   if (!da->prealloc_only) {
1248     for (i = xs; i < xs + nx; i++) {
1249       pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
1250       pend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
1251 
1252       for (j = ys; j < ys + ny; j++) {
1253         slot = i - gxs + gnx * (j - gys);
1254 
1255         lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
1256         lend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
1257 
1258         for (k = 0; k < nc; k++) {
1259           cnt = 0;
1260           for (l = lstart; l < lend + 1; l++) {
1261             for (p = pstart; p < pend + 1; p++) {
1262               if (l || p) {
1263                 if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star */
1264                   for (ifill_col = ofill[k]; ifill_col < ofill[k + 1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc * (slot + gnx * l + p);
1265                 }
1266               } else {
1267                 if (dfill) {
1268                   for (ifill_col = dfill[k]; ifill_col < dfill[k + 1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc * (slot + gnx * l + p);
1269                 } else {
1270                   for (ifill_col = 0; ifill_col < nc; ifill_col++) cols[cnt++] = ifill_col + nc * (slot + gnx * l + p);
1271                 }
1272               }
1273             }
1274           }
1275           row = k + nc * (slot);
1276           PetscCall(MatSetValuesLocal(J, 1, &row, cnt, cols, NULL, INSERT_VALUES));
1277         }
1278       }
1279     }
1280     /* do not copy values to GPU since they are all zero and not yet needed there */
1281     PetscCall(MatBindToCPU(J, PETSC_TRUE));
1282     PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
1283     PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
1284     PetscCall(MatBindToCPU(J, PETSC_FALSE));
1285     PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
1286   }
1287   PetscCall(PetscFree(cols));
1288   PetscFunctionReturn(PETSC_SUCCESS);
1289 }
1290 
1291 /* ---------------------------------------------------------------------------------*/
1292 
1293 PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ(DM da, Mat J)
1294 {
1295   PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
1296   PetscInt               m, n, dim, s, *cols = NULL, k, nc, *rows = NULL, col, cnt, l, p, *dnz = NULL, *onz = NULL;
1297   PetscInt               istart, iend, jstart, jend, kstart, kend, zs, nz, gzs, gnz, ii, jj, kk, M, N, P;
1298   MPI_Comm               comm;
1299   DMBoundaryType         bx, by, bz;
1300   ISLocalToGlobalMapping ltog, mltog;
1301   DMDAStencilType        st;
1302   PetscBool              removedups = PETSC_FALSE;
1303 
1304   PetscFunctionBegin;
1305   /*
1306          nc - number of components per grid point
1307          col - number of colors needed in one direction for single component problem
1308   */
1309   PetscCall(DMDAGetInfo(da, &dim, &m, &n, &p, &M, &N, &P, &nc, &s, &bx, &by, &bz, &st));
1310   if (bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE && bz == DM_BOUNDARY_NONE) PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_TRUE));
1311   col = 2 * s + 1;
1312 
1313   /*
1314        With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times
1315        because of "wrapping" around the end of the domain hitting an entry already counted in the other direction.
1316   */
1317   if (M == 1 && 2 * s >= m) removedups = PETSC_TRUE;
1318   if (N == 1 && 2 * s >= n) removedups = PETSC_TRUE;
1319   if (P == 1 && 2 * s >= p) removedups = PETSC_TRUE;
1320 
1321   PetscCall(DMDAGetCorners(da, &xs, &ys, &zs, &nx, &ny, &nz));
1322   PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, &gzs, &gnx, &gny, &gnz));
1323   PetscCall(PetscObjectGetComm((PetscObject)da, &comm));
1324 
1325   PetscCall(PetscMalloc2(nc, &rows, col * col * col * nc * nc, &cols));
1326   PetscCall(DMGetLocalToGlobalMapping(da, &ltog));
1327 
1328   PetscCall(MatSetBlockSize(J, nc));
1329   /* determine the matrix preallocation information */
1330   MatPreallocateBegin(comm, nc * nx * ny * nz, nc * nx * ny * nz, dnz, onz);
1331   for (i = xs; i < xs + nx; i++) {
1332     istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
1333     iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
1334     for (j = ys; j < ys + ny; j++) {
1335       jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
1336       jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
1337       for (k = zs; k < zs + nz; k++) {
1338         kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
1339         kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));
1340 
1341         slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);
1342 
1343         cnt = 0;
1344         for (l = 0; l < nc; l++) {
1345           for (ii = istart; ii < iend + 1; ii++) {
1346             for (jj = jstart; jj < jend + 1; jj++) {
1347               for (kk = kstart; kk < kend + 1; kk++) {
1348                 if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/
1349                   cols[cnt++] = l + nc * (slot + ii + gnx * jj + gnx * gny * kk);
1350                 }
1351               }
1352             }
1353           }
1354           rows[l] = l + nc * (slot);
1355         }
1356         if (removedups) PetscCall(MatPreallocateSetLocalRemoveDups(ltog, nc, rows, ltog, cnt, cols, dnz, onz));
1357         else PetscCall(MatPreallocateSetLocal(ltog, nc, rows, ltog, cnt, cols, dnz, onz));
1358       }
1359     }
1360   }
1361   PetscCall(MatSetBlockSize(J, nc));
1362   PetscCall(MatSeqAIJSetPreallocation(J, 0, dnz));
1363   PetscCall(MatMPIAIJSetPreallocation(J, 0, dnz, 0, onz));
1364   MatPreallocateEnd(dnz, onz);
1365   PetscCall(MatGetLocalToGlobalMapping(J, &mltog, NULL));
1366   if (!mltog) PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));
1367 
1368   /*
1369     For each node in the grid: we get the neighbors in the local (on processor ordering
1370     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1371     PETSc ordering.
1372   */
1373   if (!da->prealloc_only) {
1374     for (i = xs; i < xs + nx; i++) {
1375       istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
1376       iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
1377       for (j = ys; j < ys + ny; j++) {
1378         jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
1379         jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
1380         for (k = zs; k < zs + nz; k++) {
1381           kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
1382           kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));
1383 
1384           slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);
1385 
1386           cnt = 0;
1387           for (kk = kstart; kk < kend + 1; kk++) {
1388             for (jj = jstart; jj < jend + 1; jj++) {
1389               for (ii = istart; ii < iend + 1; ii++) {
1390                 if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/
1391                   cols[cnt++] = nc * (slot + ii + gnx * jj + gnx * gny * kk);
1392                   for (l = 1; l < nc; l++) {
1393                     cols[cnt] = 1 + cols[cnt - 1];
1394                     cnt++;
1395                   }
1396                 }
1397               }
1398             }
1399           }
1400           rows[0] = nc * (slot);
1401           for (l = 1; l < nc; l++) rows[l] = 1 + rows[l - 1];
1402           PetscCall(MatSetValuesLocal(J, nc, rows, cnt, cols, NULL, INSERT_VALUES));
1403         }
1404       }
1405     }
1406     /* do not copy values to GPU since they are all zero and not yet needed there */
1407     PetscCall(MatBindToCPU(J, PETSC_TRUE));
1408     PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
1409     PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
1410     if (bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE && bz == DM_BOUNDARY_NONE) PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_FALSE));
1411     PetscCall(MatBindToCPU(J, PETSC_FALSE));
1412     PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
1413   }
1414   PetscCall(PetscFree2(rows, cols));
1415   PetscFunctionReturn(PETSC_SUCCESS);
1416 }
1417 
1418 /* ---------------------------------------------------------------------------------*/
1419 
/*
  Creates and preallocates an (MPI)AIJ matrix for a 1d DMDA honoring the per-component
  coupling patterns set with DMDASetBlockFills(): dfill describes which components couple
  within a grid point, ofill which components couple to neighboring grid points. Both are
  stored in the modified-CSR layout built by DMDASetBlockFills_Private() (fill[0..w] are
  row starts, followed by column indices).
*/
PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ_Fill(DM da, Mat J)
{
  DM_DA                 *dd = (DM_DA *)da->data;
  PetscInt               xs, nx, i, j, gxs, gnx, row, k, l;
  PetscInt               m, dim, s, *cols = NULL, nc, cnt, maxcnt = 0, *ocols;
  PetscInt              *ofill = dd->ofill, *dfill = dd->dfill; /* modified-CSR fill patterns; ofill[j+1]-ofill[j] = #off-point couplings of component j */
  DMBoundaryType         bx;
  ISLocalToGlobalMapping ltog;
  PetscMPIInt            rank, size;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)da), &rank));
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)da), &size));

  /*
         nc - number of components per grid point
  */
  PetscCall(DMDAGetInfo(da, &dim, &m, NULL, NULL, NULL, NULL, NULL, &nc, &s, &bx, NULL, NULL, NULL));
  PetscCheck(s <= 1, PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "Matrix creation for 1d not implemented correctly for stencil width larger than 1");
  PetscCall(DMDAGetCorners(da, &xs, NULL, NULL, &nx, NULL, NULL));
  PetscCall(DMDAGetGhostCorners(da, &gxs, NULL, NULL, &gnx, NULL, NULL));

  PetscCall(MatSetBlockSize(J, nc));
  /* cols[r]/ocols[r]: number of nonzeros in the diagonal/off-diagonal part of local row r */
  PetscCall(PetscCalloc2(nx * nc, &cols, nx * nc, &ocols));

  /*
        note should be smaller for first and last process with no periodic
        does not handle dfill
  */
  /* NOTE(review): dfill is dereferenced unconditionally in this counting pass, but the
     assembly pass below guards with "if (dfill)" — confirm this routine is only selected
     when both dfill and ofill were provided via DMDASetBlockFills(). */
  cnt = 0;
  /* coupling with process to the left */
  for (i = 0; i < s; i++) {
    for (j = 0; j < nc; j++) {
      /* off-diagonal part: neighbor points owned by the left process (none on rank 0) */
      ocols[cnt] = ((rank == 0) ? 0 : (s - i) * (ofill[j + 1] - ofill[j]));
      /* diagonal part: in-point couplings plus neighbor points owned locally */
      cols[cnt]  = dfill[j + 1] - dfill[j] + (s + i) * (ofill[j + 1] - ofill[j]);
      if (rank == 0 && (dd->bx == DM_BOUNDARY_PERIODIC)) {
        /* periodic wrap: rank 0 also couples to the last process (or to itself when run on one rank) */
        if (size > 1) ocols[cnt] += (s - i) * (ofill[j + 1] - ofill[j]);
        else cols[cnt] += (s - i) * (ofill[j + 1] - ofill[j]);
      }
      maxcnt = PetscMax(maxcnt, ocols[cnt] + cols[cnt]);
      cnt++;
    }
  }
  /* interior points: full stencil of 2*s neighbor points, all owned locally */
  for (i = s; i < nx - s; i++) {
    for (j = 0; j < nc; j++) {
      cols[cnt] = dfill[j + 1] - dfill[j] + 2 * s * (ofill[j + 1] - ofill[j]);
      maxcnt    = PetscMax(maxcnt, ocols[cnt] + cols[cnt]);
      cnt++;
    }
  }
  /* coupling with process to the right */
  for (i = nx - s; i < nx; i++) {
    for (j = 0; j < nc; j++) {
      ocols[cnt] = ((rank == (size - 1)) ? 0 : (i - nx + s + 1) * (ofill[j + 1] - ofill[j]));
      cols[cnt]  = dfill[j + 1] - dfill[j] + (s + nx - i - 1) * (ofill[j + 1] - ofill[j]);
      if ((rank == size - 1) && (dd->bx == DM_BOUNDARY_PERIODIC)) {
        /* periodic wrap: last rank also couples to rank 0 (or to itself on one rank) */
        if (size > 1) ocols[cnt] += (i - nx + s + 1) * (ofill[j + 1] - ofill[j]);
        else cols[cnt] += (i - nx + s + 1) * (ofill[j + 1] - ofill[j]);
      }
      maxcnt = PetscMax(maxcnt, ocols[cnt] + cols[cnt]);
      cnt++;
    }
  }

  PetscCall(MatSeqAIJSetPreallocation(J, 0, cols));
  PetscCall(MatMPIAIJSetPreallocation(J, 0, cols, 0, ocols));
  PetscCall(PetscFree2(cols, ocols));

  PetscCall(DMGetLocalToGlobalMapping(da, &ltog));
  PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    /* insert explicit zeros in every preallocated location; maxcnt bounds the row length */
    PetscCall(PetscMalloc1(maxcnt, &cols));
    row = xs * nc; /* global row index (this pass uses MatSetValues with global indices) */
    /* coupling with process to the left */
    for (i = xs; i < xs + s; i++) {
      for (j = 0; j < nc; j++) {
        cnt = 0;
        if (rank) {
          /* neighbor points to the left, owned by the previous process */
          for (l = 0; l < s; l++) {
            for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i - s + l) * nc + ofill[k];
          }
        }
        if (rank == 0 && (dd->bx == DM_BOUNDARY_PERIODIC)) {
          /* periodic wrap-around to the far end of the domain */
          for (l = 0; l < s; l++) {
            for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (m + i - s - l) * nc + ofill[k];
          }
        }
        if (dfill) {
          /* in-point couplings restricted to the dfill pattern */
          for (k = dfill[j]; k < dfill[j + 1]; k++) cols[cnt++] = i * nc + dfill[k];
        } else {
          /* dense in-point coupling */
          for (k = 0; k < nc; k++) cols[cnt++] = i * nc + k;
        }
        for (l = 0; l < s; l++) {
          for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i + s - l) * nc + ofill[k];
        }
        PetscCall(MatSetValues(J, 1, &row, cnt, cols, NULL, INSERT_VALUES));
        row++;
      }
    }
    /* interior points: left neighbors, in-point, right neighbors */
    for (i = xs + s; i < xs + nx - s; i++) {
      for (j = 0; j < nc; j++) {
        cnt = 0;
        for (l = 0; l < s; l++) {
          for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i - s + l) * nc + ofill[k];
        }
        if (dfill) {
          for (k = dfill[j]; k < dfill[j + 1]; k++) cols[cnt++] = i * nc + dfill[k];
        } else {
          for (k = 0; k < nc; k++) cols[cnt++] = i * nc + k;
        }
        for (l = 0; l < s; l++) {
          for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i + s - l) * nc + ofill[k];
        }
        PetscCall(MatSetValues(J, 1, &row, cnt, cols, NULL, INSERT_VALUES));
        row++;
      }
    }
    /* coupling with process to the right */
    for (i = xs + nx - s; i < xs + nx; i++) {
      for (j = 0; j < nc; j++) {
        cnt = 0;
        for (l = 0; l < s; l++) {
          for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i - s + l) * nc + ofill[k];
        }
        if (dfill) {
          for (k = dfill[j]; k < dfill[j + 1]; k++) cols[cnt++] = i * nc + dfill[k];
        } else {
          for (k = 0; k < nc; k++) cols[cnt++] = i * nc + k;
        }
        if (rank < size - 1) {
          for (l = 0; l < s; l++) {
            for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i + s - l) * nc + ofill[k];
          }
        }
        if ((rank == size - 1) && (dd->bx == DM_BOUNDARY_PERIODIC)) {
          /* periodic wrap-around back to the start of the domain */
          for (l = 0; l < s; l++) {
            for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i - s - l - m + 2) * nc + ofill[k];
          }
        }
        PetscCall(MatSetValues(J, 1, &row, cnt, cols, NULL, INSERT_VALUES));
        row++;
      }
    }
    PetscCall(PetscFree(cols));
    /* do not copy values to GPU since they are all zero and not yet needed there */
    PetscCall(MatBindToCPU(J, PETSC_TRUE));
    PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatBindToCPU(J, PETSC_FALSE));
    PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
1579 
1580 /* ---------------------------------------------------------------------------------*/
1581 
1582 PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ(DM da, Mat J)
1583 {
1584   PetscInt               xs, nx, i, i1, slot, gxs, gnx;
1585   PetscInt               m, dim, s, *cols = NULL, nc, *rows = NULL, col, cnt, l;
1586   PetscInt               istart, iend;
1587   DMBoundaryType         bx;
1588   ISLocalToGlobalMapping ltog, mltog;
1589 
1590   PetscFunctionBegin;
1591   /*
1592          nc - number of components per grid point
1593          col - number of colors needed in one direction for single component problem
1594   */
1595   PetscCall(DMDAGetInfo(da, &dim, &m, NULL, NULL, NULL, NULL, NULL, &nc, &s, &bx, NULL, NULL, NULL));
1596   if (bx == DM_BOUNDARY_NONE) PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_TRUE));
1597   col = 2 * s + 1;
1598 
1599   PetscCall(DMDAGetCorners(da, &xs, NULL, NULL, &nx, NULL, NULL));
1600   PetscCall(DMDAGetGhostCorners(da, &gxs, NULL, NULL, &gnx, NULL, NULL));
1601 
1602   PetscCall(MatSetBlockSize(J, nc));
1603   PetscCall(MatSeqAIJSetPreallocation(J, col * nc, NULL));
1604   PetscCall(MatMPIAIJSetPreallocation(J, col * nc, NULL, col * nc, NULL));
1605 
1606   PetscCall(DMGetLocalToGlobalMapping(da, &ltog));
1607   PetscCall(MatGetLocalToGlobalMapping(J, &mltog, NULL));
1608   if (!mltog) PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));
1609 
1610   /*
1611     For each node in the grid: we get the neighbors in the local (on processor ordering
1612     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1613     PETSc ordering.
1614   */
1615   if (!da->prealloc_only) {
1616     PetscCall(PetscMalloc2(nc, &rows, col * nc * nc, &cols));
1617     for (i = xs; i < xs + nx; i++) {
1618       istart = PetscMax(-s, gxs - i);
1619       iend   = PetscMin(s, gxs + gnx - i - 1);
1620       slot   = i - gxs;
1621 
1622       cnt = 0;
1623       for (i1 = istart; i1 < iend + 1; i1++) {
1624         cols[cnt++] = nc * (slot + i1);
1625         for (l = 1; l < nc; l++) {
1626           cols[cnt] = 1 + cols[cnt - 1];
1627           cnt++;
1628         }
1629       }
1630       rows[0] = nc * (slot);
1631       for (l = 1; l < nc; l++) rows[l] = 1 + rows[l - 1];
1632       PetscCall(MatSetValuesLocal(J, nc, rows, cnt, cols, NULL, INSERT_VALUES));
1633     }
1634     /* do not copy values to GPU since they are all zero and not yet needed there */
1635     PetscCall(MatBindToCPU(J, PETSC_TRUE));
1636     PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
1637     PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
1638     if (bx == DM_BOUNDARY_NONE) PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_FALSE));
1639     PetscCall(MatBindToCPU(J, PETSC_FALSE));
1640     PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
1641     PetscCall(PetscFree2(rows, cols));
1642   }
1643   PetscFunctionReturn(PETSC_SUCCESS);
1644 }
1645 
1646 /* ---------------------------------------------------------------------------------*/
1647 
1648 PetscErrorCode DMCreateMatrix_DA_1d_SeqAIJ_NoPreallocation(DM da, Mat J)
1649 {
1650   PetscInt               xs, nx, i, i1, slot, gxs, gnx;
1651   PetscInt               m, dim, s, *cols = NULL, nc, *rows = NULL, col, cnt, l;
1652   PetscInt               istart, iend;
1653   DMBoundaryType         bx;
1654   ISLocalToGlobalMapping ltog, mltog;
1655 
1656   PetscFunctionBegin;
1657   /*
1658          nc - number of components per grid point
1659          col - number of colors needed in one direction for single component problem
1660   */
1661   PetscCall(DMDAGetInfo(da, &dim, &m, NULL, NULL, NULL, NULL, NULL, &nc, &s, &bx, NULL, NULL, NULL));
1662   col = 2 * s + 1;
1663 
1664   PetscCall(DMDAGetCorners(da, &xs, NULL, NULL, &nx, NULL, NULL));
1665   PetscCall(DMDAGetGhostCorners(da, &gxs, NULL, NULL, &gnx, NULL, NULL));
1666 
1667   PetscCall(MatSetBlockSize(J, nc));
1668   PetscCall(MatSeqAIJSetTotalPreallocation(J, nx * nc * col * nc));
1669 
1670   PetscCall(DMGetLocalToGlobalMapping(da, &ltog));
1671   PetscCall(MatGetLocalToGlobalMapping(J, &mltog, NULL));
1672   if (!mltog) PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));
1673 
1674   /*
1675     For each node in the grid: we get the neighbors in the local (on processor ordering
1676     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1677     PETSc ordering.
1678   */
1679   if (!da->prealloc_only) {
1680     PetscCall(PetscMalloc2(nc, &rows, col * nc * nc, &cols));
1681     for (i = xs; i < xs + nx; i++) {
1682       istart = PetscMax(-s, gxs - i);
1683       iend   = PetscMin(s, gxs + gnx - i - 1);
1684       slot   = i - gxs;
1685 
1686       cnt = 0;
1687       for (i1 = istart; i1 < iend + 1; i1++) {
1688         cols[cnt++] = nc * (slot + i1);
1689         for (l = 1; l < nc; l++) {
1690           cols[cnt] = 1 + cols[cnt - 1];
1691           cnt++;
1692         }
1693       }
1694       rows[0] = nc * (slot);
1695       for (l = 1; l < nc; l++) rows[l] = 1 + rows[l - 1];
1696       PetscCall(MatSetValuesLocal(J, nc, rows, cnt, cols, NULL, INSERT_VALUES));
1697     }
1698     /* do not copy values to GPU since they are all zero and not yet needed there */
1699     PetscCall(MatBindToCPU(J, PETSC_TRUE));
1700     PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
1701     PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
1702     if (bx == DM_BOUNDARY_NONE) PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_FALSE));
1703     PetscCall(MatBindToCPU(J, PETSC_FALSE));
1704     PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
1705     PetscCall(PetscFree2(rows, cols));
1706   }
1707   PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_FALSE));
1708   PetscFunctionReturn(PETSC_SUCCESS);
1709 }
1710 
1711 PetscErrorCode DMCreateMatrix_DA_2d_MPIBAIJ(DM da, Mat J)
1712 {
1713   PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
1714   PetscInt               m, n, dim, s, *cols, nc, col, cnt, *dnz, *onz;
1715   PetscInt               istart, iend, jstart, jend, ii, jj;
1716   MPI_Comm               comm;
1717   PetscScalar           *values;
1718   DMBoundaryType         bx, by;
1719   DMDAStencilType        st;
1720   ISLocalToGlobalMapping ltog;
1721 
1722   PetscFunctionBegin;
1723   /*
1724      nc - number of components per grid point
1725      col - number of colors needed in one direction for single component problem
1726   */
1727   PetscCall(DMDAGetInfo(da, &dim, &m, &n, NULL, NULL, NULL, NULL, &nc, &s, &bx, &by, NULL, &st));
1728   col = 2 * s + 1;
1729 
1730   PetscCall(DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL));
1731   PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL));
1732   PetscCall(PetscObjectGetComm((PetscObject)da, &comm));
1733 
1734   PetscCall(PetscMalloc1(col * col * nc * nc, &cols));
1735 
1736   PetscCall(DMGetLocalToGlobalMapping(da, &ltog));
1737 
1738   /* determine the matrix preallocation information */
1739   MatPreallocateBegin(comm, nx * ny, nx * ny, dnz, onz);
1740   for (i = xs; i < xs + nx; i++) {
1741     istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
1742     iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
1743     for (j = ys; j < ys + ny; j++) {
1744       jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
1745       jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
1746       slot   = i - gxs + gnx * (j - gys);
1747 
1748       /* Find block columns in block row */
1749       cnt = 0;
1750       for (ii = istart; ii < iend + 1; ii++) {
1751         for (jj = jstart; jj < jend + 1; jj++) {
1752           if (st == DMDA_STENCIL_BOX || !ii || !jj) { /* BOX or on the STAR */
1753             cols[cnt++] = slot + ii + gnx * jj;
1754           }
1755         }
1756       }
1757       PetscCall(MatPreallocateSetLocalBlock(ltog, 1, &slot, ltog, cnt, cols, dnz, onz));
1758     }
1759   }
1760   PetscCall(MatSeqBAIJSetPreallocation(J, nc, 0, dnz));
1761   PetscCall(MatMPIBAIJSetPreallocation(J, nc, 0, dnz, 0, onz));
1762   MatPreallocateEnd(dnz, onz);
1763 
1764   PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));
1765 
1766   /*
1767     For each node in the grid: we get the neighbors in the local (on processor ordering
1768     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1769     PETSc ordering.
1770   */
1771   if (!da->prealloc_only) {
1772     PetscCall(PetscCalloc1(col * col * nc * nc, &values));
1773     for (i = xs; i < xs + nx; i++) {
1774       istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
1775       iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
1776       for (j = ys; j < ys + ny; j++) {
1777         jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
1778         jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
1779         slot   = i - gxs + gnx * (j - gys);
1780         cnt    = 0;
1781         for (ii = istart; ii < iend + 1; ii++) {
1782           for (jj = jstart; jj < jend + 1; jj++) {
1783             if (st == DMDA_STENCIL_BOX || !ii || !jj) { /* BOX or on the STAR */
1784               cols[cnt++] = slot + ii + gnx * jj;
1785             }
1786           }
1787         }
1788         PetscCall(MatSetValuesBlockedLocal(J, 1, &slot, cnt, cols, values, INSERT_VALUES));
1789       }
1790     }
1791     PetscCall(PetscFree(values));
1792     /* do not copy values to GPU since they are all zero and not yet needed there */
1793     PetscCall(MatBindToCPU(J, PETSC_TRUE));
1794     PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
1795     PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
1796     PetscCall(MatBindToCPU(J, PETSC_FALSE));
1797     PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
1798   }
1799   PetscCall(PetscFree(cols));
1800   PetscFunctionReturn(PETSC_SUCCESS);
1801 }
1802 
1803 PetscErrorCode DMCreateMatrix_DA_3d_MPIBAIJ(DM da, Mat J)
1804 {
1805   PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
1806   PetscInt               m, n, dim, s, *cols, k, nc, col, cnt, p, *dnz, *onz;
1807   PetscInt               istart, iend, jstart, jend, kstart, kend, zs, nz, gzs, gnz, ii, jj, kk;
1808   MPI_Comm               comm;
1809   PetscScalar           *values;
1810   DMBoundaryType         bx, by, bz;
1811   DMDAStencilType        st;
1812   ISLocalToGlobalMapping ltog;
1813 
1814   PetscFunctionBegin;
1815   /*
1816          nc - number of components per grid point
1817          col - number of colors needed in one direction for single component problem
1818   */
1819   PetscCall(DMDAGetInfo(da, &dim, &m, &n, &p, NULL, NULL, NULL, &nc, &s, &bx, &by, &bz, &st));
1820   col = 2 * s + 1;
1821 
1822   PetscCall(DMDAGetCorners(da, &xs, &ys, &zs, &nx, &ny, &nz));
1823   PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, &gzs, &gnx, &gny, &gnz));
1824   PetscCall(PetscObjectGetComm((PetscObject)da, &comm));
1825 
1826   PetscCall(PetscMalloc1(col * col * col, &cols));
1827 
1828   PetscCall(DMGetLocalToGlobalMapping(da, &ltog));
1829 
1830   /* determine the matrix preallocation information */
1831   MatPreallocateBegin(comm, nx * ny * nz, nx * ny * nz, dnz, onz);
1832   for (i = xs; i < xs + nx; i++) {
1833     istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
1834     iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
1835     for (j = ys; j < ys + ny; j++) {
1836       jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
1837       jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
1838       for (k = zs; k < zs + nz; k++) {
1839         kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
1840         kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));
1841 
1842         slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);
1843 
1844         /* Find block columns in block row */
1845         cnt = 0;
1846         for (ii = istart; ii < iend + 1; ii++) {
1847           for (jj = jstart; jj < jend + 1; jj++) {
1848             for (kk = kstart; kk < kend + 1; kk++) {
1849               if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/
1850                 cols[cnt++] = slot + ii + gnx * jj + gnx * gny * kk;
1851               }
1852             }
1853           }
1854         }
1855         PetscCall(MatPreallocateSetLocalBlock(ltog, 1, &slot, ltog, cnt, cols, dnz, onz));
1856       }
1857     }
1858   }
1859   PetscCall(MatSeqBAIJSetPreallocation(J, nc, 0, dnz));
1860   PetscCall(MatMPIBAIJSetPreallocation(J, nc, 0, dnz, 0, onz));
1861   MatPreallocateEnd(dnz, onz);
1862 
1863   PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));
1864 
1865   /*
1866     For each node in the grid: we get the neighbors in the local (on processor ordering
1867     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1868     PETSc ordering.
1869   */
1870   if (!da->prealloc_only) {
1871     PetscCall(PetscCalloc1(col * col * col * nc * nc, &values));
1872     for (i = xs; i < xs + nx; i++) {
1873       istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
1874       iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
1875       for (j = ys; j < ys + ny; j++) {
1876         jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
1877         jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
1878         for (k = zs; k < zs + nz; k++) {
1879           kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
1880           kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));
1881 
1882           slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);
1883 
1884           cnt = 0;
1885           for (ii = istart; ii < iend + 1; ii++) {
1886             for (jj = jstart; jj < jend + 1; jj++) {
1887               for (kk = kstart; kk < kend + 1; kk++) {
1888                 if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/
1889                   cols[cnt++] = slot + ii + gnx * jj + gnx * gny * kk;
1890                 }
1891               }
1892             }
1893           }
1894           PetscCall(MatSetValuesBlockedLocal(J, 1, &slot, cnt, cols, values, INSERT_VALUES));
1895         }
1896       }
1897     }
1898     PetscCall(PetscFree(values));
1899     /* do not copy values to GPU since they are all zero and not yet needed there */
1900     PetscCall(MatBindToCPU(J, PETSC_TRUE));
1901     PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
1902     PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
1903     PetscCall(MatBindToCPU(J, PETSC_FALSE));
1904     PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
1905   }
1906   PetscCall(PetscFree(cols));
1907   PetscFunctionReturn(PETSC_SUCCESS);
1908 }
1909 
1910 /*
1911   This helper is for of SBAIJ preallocation, to discard the lower-triangular values which are difficult to
1912   identify in the local ordering with periodic domain.
1913 */
1914 static PetscErrorCode L2GFilterUpperTriangular(ISLocalToGlobalMapping ltog, PetscInt *row, PetscInt *cnt, PetscInt col[])
1915 {
1916   PetscInt i, n;
1917 
1918   PetscFunctionBegin;
1919   PetscCall(ISLocalToGlobalMappingApplyBlock(ltog, 1, row, row));
1920   PetscCall(ISLocalToGlobalMappingApplyBlock(ltog, *cnt, col, col));
1921   for (i = 0, n = 0; i < *cnt; i++) {
1922     if (col[i] >= *row) col[n++] = col[i];
1923   }
1924   *cnt = n;
1925   PetscFunctionReturn(PETSC_SUCCESS);
1926 }
1927 
1928 PetscErrorCode DMCreateMatrix_DA_2d_MPISBAIJ(DM da, Mat J)
1929 {
1930   PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
1931   PetscInt               m, n, dim, s, *cols, nc, col, cnt, *dnz, *onz;
1932   PetscInt               istart, iend, jstart, jend, ii, jj;
1933   MPI_Comm               comm;
1934   PetscScalar           *values;
1935   DMBoundaryType         bx, by;
1936   DMDAStencilType        st;
1937   ISLocalToGlobalMapping ltog;
1938 
1939   PetscFunctionBegin;
1940   /*
1941      nc - number of components per grid point
1942      col - number of colors needed in one direction for single component problem
1943   */
1944   PetscCall(DMDAGetInfo(da, &dim, &m, &n, NULL, NULL, NULL, NULL, &nc, &s, &bx, &by, NULL, &st));
1945   col = 2 * s + 1;
1946 
1947   PetscCall(DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL));
1948   PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL));
1949   PetscCall(PetscObjectGetComm((PetscObject)da, &comm));
1950 
1951   PetscCall(PetscMalloc1(col * col * nc * nc, &cols));
1952 
1953   PetscCall(DMGetLocalToGlobalMapping(da, &ltog));
1954 
1955   /* determine the matrix preallocation information */
1956   MatPreallocateBegin(comm, nx * ny, nx * ny, dnz, onz);
1957   for (i = xs; i < xs + nx; i++) {
1958     istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
1959     iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
1960     for (j = ys; j < ys + ny; j++) {
1961       jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
1962       jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
1963       slot   = i - gxs + gnx * (j - gys);
1964 
1965       /* Find block columns in block row */
1966       cnt = 0;
1967       for (ii = istart; ii < iend + 1; ii++) {
1968         for (jj = jstart; jj < jend + 1; jj++) {
1969           if (st == DMDA_STENCIL_BOX || !ii || !jj) cols[cnt++] = slot + ii + gnx * jj;
1970         }
1971       }
1972       PetscCall(L2GFilterUpperTriangular(ltog, &slot, &cnt, cols));
1973       PetscCall(MatPreallocateSymmetricSetBlock(slot, cnt, cols, dnz, onz));
1974     }
1975   }
1976   PetscCall(MatSeqSBAIJSetPreallocation(J, nc, 0, dnz));
1977   PetscCall(MatMPISBAIJSetPreallocation(J, nc, 0, dnz, 0, onz));
1978   MatPreallocateEnd(dnz, onz);
1979 
1980   PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));
1981 
1982   /*
1983     For each node in the grid: we get the neighbors in the local (on processor ordering
1984     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1985     PETSc ordering.
1986   */
1987   if (!da->prealloc_only) {
1988     PetscCall(PetscCalloc1(col * col * nc * nc, &values));
1989     for (i = xs; i < xs + nx; i++) {
1990       istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
1991       iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
1992       for (j = ys; j < ys + ny; j++) {
1993         jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
1994         jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
1995         slot   = i - gxs + gnx * (j - gys);
1996 
1997         /* Find block columns in block row */
1998         cnt = 0;
1999         for (ii = istart; ii < iend + 1; ii++) {
2000           for (jj = jstart; jj < jend + 1; jj++) {
2001             if (st == DMDA_STENCIL_BOX || !ii || !jj) cols[cnt++] = slot + ii + gnx * jj;
2002           }
2003         }
2004         PetscCall(L2GFilterUpperTriangular(ltog, &slot, &cnt, cols));
2005         PetscCall(MatSetValuesBlocked(J, 1, &slot, cnt, cols, values, INSERT_VALUES));
2006       }
2007     }
2008     PetscCall(PetscFree(values));
2009     /* do not copy values to GPU since they are all zero and not yet needed there */
2010     PetscCall(MatBindToCPU(J, PETSC_TRUE));
2011     PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
2012     PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
2013     PetscCall(MatBindToCPU(J, PETSC_FALSE));
2014     PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
2015   }
2016   PetscCall(PetscFree(cols));
2017   PetscFunctionReturn(PETSC_SUCCESS);
2018 }
2019 
2020 PetscErrorCode DMCreateMatrix_DA_3d_MPISBAIJ(DM da, Mat J)
2021 {
2022   PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
2023   PetscInt               m, n, dim, s, *cols, k, nc, col, cnt, p, *dnz, *onz;
2024   PetscInt               istart, iend, jstart, jend, kstart, kend, zs, nz, gzs, gnz, ii, jj, kk;
2025   MPI_Comm               comm;
2026   PetscScalar           *values;
2027   DMBoundaryType         bx, by, bz;
2028   DMDAStencilType        st;
2029   ISLocalToGlobalMapping ltog;
2030 
2031   PetscFunctionBegin;
2032   /*
2033      nc - number of components per grid point
2034      col - number of colors needed in one direction for single component problem
2035   */
2036   PetscCall(DMDAGetInfo(da, &dim, &m, &n, &p, NULL, NULL, NULL, &nc, &s, &bx, &by, &bz, &st));
2037   col = 2 * s + 1;
2038 
2039   PetscCall(DMDAGetCorners(da, &xs, &ys, &zs, &nx, &ny, &nz));
2040   PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, &gzs, &gnx, &gny, &gnz));
2041   PetscCall(PetscObjectGetComm((PetscObject)da, &comm));
2042 
2043   /* create the matrix */
2044   PetscCall(PetscMalloc1(col * col * col, &cols));
2045 
2046   PetscCall(DMGetLocalToGlobalMapping(da, &ltog));
2047 
2048   /* determine the matrix preallocation information */
2049   MatPreallocateBegin(comm, nx * ny * nz, nx * ny * nz, dnz, onz);
2050   for (i = xs; i < xs + nx; i++) {
2051     istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
2052     iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
2053     for (j = ys; j < ys + ny; j++) {
2054       jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
2055       jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
2056       for (k = zs; k < zs + nz; k++) {
2057         kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
2058         kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));
2059 
2060         slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);
2061 
2062         /* Find block columns in block row */
2063         cnt = 0;
2064         for (ii = istart; ii < iend + 1; ii++) {
2065           for (jj = jstart; jj < jend + 1; jj++) {
2066             for (kk = kstart; kk < kend + 1; kk++) {
2067               if ((st == DMDA_STENCIL_BOX) || (!ii && !jj) || (!jj && !kk) || (!ii && !kk)) cols[cnt++] = slot + ii + gnx * jj + gnx * gny * kk;
2068             }
2069           }
2070         }
2071         PetscCall(L2GFilterUpperTriangular(ltog, &slot, &cnt, cols));
2072         PetscCall(MatPreallocateSymmetricSetBlock(slot, cnt, cols, dnz, onz));
2073       }
2074     }
2075   }
2076   PetscCall(MatSeqSBAIJSetPreallocation(J, nc, 0, dnz));
2077   PetscCall(MatMPISBAIJSetPreallocation(J, nc, 0, dnz, 0, onz));
2078   MatPreallocateEnd(dnz, onz);
2079 
2080   PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));
2081 
2082   /*
2083     For each node in the grid: we get the neighbors in the local (on processor ordering
2084     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
2085     PETSc ordering.
2086   */
2087   if (!da->prealloc_only) {
2088     PetscCall(PetscCalloc1(col * col * col * nc * nc, &values));
2089     for (i = xs; i < xs + nx; i++) {
2090       istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
2091       iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
2092       for (j = ys; j < ys + ny; j++) {
2093         jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
2094         jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
2095         for (k = zs; k < zs + nz; k++) {
2096           kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
2097           kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));
2098 
2099           slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);
2100 
2101           cnt = 0;
2102           for (ii = istart; ii < iend + 1; ii++) {
2103             for (jj = jstart; jj < jend + 1; jj++) {
2104               for (kk = kstart; kk < kend + 1; kk++) {
2105                 if ((st == DMDA_STENCIL_BOX) || (!ii && !jj) || (!jj && !kk) || (!ii && !kk)) cols[cnt++] = slot + ii + gnx * jj + gnx * gny * kk;
2106               }
2107             }
2108           }
2109           PetscCall(L2GFilterUpperTriangular(ltog, &slot, &cnt, cols));
2110           PetscCall(MatSetValuesBlocked(J, 1, &slot, cnt, cols, values, INSERT_VALUES));
2111         }
2112       }
2113     }
2114     PetscCall(PetscFree(values));
2115     /* do not copy values to GPU since they are all zero and not yet needed there */
2116     PetscCall(MatBindToCPU(J, PETSC_TRUE));
2117     PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
2118     PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
2119     PetscCall(MatBindToCPU(J, PETSC_FALSE));
2120     PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
2121   }
2122   PetscCall(PetscFree(cols));
2123   PetscFunctionReturn(PETSC_SUCCESS);
2124 }
2125 
2126 /* ---------------------------------------------------------------------------------*/
2127 
/*
  DMCreateMatrix_DA_3d_MPIAIJ_Fill - Preallocates (and optionally zero-fills) the AIJ matrix
  for a 3d DMDA when the user supplied component-coupling patterns with DMDASetBlockFills().

  dfill - sparse pattern of coupling between components at the SAME grid point (CSR-like
          layout produced by DMDASetBlockFills_Private); NULL means the diagonal block is dense
  ofill - sparse pattern of coupling to components at NEIGHBORING grid points

  Two sweeps over the locally owned grid points: the first counts the nonzeros of each row
  to preallocate J; the second (skipped when da->prealloc_only) inserts explicit zeros at
  every nonzero location so the pattern is assembled and then locked with
  MAT_NEW_NONZERO_LOCATION_ERR.
*/
PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ_Fill(DM da, Mat J)
{
  PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
  PetscInt               m, n, dim, s, *cols, k, nc, row, col, cnt, maxcnt = 0, l, p, *dnz, *onz;
  PetscInt               istart, iend, jstart, jend, kstart, kend, zs, nz, gzs, gnz, ii, jj, kk, M, N, P;
  DM_DA                 *dd = (DM_DA *)da->data;
  PetscInt               ifill_col, *dfill = dd->dfill, *ofill = dd->ofill;
  MPI_Comm               comm;
  PetscScalar           *values;
  DMBoundaryType         bx, by, bz;
  ISLocalToGlobalMapping ltog;
  DMDAStencilType        st;
  PetscBool              removedups = PETSC_FALSE;

  PetscFunctionBegin;
  /*
         nc - number of components per grid point
         col - number of colors needed in one direction for single component problem
  */
  PetscCall(DMDAGetInfo(da, &dim, &m, &n, &p, &M, &N, &P, &nc, &s, &bx, &by, &bz, &st));
  col = 2 * s + 1;

  /*
       With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times
       because of "wrapping" around the end of the domain hitting an entry already counted in the other direction.
  */
  if (M == 1 && 2 * s >= m) removedups = PETSC_TRUE;
  if (N == 1 && 2 * s >= n) removedups = PETSC_TRUE;
  if (P == 1 && 2 * s >= p) removedups = PETSC_TRUE;

  PetscCall(DMDAGetCorners(da, &xs, &ys, &zs, &nx, &ny, &nz));
  PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, &gzs, &gnx, &gny, &gnz));
  PetscCall(PetscObjectGetComm((PetscObject)da, &comm));

  /* scratch column-index array sized for the densest possible row:
     full box stencil (col^3 points) times nc components per point */
  PetscCall(PetscMalloc1(col * col * col * nc, &cols));
  PetscCall(DMGetLocalToGlobalMapping(da, &ltog));

  /* determine the matrix preallocation information */
  MatPreallocateBegin(comm, nc * nx * ny * nz, nc * nx * ny * nz, dnz, onz);

  PetscCall(MatSetBlockSize(J, nc));
  for (i = xs; i < xs + nx; i++) {
    /* stencil extent in each direction, clipped at physical boundaries unless periodic */
    istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
    iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
    for (j = ys; j < ys + ny; j++) {
      jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
      jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
      for (k = zs; k < zs + nz; k++) {
        kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
        kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));

        /* index of grid point (i,j,k) in the local (ghosted) ordering */
        slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);

        /* one matrix row per component l at this grid point */
        for (l = 0; l < nc; l++) {
          cnt = 0;
          for (ii = istart; ii < iend + 1; ii++) {
            for (jj = jstart; jj < jend + 1; jj++) {
              for (kk = kstart; kk < kend + 1; kk++) {
                if (ii || jj || kk) {
                  /* off-point coupling: box stencil takes every offset, star stencil only
                     offsets lying on a coordinate axis; columns come from the ofill pattern */
                  if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/
                    for (ifill_col = ofill[l]; ifill_col < ofill[l + 1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc * (slot + ii + gnx * jj + gnx * gny * kk);
                  }
                } else {
                  /* same-point (diagonal block) coupling: dfill pattern if provided, else dense */
                  if (dfill) {
                    for (ifill_col = dfill[l]; ifill_col < dfill[l + 1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc * (slot + ii + gnx * jj + gnx * gny * kk);
                  } else {
                    for (ifill_col = 0; ifill_col < nc; ifill_col++) cols[cnt++] = ifill_col + nc * (slot + ii + gnx * jj + gnx * gny * kk);
                  }
                }
              }
            }
          }
          row    = l + nc * (slot);
          maxcnt = PetscMax(maxcnt, cnt); /* track widest row; sizes the zero-values buffer below */
          if (removedups) PetscCall(MatPreallocateSetLocalRemoveDups(ltog, 1, &row, ltog, cnt, cols, dnz, onz));
          else PetscCall(MatPreallocateSetLocal(ltog, 1, &row, ltog, cnt, cols, dnz, onz));
        }
      }
    }
  }
  /* both calls are made; only the one matching J's actual type (Seq or MPI) has effect */
  PetscCall(MatSeqAIJSetPreallocation(J, 0, dnz));
  PetscCall(MatMPIAIJSetPreallocation(J, 0, dnz, 0, onz));
  MatPreallocateEnd(dnz, onz);
  PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));

  /*
    For each node in the grid: we get the neighbors in the local (on processor ordering
    that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
    PETSc ordering.
  */
  if (!da->prealloc_only) {
    /* values is calloc'd: zeros are inserted purely to assemble the nonzero pattern.
       NOTE: the column computation below must mirror the preallocation sweep above exactly,
       otherwise MAT_NEW_NONZERO_LOCATION_ERR (set at the end) would be violated later. */
    PetscCall(PetscCalloc1(maxcnt, &values));
    for (i = xs; i < xs + nx; i++) {
      istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
      iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
      for (j = ys; j < ys + ny; j++) {
        jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
        jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
        for (k = zs; k < zs + nz; k++) {
          kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
          kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));

          slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);

          for (l = 0; l < nc; l++) {
            cnt = 0;
            for (ii = istart; ii < iend + 1; ii++) {
              for (jj = jstart; jj < jend + 1; jj++) {
                for (kk = kstart; kk < kend + 1; kk++) {
                  if (ii || jj || kk) {
                    if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/
                      for (ifill_col = ofill[l]; ifill_col < ofill[l + 1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc * (slot + ii + gnx * jj + gnx * gny * kk);
                    }
                  } else {
                    if (dfill) {
                      for (ifill_col = dfill[l]; ifill_col < dfill[l + 1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc * (slot + ii + gnx * jj + gnx * gny * kk);
                    } else {
                      for (ifill_col = 0; ifill_col < nc; ifill_col++) cols[cnt++] = ifill_col + nc * (slot + ii + gnx * jj + gnx * gny * kk);
                    }
                  }
                }
              }
            }
            row = l + nc * (slot);
            PetscCall(MatSetValuesLocal(J, 1, &row, cnt, cols, values, INSERT_VALUES));
          }
        }
      }
    }
    PetscCall(PetscFree(values));
    /* do not copy values to GPU since they are all zero and not yet needed there */
    PetscCall(MatBindToCPU(J, PETSC_TRUE));
    PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
    PetscCall(MatBindToCPU(J, PETSC_FALSE));
    /* any later insertion outside the assembled pattern is an error */
    PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  }
  PetscCall(PetscFree(cols));
  PetscFunctionReturn(PETSC_SUCCESS);
}
2268