xref: /petsc/src/dm/impls/da/fdda.c (revision df4cd43f92eaa320656440c40edb1046daee8f75)
1 
2 #include <petsc/private/dmdaimpl.h> /*I      "petscdmda.h"     I*/
3 #include <petscmat.h>
4 #include <petscbt.h>
5 
6 extern PetscErrorCode DMCreateColoring_DA_1d_MPIAIJ(DM, ISColoringType, ISColoring *);
7 extern PetscErrorCode DMCreateColoring_DA_2d_MPIAIJ(DM, ISColoringType, ISColoring *);
8 extern PetscErrorCode DMCreateColoring_DA_2d_5pt_MPIAIJ(DM, ISColoringType, ISColoring *);
9 extern PetscErrorCode DMCreateColoring_DA_3d_MPIAIJ(DM, ISColoringType, ISColoring *);
10 
11 /*
12    For ghost i that may be negative or greater than the upper bound this
13   maps it into the 0:m-1 range using periodicity
14 */
15 #define SetInRange(i, m) ((i < 0) ? m + i : ((i >= m) ? i - m : i))
16 
17 static PetscErrorCode DMDASetBlockFills_Private(const PetscInt *dfill, PetscInt w, PetscInt **rfill)
18 {
19   PetscInt i, j, nz, *fill;
20 
21   PetscFunctionBegin;
22   if (!dfill) PetscFunctionReturn(PETSC_SUCCESS);
23 
24   /* count number nonzeros */
25   nz = 0;
26   for (i = 0; i < w; i++) {
27     for (j = 0; j < w; j++) {
28       if (dfill[w * i + j]) nz++;
29     }
30   }
31   PetscCall(PetscMalloc1(nz + w + 1, &fill));
32   /* construct modified CSR storage of nonzero structure */
33   /*  fill[0 -- w] marks starts of each row of column indices (and end of last row)
34    so fill[1] - fill[0] gives number of nonzeros in first row etc */
35   nz = w + 1;
36   for (i = 0; i < w; i++) {
37     fill[i] = nz;
38     for (j = 0; j < w; j++) {
39       if (dfill[w * i + j]) {
40         fill[nz] = j;
41         nz++;
42       }
43     }
44   }
45   fill[w] = nz;
46 
47   *rfill = fill;
48   PetscFunctionReturn(PETSC_SUCCESS);
49 }
50 
51 static PetscErrorCode DMDASetBlockFillsSparse_Private(const PetscInt *dfillsparse, PetscInt w, PetscInt **rfill)
52 {
53   PetscInt nz;
54 
55   PetscFunctionBegin;
56   if (!dfillsparse) PetscFunctionReturn(PETSC_SUCCESS);
57 
58   /* Determine number of non-zeros */
59   nz = (dfillsparse[w] - w - 1);
60 
61   /* Allocate space for our copy of the given sparse matrix representation. */
62   PetscCall(PetscMalloc1(nz + w + 1, rfill));
63   PetscCall(PetscArraycpy(*rfill, dfillsparse, nz + w + 1));
64   PetscFunctionReturn(PETSC_SUCCESS);
65 }
66 
67 static PetscErrorCode DMDASetBlockFills_Private2(DM_DA *dd)
68 {
69   PetscInt i, k, cnt = 1;
70 
71   PetscFunctionBegin;
72 
73   /* ofillcount tracks the columns of ofill that have any nonzero in thems; the value in each location is the number of
74    columns to the left with any nonzeros in them plus 1 */
75   PetscCall(PetscCalloc1(dd->w, &dd->ofillcols));
76   for (i = 0; i < dd->w; i++) {
77     for (k = dd->ofill[i]; k < dd->ofill[i + 1]; k++) dd->ofillcols[dd->ofill[k]] = 1;
78   }
79   for (i = 0; i < dd->w; i++) {
80     if (dd->ofillcols[i]) dd->ofillcols[i] = cnt++;
81   }
82   PetscFunctionReturn(PETSC_SUCCESS);
83 }
84 
85 /*@
86     DMDASetBlockFills - Sets the fill pattern in each block for a multi-component problem
87     of the matrix returned by `DMCreateMatrix()`.
88 
89     Logically Collective on da
90 
91     Input Parameters:
92 +   da - the distributed array
93 .   dfill - the fill pattern in the diagonal block (may be NULL, means use dense block)
94 -   ofill - the fill pattern in the off-diagonal blocks
95 
96     Level: developer
97 
98     Notes:
99     This only makes sense when you are doing multicomponent problems but using the
100        `MATMPIAIJ` matrix format
101 
102            The format for dfill and ofill is a 2 dimensional dof by dof matrix with 1 entries
103        representing coupling and 0 entries for missing coupling. For example
104 .vb
105             dfill[9] = {1, 0, 0,
106                         1, 1, 0,
107                         0, 1, 1}
108 .ve
109        means that row 0 is coupled with only itself in the diagonal block, row 1 is coupled with
110        itself and row 0 (in the diagonal block) and row 2 is coupled with itself and row 1 (in the
111        diagonal block).
112 
113      `DMDASetGetMatrix()` allows you to provide general code for those more complicated nonzero patterns then
114      can be represented in the dfill, ofill format
115 
116    Contributed by Glenn Hammond
117 
118 .seealso: `DM`, `DMDA`, `DMCreateMatrix()`, `DMDASetGetMatrix()`, `DMSetMatrixPreallocateOnly()`
119 @*/
120 PetscErrorCode DMDASetBlockFills(DM da, const PetscInt *dfill, const PetscInt *ofill)
121 {
122   DM_DA *dd = (DM_DA *)da->data;
123 
124   PetscFunctionBegin;
125   /* save the given dfill and ofill information */
126   PetscCall(DMDASetBlockFills_Private(dfill, dd->w, &dd->dfill));
127   PetscCall(DMDASetBlockFills_Private(ofill, dd->w, &dd->ofill));
128 
129   /* count nonzeros in ofill columns */
130   PetscCall(DMDASetBlockFills_Private2(dd));
131   PetscFunctionReturn(PETSC_SUCCESS);
132 }
133 
134 /*@
135     DMDASetBlockFillsSparse - Sets the fill pattern in each block for a multi-component problem
136     of the matrix returned by `DMCreateMatrix()`, using sparse representations
137     of fill patterns.
138 
139     Logically Collective on da
140 
141     Input Parameters:
142 +   da - the distributed array
143 .   dfill - the sparse fill pattern in the diagonal block (may be NULL, means use dense block)
144 -   ofill - the sparse fill pattern in the off-diagonal blocks
145 
146     Level: developer
147 
148     Notes:
149     This only makes sense when you are doing multicomponent problems but using the
150        `MATMPIAIJ` matrix format
151 
152            The format for dfill and ofill is a sparse representation of a
153            dof-by-dof matrix with 1 entries representing coupling and 0 entries
154            for missing coupling.  The sparse representation is a 1 dimensional
155            array of length nz + dof + 1, where nz is the number of non-zeros in
156            the matrix.  The first dof entries in the array give the
157            starting array indices of each row's items in the rest of the array,
158            the dof+1st item contains the value nz + dof + 1 (i.e. the entire length of the array)
159            and the remaining nz items give the column indices of each of
160            the 1s within the logical 2D matrix.  Each row's items within
161            the array are the column indices of the 1s within that row
162            of the 2D matrix.  PETSc developers may recognize that this is the
163            same format as that computed by the `DMDASetBlockFills_Private()`
164            function from a dense 2D matrix representation.
165 
166      `DMDASetGetMatrix()` allows you to provide general code for those more complicated nonzero patterns then
167      can be represented in the dfill, ofill format
168 
169    Contributed by Philip C. Roth
170 
171 .seealso: `DM`, `DMDA`, `DMDASetBlockFills()`, `DMCreateMatrix()`, `DMDASetGetMatrix()`, `DMSetMatrixPreallocateOnly()`
172 @*/
173 PetscErrorCode DMDASetBlockFillsSparse(DM da, const PetscInt *dfillsparse, const PetscInt *ofillsparse)
174 {
175   DM_DA *dd = (DM_DA *)da->data;
176 
177   PetscFunctionBegin;
178   /* save the given dfill and ofill information */
179   PetscCall(DMDASetBlockFillsSparse_Private(dfillsparse, dd->w, &dd->dfill));
180   PetscCall(DMDASetBlockFillsSparse_Private(ofillsparse, dd->w, &dd->ofill));
181 
182   /* count nonzeros in ofill columns */
183   PetscCall(DMDASetBlockFills_Private2(dd));
184   PetscFunctionReturn(PETSC_SUCCESS);
185 }
186 
187 PetscErrorCode DMCreateColoring_DA(DM da, ISColoringType ctype, ISColoring *coloring)
188 {
189   PetscInt       dim, m, n, p, nc;
190   DMBoundaryType bx, by, bz;
191   MPI_Comm       comm;
192   PetscMPIInt    size;
193   PetscBool      isBAIJ;
194   DM_DA         *dd = (DM_DA *)da->data;
195 
196   PetscFunctionBegin;
197   /*
198                                   m
199           ------------------------------------------------------
200          |                                                     |
201          |                                                     |
202          |               ----------------------                |
203          |               |                    |                |
204       n  |           yn  |                    |                |
205          |               |                    |                |
206          |               .---------------------                |
207          |             (xs,ys)     xn                          |
208          |            .                                        |
209          |         (gxs,gys)                                   |
210          |                                                     |
211           -----------------------------------------------------
212   */
213 
214   /*
215          nc - number of components per grid point
216          col - number of colors needed in one direction for single component problem
217 
218   */
219   PetscCall(DMDAGetInfo(da, &dim, NULL, NULL, NULL, &m, &n, &p, &nc, NULL, &bx, &by, &bz, NULL));
220 
221   PetscCall(PetscObjectGetComm((PetscObject)da, &comm));
222   PetscCallMPI(MPI_Comm_size(comm, &size));
223   if (ctype == IS_COLORING_LOCAL) {
224     if (size == 1) {
225       ctype = IS_COLORING_GLOBAL;
226     } else {
227       PetscCheck((dim == 1) || !((m == 1 && bx == DM_BOUNDARY_PERIODIC) || (n == 1 && by == DM_BOUNDARY_PERIODIC) || (p == 1 && bz == DM_BOUNDARY_PERIODIC)), PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "IS_COLORING_LOCAL cannot be used for periodic boundary condition having both ends of the domain on the same process");
228     }
229   }
230 
231   /* Tell the DMDA it has 1 degree of freedom per grid point so that the coloring for BAIJ
232      matrices is for the blocks, not the individual matrix elements  */
233   PetscCall(PetscStrbeginswith(da->mattype, MATBAIJ, &isBAIJ));
234   if (!isBAIJ) PetscCall(PetscStrbeginswith(da->mattype, MATMPIBAIJ, &isBAIJ));
235   if (!isBAIJ) PetscCall(PetscStrbeginswith(da->mattype, MATSEQBAIJ, &isBAIJ));
236   if (isBAIJ) {
237     dd->w  = 1;
238     dd->xs = dd->xs / nc;
239     dd->xe = dd->xe / nc;
240     dd->Xs = dd->Xs / nc;
241     dd->Xe = dd->Xe / nc;
242   }
243 
244   /*
245      We do not provide a getcoloring function in the DMDA operations because
246    the basic DMDA does not know about matrices. We think of DMDA as being
247    more low-level then matrices.
248   */
249   if (dim == 1) PetscCall(DMCreateColoring_DA_1d_MPIAIJ(da, ctype, coloring));
250   else if (dim == 2) PetscCall(DMCreateColoring_DA_2d_MPIAIJ(da, ctype, coloring));
251   else if (dim == 3) PetscCall(DMCreateColoring_DA_3d_MPIAIJ(da, ctype, coloring));
252   else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "Not done for %" PetscInt_FMT " dimension, send us mail petsc-maint@mcs.anl.gov for code", dim);
253   if (isBAIJ) {
254     dd->w  = nc;
255     dd->xs = dd->xs * nc;
256     dd->xe = dd->xe * nc;
257     dd->Xs = dd->Xs * nc;
258     dd->Xe = dd->Xe * nc;
259   }
260   PetscFunctionReturn(PETSC_SUCCESS);
261 }
262 
263 /* ---------------------------------------------------------------------------------*/
264 
265 PetscErrorCode DMCreateColoring_DA_2d_MPIAIJ(DM da, ISColoringType ctype, ISColoring *coloring)
266 {
267   PetscInt         xs, ys, nx, ny, i, j, ii, gxs, gys, gnx, gny, m, n, M, N, dim, s, k, nc, col;
268   PetscInt         ncolors = 0;
269   MPI_Comm         comm;
270   DMBoundaryType   bx, by;
271   DMDAStencilType  st;
272   ISColoringValue *colors;
273   DM_DA           *dd = (DM_DA *)da->data;
274 
275   PetscFunctionBegin;
276   /*
277          nc - number of components per grid point
278          col - number of colors needed in one direction for single component problem
279 
280   */
281   PetscCall(DMDAGetInfo(da, &dim, &m, &n, NULL, &M, &N, NULL, &nc, &s, &bx, &by, NULL, &st));
282   col = 2 * s + 1;
283   PetscCall(DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL));
284   PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL));
285   PetscCall(PetscObjectGetComm((PetscObject)da, &comm));
286 
287   /* special case as taught to us by Paul Hovland */
288   if (st == DMDA_STENCIL_STAR && s == 1) {
289     PetscCall(DMCreateColoring_DA_2d_5pt_MPIAIJ(da, ctype, coloring));
290   } else {
291     if (ctype == IS_COLORING_GLOBAL) {
292       if (!dd->localcoloring) {
293         PetscCall(PetscMalloc1(nc * nx * ny, &colors));
294         ii = 0;
295         for (j = ys; j < ys + ny; j++) {
296           for (i = xs; i < xs + nx; i++) {
297             for (k = 0; k < nc; k++) colors[ii++] = k + nc * ((i % col) + col * (j % col));
298           }
299         }
300         ncolors = nc + nc * (col - 1 + col * (col - 1));
301         PetscCall(ISColoringCreate(comm, ncolors, nc * nx * ny, colors, PETSC_OWN_POINTER, &dd->localcoloring));
302       }
303       *coloring = dd->localcoloring;
304     } else if (ctype == IS_COLORING_LOCAL) {
305       if (!dd->ghostedcoloring) {
306         PetscCall(PetscMalloc1(nc * gnx * gny, &colors));
307         ii = 0;
308         for (j = gys; j < gys + gny; j++) {
309           for (i = gxs; i < gxs + gnx; i++) {
310             for (k = 0; k < nc; k++) {
311               /* the complicated stuff is to handle periodic boundaries */
312               colors[ii++] = k + nc * ((SetInRange(i, m) % col) + col * (SetInRange(j, n) % col));
313             }
314           }
315         }
316         ncolors = nc + nc * (col - 1 + col * (col - 1));
317         PetscCall(ISColoringCreate(comm, ncolors, nc * gnx * gny, colors, PETSC_OWN_POINTER, &dd->ghostedcoloring));
318         /* PetscIntView(ncolors,(PetscInt*)colors,0); */
319 
320         PetscCall(ISColoringSetType(dd->ghostedcoloring, IS_COLORING_LOCAL));
321       }
322       *coloring = dd->ghostedcoloring;
323     } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_ARG_WRONG, "Unknown ISColoringType %d", (int)ctype);
324   }
325   PetscCall(ISColoringReference(*coloring));
326   PetscFunctionReturn(PETSC_SUCCESS);
327 }
328 
329 /* ---------------------------------------------------------------------------------*/
330 
331 PetscErrorCode DMCreateColoring_DA_3d_MPIAIJ(DM da, ISColoringType ctype, ISColoring *coloring)
332 {
333   PetscInt         xs, ys, nx, ny, i, j, gxs, gys, gnx, gny, m, n, p, dim, s, k, nc, col, zs, gzs, ii, l, nz, gnz, M, N, P;
334   PetscInt         ncolors;
335   MPI_Comm         comm;
336   DMBoundaryType   bx, by, bz;
337   DMDAStencilType  st;
338   ISColoringValue *colors;
339   DM_DA           *dd = (DM_DA *)da->data;
340 
341   PetscFunctionBegin;
342   /*
343          nc - number of components per grid point
344          col - number of colors needed in one direction for single component problem
345   */
346   PetscCall(DMDAGetInfo(da, &dim, &m, &n, &p, &M, &N, &P, &nc, &s, &bx, &by, &bz, &st));
347   col = 2 * s + 1;
348   PetscCheck(bx != DM_BOUNDARY_PERIODIC || (m % col) == 0, PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "For coloring efficiency ensure number of grid points in X is divisible\n\
349                  by 2*stencil_width + 1\n");
350   PetscCheck(by != DM_BOUNDARY_PERIODIC || (n % col) == 0, PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "For coloring efficiency ensure number of grid points in Y is divisible\n\
351                  by 2*stencil_width + 1\n");
352   PetscCheck(bz != DM_BOUNDARY_PERIODIC || (p % col) == 0, PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "For coloring efficiency ensure number of grid points in Z is divisible\n\
353                  by 2*stencil_width + 1\n");
354 
355   PetscCall(DMDAGetCorners(da, &xs, &ys, &zs, &nx, &ny, &nz));
356   PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, &gzs, &gnx, &gny, &gnz));
357   PetscCall(PetscObjectGetComm((PetscObject)da, &comm));
358 
359   /* create the coloring */
360   if (ctype == IS_COLORING_GLOBAL) {
361     if (!dd->localcoloring) {
362       PetscCall(PetscMalloc1(nc * nx * ny * nz, &colors));
363       ii = 0;
364       for (k = zs; k < zs + nz; k++) {
365         for (j = ys; j < ys + ny; j++) {
366           for (i = xs; i < xs + nx; i++) {
367             for (l = 0; l < nc; l++) colors[ii++] = l + nc * ((i % col) + col * (j % col) + col * col * (k % col));
368           }
369         }
370       }
371       ncolors = nc + nc * (col - 1 + col * (col - 1) + col * col * (col - 1));
372       PetscCall(ISColoringCreate(comm, ncolors, nc * nx * ny * nz, colors, PETSC_OWN_POINTER, &dd->localcoloring));
373     }
374     *coloring = dd->localcoloring;
375   } else if (ctype == IS_COLORING_LOCAL) {
376     if (!dd->ghostedcoloring) {
377       PetscCall(PetscMalloc1(nc * gnx * gny * gnz, &colors));
378       ii = 0;
379       for (k = gzs; k < gzs + gnz; k++) {
380         for (j = gys; j < gys + gny; j++) {
381           for (i = gxs; i < gxs + gnx; i++) {
382             for (l = 0; l < nc; l++) {
383               /* the complicated stuff is to handle periodic boundaries */
384               colors[ii++] = l + nc * ((SetInRange(i, m) % col) + col * (SetInRange(j, n) % col) + col * col * (SetInRange(k, p) % col));
385             }
386           }
387         }
388       }
389       ncolors = nc + nc * (col - 1 + col * (col - 1) + col * col * (col - 1));
390       PetscCall(ISColoringCreate(comm, ncolors, nc * gnx * gny * gnz, colors, PETSC_OWN_POINTER, &dd->ghostedcoloring));
391       PetscCall(ISColoringSetType(dd->ghostedcoloring, IS_COLORING_LOCAL));
392     }
393     *coloring = dd->ghostedcoloring;
394   } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_ARG_WRONG, "Unknown ISColoringType %d", (int)ctype);
395   PetscCall(ISColoringReference(*coloring));
396   PetscFunctionReturn(PETSC_SUCCESS);
397 }
398 
399 /* ---------------------------------------------------------------------------------*/
400 
401 PetscErrorCode DMCreateColoring_DA_1d_MPIAIJ(DM da, ISColoringType ctype, ISColoring *coloring)
402 {
403   PetscInt         xs, nx, i, i1, gxs, gnx, l, m, M, dim, s, nc, col;
404   PetscInt         ncolors;
405   MPI_Comm         comm;
406   DMBoundaryType   bx;
407   ISColoringValue *colors;
408   DM_DA           *dd = (DM_DA *)da->data;
409 
410   PetscFunctionBegin;
411   /*
412          nc - number of components per grid point
413          col - number of colors needed in one direction for single component problem
414   */
415   PetscCall(DMDAGetInfo(da, &dim, &m, NULL, NULL, &M, NULL, NULL, &nc, &s, &bx, NULL, NULL, NULL));
416   col = 2 * s + 1;
417   PetscCall(DMDAGetCorners(da, &xs, NULL, NULL, &nx, NULL, NULL));
418   PetscCall(DMDAGetGhostCorners(da, &gxs, NULL, NULL, &gnx, NULL, NULL));
419   PetscCall(PetscObjectGetComm((PetscObject)da, &comm));
420 
421   /* create the coloring */
422   if (ctype == IS_COLORING_GLOBAL) {
423     if (!dd->localcoloring) {
424       PetscCall(PetscMalloc1(nc * nx, &colors));
425       if (dd->ofillcols) {
426         PetscInt tc = 0;
427         for (i = 0; i < nc; i++) tc += (PetscInt)(dd->ofillcols[i] > 0);
428         i1 = 0;
429         for (i = xs; i < xs + nx; i++) {
430           for (l = 0; l < nc; l++) {
431             if (dd->ofillcols[l] && (i % col)) {
432               colors[i1++] = nc - 1 + tc * ((i % col) - 1) + dd->ofillcols[l];
433             } else {
434               colors[i1++] = l;
435             }
436           }
437         }
438         ncolors = nc + 2 * s * tc;
439       } else {
440         i1 = 0;
441         for (i = xs; i < xs + nx; i++) {
442           for (l = 0; l < nc; l++) colors[i1++] = l + nc * (i % col);
443         }
444         ncolors = nc + nc * (col - 1);
445       }
446       PetscCall(ISColoringCreate(comm, ncolors, nc * nx, colors, PETSC_OWN_POINTER, &dd->localcoloring));
447     }
448     *coloring = dd->localcoloring;
449   } else if (ctype == IS_COLORING_LOCAL) {
450     if (!dd->ghostedcoloring) {
451       PetscCall(PetscMalloc1(nc * gnx, &colors));
452       i1 = 0;
453       for (i = gxs; i < gxs + gnx; i++) {
454         for (l = 0; l < nc; l++) {
455           /* the complicated stuff is to handle periodic boundaries */
456           colors[i1++] = l + nc * (SetInRange(i, m) % col);
457         }
458       }
459       ncolors = nc + nc * (col - 1);
460       PetscCall(ISColoringCreate(comm, ncolors, nc * gnx, colors, PETSC_OWN_POINTER, &dd->ghostedcoloring));
461       PetscCall(ISColoringSetType(dd->ghostedcoloring, IS_COLORING_LOCAL));
462     }
463     *coloring = dd->ghostedcoloring;
464   } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_ARG_WRONG, "Unknown ISColoringType %d", (int)ctype);
465   PetscCall(ISColoringReference(*coloring));
466   PetscFunctionReturn(PETSC_SUCCESS);
467 }
468 
469 PetscErrorCode DMCreateColoring_DA_2d_5pt_MPIAIJ(DM da, ISColoringType ctype, ISColoring *coloring)
470 {
471   PetscInt         xs, ys, nx, ny, i, j, ii, gxs, gys, gnx, gny, m, n, dim, s, k, nc;
472   PetscInt         ncolors;
473   MPI_Comm         comm;
474   DMBoundaryType   bx, by;
475   ISColoringValue *colors;
476   DM_DA           *dd = (DM_DA *)da->data;
477 
478   PetscFunctionBegin;
479   /*
480          nc - number of components per grid point
481          col - number of colors needed in one direction for single component problem
482   */
483   PetscCall(DMDAGetInfo(da, &dim, &m, &n, NULL, NULL, NULL, NULL, &nc, &s, &bx, &by, NULL, NULL));
484   PetscCall(DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL));
485   PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL));
486   PetscCall(PetscObjectGetComm((PetscObject)da, &comm));
487   /* create the coloring */
488   if (ctype == IS_COLORING_GLOBAL) {
489     if (!dd->localcoloring) {
490       PetscCall(PetscMalloc1(nc * nx * ny, &colors));
491       ii = 0;
492       for (j = ys; j < ys + ny; j++) {
493         for (i = xs; i < xs + nx; i++) {
494           for (k = 0; k < nc; k++) colors[ii++] = k + nc * ((3 * j + i) % 5);
495         }
496       }
497       ncolors = 5 * nc;
498       PetscCall(ISColoringCreate(comm, ncolors, nc * nx * ny, colors, PETSC_OWN_POINTER, &dd->localcoloring));
499     }
500     *coloring = dd->localcoloring;
501   } else if (ctype == IS_COLORING_LOCAL) {
502     if (!dd->ghostedcoloring) {
503       PetscCall(PetscMalloc1(nc * gnx * gny, &colors));
504       ii = 0;
505       for (j = gys; j < gys + gny; j++) {
506         for (i = gxs; i < gxs + gnx; i++) {
507           for (k = 0; k < nc; k++) colors[ii++] = k + nc * ((3 * SetInRange(j, n) + SetInRange(i, m)) % 5);
508         }
509       }
510       ncolors = 5 * nc;
511       PetscCall(ISColoringCreate(comm, ncolors, nc * gnx * gny, colors, PETSC_OWN_POINTER, &dd->ghostedcoloring));
512       PetscCall(ISColoringSetType(dd->ghostedcoloring, IS_COLORING_LOCAL));
513     }
514     *coloring = dd->ghostedcoloring;
515   } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_ARG_WRONG, "Unknown ISColoringType %d", (int)ctype);
516   PetscFunctionReturn(PETSC_SUCCESS);
517 }
518 
519 /* =========================================================================== */
520 extern PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ(DM, Mat);
521 extern PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ_Fill(DM, Mat);
522 extern PetscErrorCode DMCreateMatrix_DA_1d_SeqAIJ_NoPreallocation(DM, Mat);
523 extern PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ(DM, Mat);
524 extern PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ_Fill(DM, Mat);
525 extern PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ(DM, Mat);
526 extern PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ_Fill(DM, Mat);
527 extern PetscErrorCode DMCreateMatrix_DA_2d_MPIBAIJ(DM, Mat);
528 extern PetscErrorCode DMCreateMatrix_DA_3d_MPIBAIJ(DM, Mat);
529 extern PetscErrorCode DMCreateMatrix_DA_2d_MPISBAIJ(DM, Mat);
530 extern PetscErrorCode DMCreateMatrix_DA_3d_MPISBAIJ(DM, Mat);
531 extern PetscErrorCode DMCreateMatrix_DA_2d_MPISELL(DM, Mat);
532 extern PetscErrorCode DMCreateMatrix_DA_3d_MPISELL(DM, Mat);
533 extern PetscErrorCode DMCreateMatrix_DA_IS(DM, Mat);
534 
535 /*@C
536    MatSetupDM - Sets the `DMDA` that is to be used by the HYPRE_StructMatrix PETSc matrix
537 
538    Logically Collective on mat
539 
540    Input Parameters:
541 +  mat - the matrix
542 -  da - the da
543 
544    Level: intermediate
545 
546 .seealso: `Mat`, `MatSetUp()`
547 @*/
548 PetscErrorCode MatSetupDM(Mat mat, DM da)
549 {
550   PetscFunctionBegin;
551   PetscValidHeaderSpecific(mat, MAT_CLASSID, 1);
552   PetscValidHeaderSpecificType(da, DM_CLASSID, 2, DMDA);
553   PetscTryMethod(mat, "MatSetupDM_C", (Mat, DM), (mat, da));
554   PetscFunctionReturn(PETSC_SUCCESS);
555 }
556 
557 PetscErrorCode MatView_MPI_DA(Mat A, PetscViewer viewer)
558 {
559   DM                da;
560   const char       *prefix;
561   Mat               Anatural;
562   AO                ao;
563   PetscInt          rstart, rend, *petsc, i;
564   IS                is;
565   MPI_Comm          comm;
566   PetscViewerFormat format;
567 
568   PetscFunctionBegin;
569   /* Check whether we are just printing info, in which case MatView() already viewed everything we wanted to view */
570   PetscCall(PetscViewerGetFormat(viewer, &format));
571   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) PetscFunctionReturn(PETSC_SUCCESS);
572 
573   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
574   PetscCall(MatGetDM(A, &da));
575   PetscCheck(da, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Matrix not generated from a DMDA");
576 
577   PetscCall(DMDAGetAO(da, &ao));
578   PetscCall(MatGetOwnershipRange(A, &rstart, &rend));
579   PetscCall(PetscMalloc1(rend - rstart, &petsc));
580   for (i = rstart; i < rend; i++) petsc[i - rstart] = i;
581   PetscCall(AOApplicationToPetsc(ao, rend - rstart, petsc));
582   PetscCall(ISCreateGeneral(comm, rend - rstart, petsc, PETSC_OWN_POINTER, &is));
583 
584   /* call viewer on natural ordering */
585   PetscCall(MatCreateSubMatrix(A, is, is, MAT_INITIAL_MATRIX, &Anatural));
586   PetscCall(ISDestroy(&is));
587   PetscCall(PetscObjectGetOptionsPrefix((PetscObject)A, &prefix));
588   PetscCall(PetscObjectSetOptionsPrefix((PetscObject)Anatural, prefix));
589   PetscCall(PetscObjectSetName((PetscObject)Anatural, ((PetscObject)A)->name));
590   ((PetscObject)Anatural)->donotPetscObjectPrintClassNamePrefixType = PETSC_TRUE;
591   PetscCall(MatView(Anatural, viewer));
592   ((PetscObject)Anatural)->donotPetscObjectPrintClassNamePrefixType = PETSC_FALSE;
593   PetscCall(MatDestroy(&Anatural));
594   PetscFunctionReturn(PETSC_SUCCESS);
595 }
596 
597 PetscErrorCode MatLoad_MPI_DA(Mat A, PetscViewer viewer)
598 {
599   DM       da;
600   Mat      Anatural, Aapp;
601   AO       ao;
602   PetscInt rstart, rend, *app, i, m, n, M, N;
603   IS       is;
604   MPI_Comm comm;
605 
606   PetscFunctionBegin;
607   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
608   PetscCall(MatGetDM(A, &da));
609   PetscCheck(da, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Matrix not generated from a DMDA");
610 
611   /* Load the matrix in natural ordering */
612   PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &Anatural));
613   PetscCall(MatSetType(Anatural, ((PetscObject)A)->type_name));
614   PetscCall(MatGetSize(A, &M, &N));
615   PetscCall(MatGetLocalSize(A, &m, &n));
616   PetscCall(MatSetSizes(Anatural, m, n, M, N));
617   PetscCall(MatLoad(Anatural, viewer));
618 
619   /* Map natural ordering to application ordering and create IS */
620   PetscCall(DMDAGetAO(da, &ao));
621   PetscCall(MatGetOwnershipRange(Anatural, &rstart, &rend));
622   PetscCall(PetscMalloc1(rend - rstart, &app));
623   for (i = rstart; i < rend; i++) app[i - rstart] = i;
624   PetscCall(AOPetscToApplication(ao, rend - rstart, app));
625   PetscCall(ISCreateGeneral(comm, rend - rstart, app, PETSC_OWN_POINTER, &is));
626 
627   /* Do permutation and replace header */
628   PetscCall(MatCreateSubMatrix(Anatural, is, is, MAT_INITIAL_MATRIX, &Aapp));
629   PetscCall(MatHeaderReplace(A, &Aapp));
630   PetscCall(ISDestroy(&is));
631   PetscCall(MatDestroy(&Anatural));
632   PetscFunctionReturn(PETSC_SUCCESS);
633 }
634 
635 PetscErrorCode DMCreateMatrix_DA(DM da, Mat *J)
636 {
637   PetscInt    dim, dof, nx, ny, nz, dims[3], starts[3], M, N, P;
638   Mat         A;
639   MPI_Comm    comm;
640   MatType     Atype;
641   MatType     mtype;
642   PetscMPIInt size;
643   DM_DA      *dd    = (DM_DA *)da->data;
644   void (*aij)(void) = NULL, (*baij)(void) = NULL, (*sbaij)(void) = NULL, (*sell)(void) = NULL, (*is)(void) = NULL;
645 
646   PetscFunctionBegin;
647   PetscCall(MatInitializePackage());
648   mtype = da->mattype;
649 
650   /*
651                                   m
652           ------------------------------------------------------
653          |                                                     |
654          |                                                     |
655          |               ----------------------                |
656          |               |                    |                |
657       n  |           ny  |                    |                |
658          |               |                    |                |
659          |               .---------------------                |
660          |             (xs,ys)     nx                          |
661          |            .                                        |
662          |         (gxs,gys)                                   |
663          |                                                     |
664           -----------------------------------------------------
665   */
666 
667   /*
668          nc - number of components per grid point
669          col - number of colors needed in one direction for single component problem
670   */
671   M   = dd->M;
672   N   = dd->N;
673   P   = dd->P;
674   dim = da->dim;
675   dof = dd->w;
676   /* PetscCall(DMDAGetInfo(da,&dim,&M,&N,&P,NULL,NULL,NULL,&dof,NULL,NULL,NULL,NULL,NULL)); */
677   PetscCall(DMDAGetCorners(da, NULL, NULL, NULL, &nx, &ny, &nz));
678   PetscCall(PetscObjectGetComm((PetscObject)da, &comm));
679   PetscCall(MatCreate(comm, &A));
680   PetscCall(MatSetSizes(A, dof * nx * ny * nz, dof * nx * ny * nz, dof * M * N * P, dof * M * N * P));
681   PetscCall(MatSetType(A, mtype));
682   PetscCall(MatSetFromOptions(A));
683   if (dof * nx * ny * nz < da->bind_below) {
684     PetscCall(MatSetBindingPropagates(A, PETSC_TRUE));
685     PetscCall(MatBindToCPU(A, PETSC_TRUE));
686   }
687   PetscCall(MatSetDM(A, da));
688   if (da->structure_only) PetscCall(MatSetOption(A, MAT_STRUCTURE_ONLY, PETSC_TRUE));
689   PetscCall(MatGetType(A, &Atype));
690   /*
691      We do not provide a getmatrix function in the DMDA operations because
692    the basic DMDA does not know about matrices. We think of DMDA as being more
693    more low-level than matrices. This is kind of cheating but, cause sometimes
694    we think of DMDA has higher level than matrices.
695 
696      We could switch based on Atype (or mtype), but we do not since the
697    specialized setting routines depend only on the particular preallocation
698    details of the matrix, not the type itself.
699   */
700   PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatMPIAIJSetPreallocation_C", &aij));
701   if (!aij) PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatSeqAIJSetPreallocation_C", &aij));
702   if (!aij) {
703     PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatMPIBAIJSetPreallocation_C", &baij));
704     if (!baij) PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatSeqBAIJSetPreallocation_C", &baij));
705     if (!baij) {
706       PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatMPISBAIJSetPreallocation_C", &sbaij));
707       if (!sbaij) PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatSeqSBAIJSetPreallocation_C", &sbaij));
708       if (!sbaij) {
709         PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatMPISELLSetPreallocation_C", &sell));
710         if (!sell) PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatSeqSELLSetPreallocation_C", &sell));
711       }
712       if (!sell) PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatISSetPreallocation_C", &is));
713     }
714   }
715   if (aij) {
716     if (dim == 1) {
717       if (dd->ofill) {
718         PetscCall(DMCreateMatrix_DA_1d_MPIAIJ_Fill(da, A));
719       } else {
720         DMBoundaryType bx;
721         PetscMPIInt    size;
722         PetscCall(DMDAGetInfo(da, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, &bx, NULL, NULL, NULL));
723         PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)da), &size));
724         if (size == 1 && bx == DM_BOUNDARY_NONE) {
725           PetscCall(DMCreateMatrix_DA_1d_SeqAIJ_NoPreallocation(da, A));
726         } else {
727           PetscCall(DMCreateMatrix_DA_1d_MPIAIJ(da, A));
728         }
729       }
730     } else if (dim == 2) {
731       if (dd->ofill) {
732         PetscCall(DMCreateMatrix_DA_2d_MPIAIJ_Fill(da, A));
733       } else {
734         PetscCall(DMCreateMatrix_DA_2d_MPIAIJ(da, A));
735       }
736     } else if (dim == 3) {
737       if (dd->ofill) {
738         PetscCall(DMCreateMatrix_DA_3d_MPIAIJ_Fill(da, A));
739       } else {
740         PetscCall(DMCreateMatrix_DA_3d_MPIAIJ(da, A));
741       }
742     }
743   } else if (baij) {
744     if (dim == 2) {
745       PetscCall(DMCreateMatrix_DA_2d_MPIBAIJ(da, A));
746     } else if (dim == 3) {
747       PetscCall(DMCreateMatrix_DA_3d_MPIBAIJ(da, A));
748     } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "Not implemented for %" PetscInt_FMT " dimension and Matrix Type: %s in %" PetscInt_FMT " dimension! Send mail to petsc-maint@mcs.anl.gov for code", dim, Atype, dim);
749   } else if (sbaij) {
750     if (dim == 2) {
751       PetscCall(DMCreateMatrix_DA_2d_MPISBAIJ(da, A));
752     } else if (dim == 3) {
753       PetscCall(DMCreateMatrix_DA_3d_MPISBAIJ(da, A));
754     } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "Not implemented for %" PetscInt_FMT " dimension and Matrix Type: %s in %" PetscInt_FMT " dimension! Send mail to petsc-maint@mcs.anl.gov for code", dim, Atype, dim);
755   } else if (sell) {
756     if (dim == 2) {
757       PetscCall(DMCreateMatrix_DA_2d_MPISELL(da, A));
758     } else if (dim == 3) {
759       PetscCall(DMCreateMatrix_DA_3d_MPISELL(da, A));
760     } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "Not implemented for %" PetscInt_FMT " dimension and Matrix Type: %s in %" PetscInt_FMT " dimension! Send mail to petsc-maint@mcs.anl.gov for code", dim, Atype, dim);
761   } else if (is) {
762     PetscCall(DMCreateMatrix_DA_IS(da, A));
763   } else {
764     ISLocalToGlobalMapping ltog;
765 
766     PetscCall(MatSetBlockSize(A, dof));
767     PetscCall(MatSetUp(A));
768     PetscCall(DMGetLocalToGlobalMapping(da, &ltog));
769     PetscCall(MatSetLocalToGlobalMapping(A, ltog, ltog));
770   }
771   PetscCall(DMDAGetGhostCorners(da, &starts[0], &starts[1], &starts[2], &dims[0], &dims[1], &dims[2]));
772   PetscCall(MatSetStencil(A, dim, dims, starts, dof));
773   PetscCall(MatSetDM(A, da));
774   PetscCallMPI(MPI_Comm_size(comm, &size));
775   if (size > 1) {
776     /* change viewer to display matrix in natural ordering */
777     PetscCall(MatSetOperation(A, MATOP_VIEW, (void (*)(void))MatView_MPI_DA));
778     PetscCall(MatSetOperation(A, MATOP_LOAD, (void (*)(void))MatLoad_MPI_DA));
779   }
780   *J = A;
781   PetscFunctionReturn(PETSC_SUCCESS);
782 }
783 
784 /* ---------------------------------------------------------------------------------*/
785 PETSC_EXTERN PetscErrorCode MatISSetPreallocation_IS(Mat, PetscInt, const PetscInt[], PetscInt, const PetscInt[]);
786 
787 PetscErrorCode DMCreateMatrix_DA_IS(DM dm, Mat J)
788 {
789   DM_DA                 *da = (DM_DA *)dm->data;
790   Mat                    lJ, P;
791   ISLocalToGlobalMapping ltog;
792   IS                     is;
793   PetscBT                bt;
794   const PetscInt        *e_loc, *idx;
795   PetscInt               i, nel, nen, nv, dof, *gidx, n, N;
796 
797   /* The l2g map of DMDA has all ghosted nodes, and e_loc is a subset of all the local nodes (including the ghosted)
798      We need to filter out the local indices that are not represented through the DMDAGetElements decomposition */
799   PetscFunctionBegin;
800   dof = da->w;
801   PetscCall(MatSetBlockSize(J, dof));
802   PetscCall(DMGetLocalToGlobalMapping(dm, &ltog));
803 
804   /* flag local elements indices in local DMDA numbering */
805   PetscCall(ISLocalToGlobalMappingGetSize(ltog, &nv));
806   PetscCall(PetscBTCreate(nv / dof, &bt));
807   PetscCall(DMDAGetElements(dm, &nel, &nen, &e_loc)); /* this will throw an error if the stencil type is not DMDA_STENCIL_BOX */
808   for (i = 0; i < nel * nen; i++) PetscCall(PetscBTSet(bt, e_loc[i]));
809 
810   /* filter out (set to -1) the global indices not used by the local elements */
811   PetscCall(PetscMalloc1(nv / dof, &gidx));
812   PetscCall(ISLocalToGlobalMappingGetBlockIndices(ltog, &idx));
813   PetscCall(PetscArraycpy(gidx, idx, nv / dof));
814   PetscCall(ISLocalToGlobalMappingRestoreBlockIndices(ltog, &idx));
815   for (i = 0; i < nv / dof; i++)
816     if (!PetscBTLookup(bt, i)) gidx[i] = -1;
817   PetscCall(PetscBTDestroy(&bt));
818   PetscCall(ISCreateBlock(PetscObjectComm((PetscObject)dm), dof, nv / dof, gidx, PETSC_OWN_POINTER, &is));
819   PetscCall(ISLocalToGlobalMappingCreateIS(is, &ltog));
820   PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));
821   PetscCall(ISLocalToGlobalMappingDestroy(&ltog));
822   PetscCall(ISDestroy(&is));
823 
824   /* Preallocation */
825   if (dm->prealloc_skip) {
826     PetscCall(MatSetUp(J));
827   } else {
828     PetscCall(MatISGetLocalMat(J, &lJ));
829     PetscCall(MatGetLocalToGlobalMapping(lJ, &ltog, NULL));
830     PetscCall(MatCreate(PetscObjectComm((PetscObject)lJ), &P));
831     PetscCall(MatSetType(P, MATPREALLOCATOR));
832     PetscCall(MatSetLocalToGlobalMapping(P, ltog, ltog));
833     PetscCall(MatGetSize(lJ, &N, NULL));
834     PetscCall(MatGetLocalSize(lJ, &n, NULL));
835     PetscCall(MatSetSizes(P, n, n, N, N));
836     PetscCall(MatSetBlockSize(P, dof));
837     PetscCall(MatSetUp(P));
838     for (i = 0; i < nel; i++) PetscCall(MatSetValuesBlockedLocal(P, nen, e_loc + i * nen, nen, e_loc + i * nen, NULL, INSERT_VALUES));
839     PetscCall(MatPreallocatorPreallocate(P, (PetscBool)!da->prealloc_only, lJ));
840     PetscCall(MatISRestoreLocalMat(J, &lJ));
841     PetscCall(DMDARestoreElements(dm, &nel, &nen, &e_loc));
842     PetscCall(MatDestroy(&P));
843 
844     PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
845     PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
846   }
847   PetscFunctionReturn(PETSC_SUCCESS);
848 }
849 
850 PetscErrorCode DMCreateMatrix_DA_2d_MPISELL(DM da, Mat J)
851 {
852   PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny, m, n, dim, s, *cols = NULL, k, nc, *rows = NULL, col, cnt, l, p;
853   PetscInt               lstart, lend, pstart, pend, *dnz, *onz;
854   MPI_Comm               comm;
855   PetscScalar           *values;
856   DMBoundaryType         bx, by;
857   ISLocalToGlobalMapping ltog;
858   DMDAStencilType        st;
859 
860   PetscFunctionBegin;
861   /*
862          nc - number of components per grid point
863          col - number of colors needed in one direction for single component problem
864   */
865   PetscCall(DMDAGetInfo(da, &dim, &m, &n, NULL, NULL, NULL, NULL, &nc, &s, &bx, &by, NULL, &st));
866   col = 2 * s + 1;
867   PetscCall(DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL));
868   PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL));
869   PetscCall(PetscObjectGetComm((PetscObject)da, &comm));
870 
871   PetscCall(PetscMalloc2(nc, &rows, col * col * nc * nc, &cols));
872   PetscCall(DMGetLocalToGlobalMapping(da, &ltog));
873 
874   PetscCall(MatSetBlockSize(J, nc));
875   /* determine the matrix preallocation information */
876   MatPreallocateBegin(comm, nc * nx * ny, nc * nx * ny, dnz, onz);
877   for (i = xs; i < xs + nx; i++) {
878     pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
879     pend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
880 
881     for (j = ys; j < ys + ny; j++) {
882       slot = i - gxs + gnx * (j - gys);
883 
884       lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
885       lend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
886 
887       cnt = 0;
888       for (k = 0; k < nc; k++) {
889         for (l = lstart; l < lend + 1; l++) {
890           for (p = pstart; p < pend + 1; p++) {
891             if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star have either l = 0 or p = 0 */
892               cols[cnt++] = k + nc * (slot + gnx * l + p);
893             }
894           }
895         }
896         rows[k] = k + nc * (slot);
897       }
898       PetscCall(MatPreallocateSetLocal(ltog, nc, rows, ltog, cnt, cols, dnz, onz));
899     }
900   }
901   PetscCall(MatSetBlockSize(J, nc));
902   PetscCall(MatSeqSELLSetPreallocation(J, 0, dnz));
903   PetscCall(MatMPISELLSetPreallocation(J, 0, dnz, 0, onz));
904   MatPreallocateEnd(dnz, onz);
905 
906   PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));
907 
908   /*
909     For each node in the grid: we get the neighbors in the local (on processor ordering
910     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
911     PETSc ordering.
912   */
913   if (!da->prealloc_only) {
914     PetscCall(PetscCalloc1(col * col * nc * nc, &values));
915     for (i = xs; i < xs + nx; i++) {
916       pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
917       pend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
918 
919       for (j = ys; j < ys + ny; j++) {
920         slot = i - gxs + gnx * (j - gys);
921 
922         lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
923         lend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
924 
925         cnt = 0;
926         for (k = 0; k < nc; k++) {
927           for (l = lstart; l < lend + 1; l++) {
928             for (p = pstart; p < pend + 1; p++) {
929               if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star have either l = 0 or p = 0 */
930                 cols[cnt++] = k + nc * (slot + gnx * l + p);
931               }
932             }
933           }
934           rows[k] = k + nc * (slot);
935         }
936         PetscCall(MatSetValuesLocal(J, nc, rows, cnt, cols, values, INSERT_VALUES));
937       }
938     }
939     PetscCall(PetscFree(values));
940     /* do not copy values to GPU since they are all zero and not yet needed there */
941     PetscCall(MatBindToCPU(J, PETSC_TRUE));
942     PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
943     PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
944     PetscCall(MatBindToCPU(J, PETSC_FALSE));
945     PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
946   }
947   PetscCall(PetscFree2(rows, cols));
948   PetscFunctionReturn(PETSC_SUCCESS);
949 }
950 
951 PetscErrorCode DMCreateMatrix_DA_3d_MPISELL(DM da, Mat J)
952 {
953   PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
954   PetscInt               m, n, dim, s, *cols = NULL, k, nc, *rows = NULL, col, cnt, l, p, *dnz = NULL, *onz = NULL;
955   PetscInt               istart, iend, jstart, jend, kstart, kend, zs, nz, gzs, gnz, ii, jj, kk, M, N, P;
956   MPI_Comm               comm;
957   PetscScalar           *values;
958   DMBoundaryType         bx, by, bz;
959   ISLocalToGlobalMapping ltog;
960   DMDAStencilType        st;
961 
962   PetscFunctionBegin;
963   /*
964          nc - number of components per grid point
965          col - number of colors needed in one direction for single component problem
966   */
967   PetscCall(DMDAGetInfo(da, &dim, &m, &n, &p, &M, &N, &P, &nc, &s, &bx, &by, &bz, &st));
968   col = 2 * s + 1;
969   PetscCall(DMDAGetCorners(da, &xs, &ys, &zs, &nx, &ny, &nz));
970   PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, &gzs, &gnx, &gny, &gnz));
971   PetscCall(PetscObjectGetComm((PetscObject)da, &comm));
972 
973   PetscCall(PetscMalloc2(nc, &rows, col * col * col * nc * nc, &cols));
974   PetscCall(DMGetLocalToGlobalMapping(da, &ltog));
975 
976   PetscCall(MatSetBlockSize(J, nc));
977   /* determine the matrix preallocation information */
978   MatPreallocateBegin(comm, nc * nx * ny * nz, nc * nx * ny * nz, dnz, onz);
979   for (i = xs; i < xs + nx; i++) {
980     istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
981     iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
982     for (j = ys; j < ys + ny; j++) {
983       jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
984       jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
985       for (k = zs; k < zs + nz; k++) {
986         kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
987         kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));
988 
989         slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);
990 
991         cnt = 0;
992         for (l = 0; l < nc; l++) {
993           for (ii = istart; ii < iend + 1; ii++) {
994             for (jj = jstart; jj < jend + 1; jj++) {
995               for (kk = kstart; kk < kend + 1; kk++) {
996                 if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/
997                   cols[cnt++] = l + nc * (slot + ii + gnx * jj + gnx * gny * kk);
998                 }
999               }
1000             }
1001           }
1002           rows[l] = l + nc * (slot);
1003         }
1004         PetscCall(MatPreallocateSetLocal(ltog, nc, rows, ltog, cnt, cols, dnz, onz));
1005       }
1006     }
1007   }
1008   PetscCall(MatSetBlockSize(J, nc));
1009   PetscCall(MatSeqSELLSetPreallocation(J, 0, dnz));
1010   PetscCall(MatMPISELLSetPreallocation(J, 0, dnz, 0, onz));
1011   MatPreallocateEnd(dnz, onz);
1012   PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));
1013 
1014   /*
1015     For each node in the grid: we get the neighbors in the local (on processor ordering
1016     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1017     PETSc ordering.
1018   */
1019   if (!da->prealloc_only) {
1020     PetscCall(PetscCalloc1(col * col * col * nc * nc * nc, &values));
1021     for (i = xs; i < xs + nx; i++) {
1022       istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
1023       iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
1024       for (j = ys; j < ys + ny; j++) {
1025         jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
1026         jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
1027         for (k = zs; k < zs + nz; k++) {
1028           kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
1029           kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));
1030 
1031           slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);
1032 
1033           cnt = 0;
1034           for (l = 0; l < nc; l++) {
1035             for (ii = istart; ii < iend + 1; ii++) {
1036               for (jj = jstart; jj < jend + 1; jj++) {
1037                 for (kk = kstart; kk < kend + 1; kk++) {
1038                   if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/
1039                     cols[cnt++] = l + nc * (slot + ii + gnx * jj + gnx * gny * kk);
1040                   }
1041                 }
1042               }
1043             }
1044             rows[l] = l + nc * (slot);
1045           }
1046           PetscCall(MatSetValuesLocal(J, nc, rows, cnt, cols, values, INSERT_VALUES));
1047         }
1048       }
1049     }
1050     PetscCall(PetscFree(values));
1051     /* do not copy values to GPU since they are all zero and not yet needed there */
1052     PetscCall(MatBindToCPU(J, PETSC_TRUE));
1053     PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
1054     PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
1055     PetscCall(MatBindToCPU(J, PETSC_FALSE));
1056     PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
1057   }
1058   PetscCall(PetscFree2(rows, cols));
1059   PetscFunctionReturn(PETSC_SUCCESS);
1060 }
1061 
1062 PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ(DM da, Mat J)
1063 {
1064   PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny, m, n, dim, s, *cols = NULL, k, nc, *rows = NULL, col, cnt, l, p, M, N;
1065   PetscInt               lstart, lend, pstart, pend, *dnz, *onz;
1066   MPI_Comm               comm;
1067   DMBoundaryType         bx, by;
1068   ISLocalToGlobalMapping ltog, mltog;
1069   DMDAStencilType        st;
1070   PetscBool              removedups = PETSC_FALSE, alreadyboundtocpu = PETSC_TRUE;
1071 
1072   PetscFunctionBegin;
1073   /*
1074          nc - number of components per grid point
1075          col - number of colors needed in one direction for single component problem
1076   */
1077   PetscCall(DMDAGetInfo(da, &dim, &m, &n, NULL, &M, &N, NULL, &nc, &s, &bx, &by, NULL, &st));
1078   if (bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE) PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_TRUE));
1079   col = 2 * s + 1;
1080   /*
1081        With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times
1082        because of "wrapping" around the end of the domain hitting an entry already counted in the other direction.
1083   */
1084   if (M == 1 && 2 * s >= m) removedups = PETSC_TRUE;
1085   if (N == 1 && 2 * s >= n) removedups = PETSC_TRUE;
1086   PetscCall(DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL));
1087   PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL));
1088   PetscCall(PetscObjectGetComm((PetscObject)da, &comm));
1089 
1090   PetscCall(PetscMalloc2(nc, &rows, col * col * nc * nc, &cols));
1091   PetscCall(DMGetLocalToGlobalMapping(da, &ltog));
1092 
1093   PetscCall(MatSetBlockSize(J, nc));
1094   /* determine the matrix preallocation information */
1095   MatPreallocateBegin(comm, nc * nx * ny, nc * nx * ny, dnz, onz);
1096   for (i = xs; i < xs + nx; i++) {
1097     pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
1098     pend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
1099 
1100     for (j = ys; j < ys + ny; j++) {
1101       slot = i - gxs + gnx * (j - gys);
1102 
1103       lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
1104       lend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
1105 
1106       cnt = 0;
1107       for (k = 0; k < nc; k++) {
1108         for (l = lstart; l < lend + 1; l++) {
1109           for (p = pstart; p < pend + 1; p++) {
1110             if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star have either l = 0 or p = 0 */
1111               cols[cnt++] = k + nc * (slot + gnx * l + p);
1112             }
1113           }
1114         }
1115         rows[k] = k + nc * (slot);
1116       }
1117       if (removedups) PetscCall(MatPreallocateSetLocalRemoveDups(ltog, nc, rows, ltog, cnt, cols, dnz, onz));
1118       else PetscCall(MatPreallocateSetLocal(ltog, nc, rows, ltog, cnt, cols, dnz, onz));
1119     }
1120   }
1121   PetscCall(MatSetBlockSize(J, nc));
1122   PetscCall(MatSeqAIJSetPreallocation(J, 0, dnz));
1123   PetscCall(MatMPIAIJSetPreallocation(J, 0, dnz, 0, onz));
1124   MatPreallocateEnd(dnz, onz);
1125   PetscCall(MatGetLocalToGlobalMapping(J, &mltog, NULL));
1126   if (!mltog) PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));
1127 
1128   /*
1129     For each node in the grid: we get the neighbors in the local (on processor ordering
1130     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1131     PETSc ordering.
1132   */
1133   if (!da->prealloc_only) {
1134     for (i = xs; i < xs + nx; i++) {
1135       pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
1136       pend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
1137 
1138       for (j = ys; j < ys + ny; j++) {
1139         slot = i - gxs + gnx * (j - gys);
1140 
1141         lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
1142         lend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
1143 
1144         cnt = 0;
1145         for (l = lstart; l < lend + 1; l++) {
1146           for (p = pstart; p < pend + 1; p++) {
1147             if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star have either l = 0 or p = 0 */
1148               cols[cnt++] = nc * (slot + gnx * l + p);
1149               for (k = 1; k < nc; k++) {
1150                 cols[cnt] = 1 + cols[cnt - 1];
1151                 cnt++;
1152               }
1153             }
1154           }
1155         }
1156         for (k = 0; k < nc; k++) rows[k] = k + nc * (slot);
1157         PetscCall(MatSetValuesLocal(J, nc, rows, cnt, cols, NULL, INSERT_VALUES));
1158       }
1159     }
1160     /* do not copy values to GPU since they are all zero and not yet needed there */
1161     PetscCall(MatBoundToCPU(J, &alreadyboundtocpu));
1162     PetscCall(MatBindToCPU(J, PETSC_TRUE));
1163     PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
1164     PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
1165     if (!alreadyboundtocpu) PetscCall(MatBindToCPU(J, PETSC_FALSE));
1166     PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
1167     if (bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE) PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_FALSE));
1168   }
1169   PetscCall(PetscFree2(rows, cols));
1170   PetscFunctionReturn(PETSC_SUCCESS);
1171 }
1172 
1173 PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ_Fill(DM da, Mat J)
1174 {
1175   PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
1176   PetscInt               m, n, dim, s, *cols, k, nc, row, col, cnt, maxcnt = 0, l, p, M, N;
1177   PetscInt               lstart, lend, pstart, pend, *dnz, *onz;
1178   DM_DA                 *dd = (DM_DA *)da->data;
1179   PetscInt               ifill_col, *ofill = dd->ofill, *dfill = dd->dfill;
1180   MPI_Comm               comm;
1181   DMBoundaryType         bx, by;
1182   ISLocalToGlobalMapping ltog;
1183   DMDAStencilType        st;
1184   PetscBool              removedups = PETSC_FALSE;
1185 
1186   PetscFunctionBegin;
1187   /*
1188          nc - number of components per grid point
1189          col - number of colors needed in one direction for single component problem
1190   */
1191   PetscCall(DMDAGetInfo(da, &dim, &m, &n, NULL, &M, &N, NULL, &nc, &s, &bx, &by, NULL, &st));
1192   col = 2 * s + 1;
1193   /*
1194        With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times
1195        because of "wrapping" around the end of the domain hitting an entry already counted in the other direction.
1196   */
1197   if (M == 1 && 2 * s >= m) removedups = PETSC_TRUE;
1198   if (N == 1 && 2 * s >= n) removedups = PETSC_TRUE;
1199   PetscCall(DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL));
1200   PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL));
1201   PetscCall(PetscObjectGetComm((PetscObject)da, &comm));
1202 
1203   PetscCall(PetscMalloc1(col * col * nc, &cols));
1204   PetscCall(DMGetLocalToGlobalMapping(da, &ltog));
1205 
1206   PetscCall(MatSetBlockSize(J, nc));
1207   /* determine the matrix preallocation information */
1208   MatPreallocateBegin(comm, nc * nx * ny, nc * nx * ny, dnz, onz);
1209   for (i = xs; i < xs + nx; i++) {
1210     pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
1211     pend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
1212 
1213     for (j = ys; j < ys + ny; j++) {
1214       slot = i - gxs + gnx * (j - gys);
1215 
1216       lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
1217       lend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
1218 
1219       for (k = 0; k < nc; k++) {
1220         cnt = 0;
1221         for (l = lstart; l < lend + 1; l++) {
1222           for (p = pstart; p < pend + 1; p++) {
1223             if (l || p) {
1224               if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star */
1225                 for (ifill_col = ofill[k]; ifill_col < ofill[k + 1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc * (slot + gnx * l + p);
1226               }
1227             } else {
1228               if (dfill) {
1229                 for (ifill_col = dfill[k]; ifill_col < dfill[k + 1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc * (slot + gnx * l + p);
1230               } else {
1231                 for (ifill_col = 0; ifill_col < nc; ifill_col++) cols[cnt++] = ifill_col + nc * (slot + gnx * l + p);
1232               }
1233             }
1234           }
1235         }
1236         row    = k + nc * (slot);
1237         maxcnt = PetscMax(maxcnt, cnt);
1238         if (removedups) PetscCall(MatPreallocateSetLocalRemoveDups(ltog, 1, &row, ltog, cnt, cols, dnz, onz));
1239         else PetscCall(MatPreallocateSetLocal(ltog, 1, &row, ltog, cnt, cols, dnz, onz));
1240       }
1241     }
1242   }
1243   PetscCall(MatSeqAIJSetPreallocation(J, 0, dnz));
1244   PetscCall(MatMPIAIJSetPreallocation(J, 0, dnz, 0, onz));
1245   MatPreallocateEnd(dnz, onz);
1246   PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));
1247 
1248   /*
1249     For each node in the grid: we get the neighbors in the local (on processor ordering
1250     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1251     PETSc ordering.
1252   */
1253   if (!da->prealloc_only) {
1254     for (i = xs; i < xs + nx; i++) {
1255       pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
1256       pend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
1257 
1258       for (j = ys; j < ys + ny; j++) {
1259         slot = i - gxs + gnx * (j - gys);
1260 
1261         lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
1262         lend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
1263 
1264         for (k = 0; k < nc; k++) {
1265           cnt = 0;
1266           for (l = lstart; l < lend + 1; l++) {
1267             for (p = pstart; p < pend + 1; p++) {
1268               if (l || p) {
1269                 if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star */
1270                   for (ifill_col = ofill[k]; ifill_col < ofill[k + 1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc * (slot + gnx * l + p);
1271                 }
1272               } else {
1273                 if (dfill) {
1274                   for (ifill_col = dfill[k]; ifill_col < dfill[k + 1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc * (slot + gnx * l + p);
1275                 } else {
1276                   for (ifill_col = 0; ifill_col < nc; ifill_col++) cols[cnt++] = ifill_col + nc * (slot + gnx * l + p);
1277                 }
1278               }
1279             }
1280           }
1281           row = k + nc * (slot);
1282           PetscCall(MatSetValuesLocal(J, 1, &row, cnt, cols, NULL, INSERT_VALUES));
1283         }
1284       }
1285     }
1286     /* do not copy values to GPU since they are all zero and not yet needed there */
1287     PetscCall(MatBindToCPU(J, PETSC_TRUE));
1288     PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
1289     PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
1290     PetscCall(MatBindToCPU(J, PETSC_FALSE));
1291     PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
1292   }
1293   PetscCall(PetscFree(cols));
1294   PetscFunctionReturn(PETSC_SUCCESS);
1295 }
1296 
1297 /* ---------------------------------------------------------------------------------*/
1298 
1299 PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ(DM da, Mat J)
1300 {
1301   PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
1302   PetscInt               m, n, dim, s, *cols = NULL, k, nc, *rows = NULL, col, cnt, l, p, *dnz = NULL, *onz = NULL;
1303   PetscInt               istart, iend, jstart, jend, kstart, kend, zs, nz, gzs, gnz, ii, jj, kk, M, N, P;
1304   MPI_Comm               comm;
1305   DMBoundaryType         bx, by, bz;
1306   ISLocalToGlobalMapping ltog, mltog;
1307   DMDAStencilType        st;
1308   PetscBool              removedups = PETSC_FALSE;
1309 
1310   PetscFunctionBegin;
1311   /*
1312          nc - number of components per grid point
1313          col - number of colors needed in one direction for single component problem
1314   */
1315   PetscCall(DMDAGetInfo(da, &dim, &m, &n, &p, &M, &N, &P, &nc, &s, &bx, &by, &bz, &st));
1316   if (bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE && bz == DM_BOUNDARY_NONE) PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_TRUE));
1317   col = 2 * s + 1;
1318 
1319   /*
1320        With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times
1321        because of "wrapping" around the end of the domain hitting an entry already counted in the other direction.
1322   */
1323   if (M == 1 && 2 * s >= m) removedups = PETSC_TRUE;
1324   if (N == 1 && 2 * s >= n) removedups = PETSC_TRUE;
1325   if (P == 1 && 2 * s >= p) removedups = PETSC_TRUE;
1326 
1327   PetscCall(DMDAGetCorners(da, &xs, &ys, &zs, &nx, &ny, &nz));
1328   PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, &gzs, &gnx, &gny, &gnz));
1329   PetscCall(PetscObjectGetComm((PetscObject)da, &comm));
1330 
1331   PetscCall(PetscMalloc2(nc, &rows, col * col * col * nc * nc, &cols));
1332   PetscCall(DMGetLocalToGlobalMapping(da, &ltog));
1333 
1334   PetscCall(MatSetBlockSize(J, nc));
1335   /* determine the matrix preallocation information */
1336   MatPreallocateBegin(comm, nc * nx * ny * nz, nc * nx * ny * nz, dnz, onz);
1337   for (i = xs; i < xs + nx; i++) {
1338     istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
1339     iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
1340     for (j = ys; j < ys + ny; j++) {
1341       jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
1342       jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
1343       for (k = zs; k < zs + nz; k++) {
1344         kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
1345         kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));
1346 
1347         slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);
1348 
1349         cnt = 0;
1350         for (l = 0; l < nc; l++) {
1351           for (ii = istart; ii < iend + 1; ii++) {
1352             for (jj = jstart; jj < jend + 1; jj++) {
1353               for (kk = kstart; kk < kend + 1; kk++) {
1354                 if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/
1355                   cols[cnt++] = l + nc * (slot + ii + gnx * jj + gnx * gny * kk);
1356                 }
1357               }
1358             }
1359           }
1360           rows[l] = l + nc * (slot);
1361         }
1362         if (removedups) PetscCall(MatPreallocateSetLocalRemoveDups(ltog, nc, rows, ltog, cnt, cols, dnz, onz));
1363         else PetscCall(MatPreallocateSetLocal(ltog, nc, rows, ltog, cnt, cols, dnz, onz));
1364       }
1365     }
1366   }
1367   PetscCall(MatSetBlockSize(J, nc));
1368   PetscCall(MatSeqAIJSetPreallocation(J, 0, dnz));
1369   PetscCall(MatMPIAIJSetPreallocation(J, 0, dnz, 0, onz));
1370   MatPreallocateEnd(dnz, onz);
1371   PetscCall(MatGetLocalToGlobalMapping(J, &mltog, NULL));
1372   if (!mltog) PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));
1373 
1374   /*
1375     For each node in the grid: we get the neighbors in the local (on processor ordering
1376     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1377     PETSc ordering.
1378   */
1379   if (!da->prealloc_only) {
1380     for (i = xs; i < xs + nx; i++) {
1381       istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
1382       iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
1383       for (j = ys; j < ys + ny; j++) {
1384         jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
1385         jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
1386         for (k = zs; k < zs + nz; k++) {
1387           kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
1388           kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));
1389 
1390           slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);
1391 
1392           cnt = 0;
1393           for (kk = kstart; kk < kend + 1; kk++) {
1394             for (jj = jstart; jj < jend + 1; jj++) {
1395               for (ii = istart; ii < iend + 1; ii++) {
1396                 if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/
1397                   cols[cnt++] = nc * (slot + ii + gnx * jj + gnx * gny * kk);
1398                   for (l = 1; l < nc; l++) {
1399                     cols[cnt] = 1 + cols[cnt - 1];
1400                     cnt++;
1401                   }
1402                 }
1403               }
1404             }
1405           }
1406           rows[0] = nc * (slot);
1407           for (l = 1; l < nc; l++) rows[l] = 1 + rows[l - 1];
1408           PetscCall(MatSetValuesLocal(J, nc, rows, cnt, cols, NULL, INSERT_VALUES));
1409         }
1410       }
1411     }
1412     /* do not copy values to GPU since they are all zero and not yet needed there */
1413     PetscCall(MatBindToCPU(J, PETSC_TRUE));
1414     PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
1415     PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
1416     if (bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE && bz == DM_BOUNDARY_NONE) PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_FALSE));
1417     PetscCall(MatBindToCPU(J, PETSC_FALSE));
1418     PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
1419   }
1420   PetscCall(PetscFree2(rows, cols));
1421   PetscFunctionReturn(PETSC_SUCCESS);
1422 }
1423 
1424 /* ---------------------------------------------------------------------------------*/
1425 
1426 PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ_Fill(DM da, Mat J)
1427 {
1428   DM_DA                 *dd = (DM_DA *)da->data;
1429   PetscInt               xs, nx, i, j, gxs, gnx, row, k, l;
1430   PetscInt               m, dim, s, *cols = NULL, nc, cnt, maxcnt = 0, *ocols;
1431   PetscInt              *ofill = dd->ofill, *dfill = dd->dfill;
1432   DMBoundaryType         bx;
1433   ISLocalToGlobalMapping ltog;
1434   PetscMPIInt            rank, size;
1435 
1436   PetscFunctionBegin;
1437   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)da), &rank));
1438   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)da), &size));
1439 
1440   /*
1441          nc - number of components per grid point
1442   */
1443   PetscCall(DMDAGetInfo(da, &dim, &m, NULL, NULL, NULL, NULL, NULL, &nc, &s, &bx, NULL, NULL, NULL));
1444   PetscCheck(s <= 1, PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "Matrix creation for 1d not implemented correctly for stencil width larger than 1");
1445   PetscCall(DMDAGetCorners(da, &xs, NULL, NULL, &nx, NULL, NULL));
1446   PetscCall(DMDAGetGhostCorners(da, &gxs, NULL, NULL, &gnx, NULL, NULL));
1447 
1448   PetscCall(MatSetBlockSize(J, nc));
1449   PetscCall(PetscCalloc2(nx * nc, &cols, nx * nc, &ocols));
1450 
1451   /*
1452         note should be smaller for first and last process with no periodic
1453         does not handle dfill
1454   */
1455   cnt = 0;
1456   /* coupling with process to the left */
1457   for (i = 0; i < s; i++) {
1458     for (j = 0; j < nc; j++) {
1459       ocols[cnt] = ((rank == 0) ? 0 : (s - i) * (ofill[j + 1] - ofill[j]));
1460       cols[cnt]  = dfill[j + 1] - dfill[j] + (s + i) * (ofill[j + 1] - ofill[j]);
1461       if (rank == 0 && (dd->bx == DM_BOUNDARY_PERIODIC)) {
1462         if (size > 1) ocols[cnt] += (s - i) * (ofill[j + 1] - ofill[j]);
1463         else cols[cnt] += (s - i) * (ofill[j + 1] - ofill[j]);
1464       }
1465       maxcnt = PetscMax(maxcnt, ocols[cnt] + cols[cnt]);
1466       cnt++;
1467     }
1468   }
1469   for (i = s; i < nx - s; i++) {
1470     for (j = 0; j < nc; j++) {
1471       cols[cnt] = dfill[j + 1] - dfill[j] + 2 * s * (ofill[j + 1] - ofill[j]);
1472       maxcnt    = PetscMax(maxcnt, ocols[cnt] + cols[cnt]);
1473       cnt++;
1474     }
1475   }
1476   /* coupling with process to the right */
1477   for (i = nx - s; i < nx; i++) {
1478     for (j = 0; j < nc; j++) {
1479       ocols[cnt] = ((rank == (size - 1)) ? 0 : (i - nx + s + 1) * (ofill[j + 1] - ofill[j]));
1480       cols[cnt]  = dfill[j + 1] - dfill[j] + (s + nx - i - 1) * (ofill[j + 1] - ofill[j]);
1481       if ((rank == size - 1) && (dd->bx == DM_BOUNDARY_PERIODIC)) {
1482         if (size > 1) ocols[cnt] += (i - nx + s + 1) * (ofill[j + 1] - ofill[j]);
1483         else cols[cnt] += (i - nx + s + 1) * (ofill[j + 1] - ofill[j]);
1484       }
1485       maxcnt = PetscMax(maxcnt, ocols[cnt] + cols[cnt]);
1486       cnt++;
1487     }
1488   }
1489 
1490   PetscCall(MatSeqAIJSetPreallocation(J, 0, cols));
1491   PetscCall(MatMPIAIJSetPreallocation(J, 0, cols, 0, ocols));
1492   PetscCall(PetscFree2(cols, ocols));
1493 
1494   PetscCall(DMGetLocalToGlobalMapping(da, &ltog));
1495   PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));
1496 
1497   /*
1498     For each node in the grid: we get the neighbors in the local (on processor ordering
1499     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1500     PETSc ordering.
1501   */
1502   if (!da->prealloc_only) {
1503     PetscCall(PetscMalloc1(maxcnt, &cols));
1504     row = xs * nc;
1505     /* coupling with process to the left */
1506     for (i = xs; i < xs + s; i++) {
1507       for (j = 0; j < nc; j++) {
1508         cnt = 0;
1509         if (rank) {
1510           for (l = 0; l < s; l++) {
1511             for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i - s + l) * nc + ofill[k];
1512           }
1513         }
1514         if (rank == 0 && (dd->bx == DM_BOUNDARY_PERIODIC)) {
1515           for (l = 0; l < s; l++) {
1516             for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (m + i - s - l) * nc + ofill[k];
1517           }
1518         }
1519         if (dfill) {
1520           for (k = dfill[j]; k < dfill[j + 1]; k++) cols[cnt++] = i * nc + dfill[k];
1521         } else {
1522           for (k = 0; k < nc; k++) cols[cnt++] = i * nc + k;
1523         }
1524         for (l = 0; l < s; l++) {
1525           for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i + s - l) * nc + ofill[k];
1526         }
1527         PetscCall(MatSetValues(J, 1, &row, cnt, cols, NULL, INSERT_VALUES));
1528         row++;
1529       }
1530     }
1531     for (i = xs + s; i < xs + nx - s; i++) {
1532       for (j = 0; j < nc; j++) {
1533         cnt = 0;
1534         for (l = 0; l < s; l++) {
1535           for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i - s + l) * nc + ofill[k];
1536         }
1537         if (dfill) {
1538           for (k = dfill[j]; k < dfill[j + 1]; k++) cols[cnt++] = i * nc + dfill[k];
1539         } else {
1540           for (k = 0; k < nc; k++) cols[cnt++] = i * nc + k;
1541         }
1542         for (l = 0; l < s; l++) {
1543           for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i + s - l) * nc + ofill[k];
1544         }
1545         PetscCall(MatSetValues(J, 1, &row, cnt, cols, NULL, INSERT_VALUES));
1546         row++;
1547       }
1548     }
1549     /* coupling with process to the right */
1550     for (i = xs + nx - s; i < xs + nx; i++) {
1551       for (j = 0; j < nc; j++) {
1552         cnt = 0;
1553         for (l = 0; l < s; l++) {
1554           for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i - s + l) * nc + ofill[k];
1555         }
1556         if (dfill) {
1557           for (k = dfill[j]; k < dfill[j + 1]; k++) cols[cnt++] = i * nc + dfill[k];
1558         } else {
1559           for (k = 0; k < nc; k++) cols[cnt++] = i * nc + k;
1560         }
1561         if (rank < size - 1) {
1562           for (l = 0; l < s; l++) {
1563             for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i + s - l) * nc + ofill[k];
1564           }
1565         }
1566         if ((rank == size - 1) && (dd->bx == DM_BOUNDARY_PERIODIC)) {
1567           for (l = 0; l < s; l++) {
1568             for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i - s - l - m + 2) * nc + ofill[k];
1569           }
1570         }
1571         PetscCall(MatSetValues(J, 1, &row, cnt, cols, NULL, INSERT_VALUES));
1572         row++;
1573       }
1574     }
1575     PetscCall(PetscFree(cols));
1576     /* do not copy values to GPU since they are all zero and not yet needed there */
1577     PetscCall(MatBindToCPU(J, PETSC_TRUE));
1578     PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
1579     PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
1580     PetscCall(MatBindToCPU(J, PETSC_FALSE));
1581     PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
1582   }
1583   PetscFunctionReturn(PETSC_SUCCESS);
1584 }
1585 
1586 /* ---------------------------------------------------------------------------------*/
1587 
1588 PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ(DM da, Mat J)
1589 {
1590   PetscInt               xs, nx, i, i1, slot, gxs, gnx;
1591   PetscInt               m, dim, s, *cols = NULL, nc, *rows = NULL, col, cnt, l;
1592   PetscInt               istart, iend;
1593   DMBoundaryType         bx;
1594   ISLocalToGlobalMapping ltog, mltog;
1595 
1596   PetscFunctionBegin;
1597   /*
1598          nc - number of components per grid point
1599          col - number of colors needed in one direction for single component problem
1600   */
1601   PetscCall(DMDAGetInfo(da, &dim, &m, NULL, NULL, NULL, NULL, NULL, &nc, &s, &bx, NULL, NULL, NULL));
1602   if (bx == DM_BOUNDARY_NONE) PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_TRUE));
1603   col = 2 * s + 1;
1604 
1605   PetscCall(DMDAGetCorners(da, &xs, NULL, NULL, &nx, NULL, NULL));
1606   PetscCall(DMDAGetGhostCorners(da, &gxs, NULL, NULL, &gnx, NULL, NULL));
1607 
1608   PetscCall(MatSetBlockSize(J, nc));
1609   PetscCall(MatSeqAIJSetPreallocation(J, col * nc, NULL));
1610   PetscCall(MatMPIAIJSetPreallocation(J, col * nc, NULL, col * nc, NULL));
1611 
1612   PetscCall(DMGetLocalToGlobalMapping(da, &ltog));
1613   PetscCall(MatGetLocalToGlobalMapping(J, &mltog, NULL));
1614   if (!mltog) PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));
1615 
1616   /*
1617     For each node in the grid: we get the neighbors in the local (on processor ordering
1618     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1619     PETSc ordering.
1620   */
1621   if (!da->prealloc_only) {
1622     PetscCall(PetscMalloc2(nc, &rows, col * nc * nc, &cols));
1623     for (i = xs; i < xs + nx; i++) {
1624       istart = PetscMax(-s, gxs - i);
1625       iend   = PetscMin(s, gxs + gnx - i - 1);
1626       slot   = i - gxs;
1627 
1628       cnt = 0;
1629       for (i1 = istart; i1 < iend + 1; i1++) {
1630         cols[cnt++] = nc * (slot + i1);
1631         for (l = 1; l < nc; l++) {
1632           cols[cnt] = 1 + cols[cnt - 1];
1633           cnt++;
1634         }
1635       }
1636       rows[0] = nc * (slot);
1637       for (l = 1; l < nc; l++) rows[l] = 1 + rows[l - 1];
1638       PetscCall(MatSetValuesLocal(J, nc, rows, cnt, cols, NULL, INSERT_VALUES));
1639     }
1640     /* do not copy values to GPU since they are all zero and not yet needed there */
1641     PetscCall(MatBindToCPU(J, PETSC_TRUE));
1642     PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
1643     PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
1644     if (bx == DM_BOUNDARY_NONE) PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_FALSE));
1645     PetscCall(MatBindToCPU(J, PETSC_FALSE));
1646     PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
1647     PetscCall(PetscFree2(rows, cols));
1648   }
1649   PetscFunctionReturn(PETSC_SUCCESS);
1650 }
1651 
1652 /* ---------------------------------------------------------------------------------*/
1653 
1654 PetscErrorCode DMCreateMatrix_DA_1d_SeqAIJ_NoPreallocation(DM da, Mat J)
1655 {
1656   PetscInt               xs, nx, i, i1, slot, gxs, gnx;
1657   PetscInt               m, dim, s, *cols = NULL, nc, *rows = NULL, col, cnt, l;
1658   PetscInt               istart, iend;
1659   DMBoundaryType         bx;
1660   ISLocalToGlobalMapping ltog, mltog;
1661 
1662   PetscFunctionBegin;
1663   /*
1664          nc - number of components per grid point
1665          col - number of colors needed in one direction for single component problem
1666   */
1667   PetscCall(DMDAGetInfo(da, &dim, &m, NULL, NULL, NULL, NULL, NULL, &nc, &s, &bx, NULL, NULL, NULL));
1668   col = 2 * s + 1;
1669 
1670   PetscCall(DMDAGetCorners(da, &xs, NULL, NULL, &nx, NULL, NULL));
1671   PetscCall(DMDAGetGhostCorners(da, &gxs, NULL, NULL, &gnx, NULL, NULL));
1672 
1673   PetscCall(MatSetBlockSize(J, nc));
1674   PetscCall(MatSeqAIJSetTotalPreallocation(J, nx * nc * col * nc));
1675 
1676   PetscCall(DMGetLocalToGlobalMapping(da, &ltog));
1677   PetscCall(MatGetLocalToGlobalMapping(J, &mltog, NULL));
1678   if (!mltog) PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));
1679 
1680   /*
1681     For each node in the grid: we get the neighbors in the local (on processor ordering
1682     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1683     PETSc ordering.
1684   */
1685   if (!da->prealloc_only) {
1686     PetscCall(PetscMalloc2(nc, &rows, col * nc * nc, &cols));
1687     for (i = xs; i < xs + nx; i++) {
1688       istart = PetscMax(-s, gxs - i);
1689       iend   = PetscMin(s, gxs + gnx - i - 1);
1690       slot   = i - gxs;
1691 
1692       cnt = 0;
1693       for (i1 = istart; i1 < iend + 1; i1++) {
1694         cols[cnt++] = nc * (slot + i1);
1695         for (l = 1; l < nc; l++) {
1696           cols[cnt] = 1 + cols[cnt - 1];
1697           cnt++;
1698         }
1699       }
1700       rows[0] = nc * (slot);
1701       for (l = 1; l < nc; l++) rows[l] = 1 + rows[l - 1];
1702       PetscCall(MatSetValuesLocal(J, nc, rows, cnt, cols, NULL, INSERT_VALUES));
1703     }
1704     /* do not copy values to GPU since they are all zero and not yet needed there */
1705     PetscCall(MatBindToCPU(J, PETSC_TRUE));
1706     PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
1707     PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
1708     if (bx == DM_BOUNDARY_NONE) PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_FALSE));
1709     PetscCall(MatBindToCPU(J, PETSC_FALSE));
1710     PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
1711     PetscCall(PetscFree2(rows, cols));
1712   }
1713   PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_FALSE));
1714   PetscFunctionReturn(PETSC_SUCCESS);
1715 }
1716 
1717 PetscErrorCode DMCreateMatrix_DA_2d_MPIBAIJ(DM da, Mat J)
1718 {
1719   PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
1720   PetscInt               m, n, dim, s, *cols, nc, col, cnt, *dnz, *onz;
1721   PetscInt               istart, iend, jstart, jend, ii, jj;
1722   MPI_Comm               comm;
1723   PetscScalar           *values;
1724   DMBoundaryType         bx, by;
1725   DMDAStencilType        st;
1726   ISLocalToGlobalMapping ltog;
1727 
1728   PetscFunctionBegin;
1729   /*
1730      nc - number of components per grid point
1731      col - number of colors needed in one direction for single component problem
1732   */
1733   PetscCall(DMDAGetInfo(da, &dim, &m, &n, NULL, NULL, NULL, NULL, &nc, &s, &bx, &by, NULL, &st));
1734   col = 2 * s + 1;
1735 
1736   PetscCall(DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL));
1737   PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL));
1738   PetscCall(PetscObjectGetComm((PetscObject)da, &comm));
1739 
1740   PetscCall(PetscMalloc1(col * col * nc * nc, &cols));
1741 
1742   PetscCall(DMGetLocalToGlobalMapping(da, &ltog));
1743 
1744   /* determine the matrix preallocation information */
1745   MatPreallocateBegin(comm, nx * ny, nx * ny, dnz, onz);
1746   for (i = xs; i < xs + nx; i++) {
1747     istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
1748     iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
1749     for (j = ys; j < ys + ny; j++) {
1750       jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
1751       jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
1752       slot   = i - gxs + gnx * (j - gys);
1753 
1754       /* Find block columns in block row */
1755       cnt = 0;
1756       for (ii = istart; ii < iend + 1; ii++) {
1757         for (jj = jstart; jj < jend + 1; jj++) {
1758           if (st == DMDA_STENCIL_BOX || !ii || !jj) { /* BOX or on the STAR */
1759             cols[cnt++] = slot + ii + gnx * jj;
1760           }
1761         }
1762       }
1763       PetscCall(MatPreallocateSetLocalBlock(ltog, 1, &slot, ltog, cnt, cols, dnz, onz));
1764     }
1765   }
1766   PetscCall(MatSeqBAIJSetPreallocation(J, nc, 0, dnz));
1767   PetscCall(MatMPIBAIJSetPreallocation(J, nc, 0, dnz, 0, onz));
1768   MatPreallocateEnd(dnz, onz);
1769 
1770   PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));
1771 
1772   /*
1773     For each node in the grid: we get the neighbors in the local (on processor ordering
1774     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1775     PETSc ordering.
1776   */
1777   if (!da->prealloc_only) {
1778     PetscCall(PetscCalloc1(col * col * nc * nc, &values));
1779     for (i = xs; i < xs + nx; i++) {
1780       istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
1781       iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
1782       for (j = ys; j < ys + ny; j++) {
1783         jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
1784         jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
1785         slot   = i - gxs + gnx * (j - gys);
1786         cnt    = 0;
1787         for (ii = istart; ii < iend + 1; ii++) {
1788           for (jj = jstart; jj < jend + 1; jj++) {
1789             if (st == DMDA_STENCIL_BOX || !ii || !jj) { /* BOX or on the STAR */
1790               cols[cnt++] = slot + ii + gnx * jj;
1791             }
1792           }
1793         }
1794         PetscCall(MatSetValuesBlockedLocal(J, 1, &slot, cnt, cols, values, INSERT_VALUES));
1795       }
1796     }
1797     PetscCall(PetscFree(values));
1798     /* do not copy values to GPU since they are all zero and not yet needed there */
1799     PetscCall(MatBindToCPU(J, PETSC_TRUE));
1800     PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
1801     PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
1802     PetscCall(MatBindToCPU(J, PETSC_FALSE));
1803     PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
1804   }
1805   PetscCall(PetscFree(cols));
1806   PetscFunctionReturn(PETSC_SUCCESS);
1807 }
1808 
1809 PetscErrorCode DMCreateMatrix_DA_3d_MPIBAIJ(DM da, Mat J)
1810 {
1811   PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
1812   PetscInt               m, n, dim, s, *cols, k, nc, col, cnt, p, *dnz, *onz;
1813   PetscInt               istart, iend, jstart, jend, kstart, kend, zs, nz, gzs, gnz, ii, jj, kk;
1814   MPI_Comm               comm;
1815   PetscScalar           *values;
1816   DMBoundaryType         bx, by, bz;
1817   DMDAStencilType        st;
1818   ISLocalToGlobalMapping ltog;
1819 
1820   PetscFunctionBegin;
1821   /*
1822          nc - number of components per grid point
1823          col - number of colors needed in one direction for single component problem
1824   */
1825   PetscCall(DMDAGetInfo(da, &dim, &m, &n, &p, NULL, NULL, NULL, &nc, &s, &bx, &by, &bz, &st));
1826   col = 2 * s + 1;
1827 
1828   PetscCall(DMDAGetCorners(da, &xs, &ys, &zs, &nx, &ny, &nz));
1829   PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, &gzs, &gnx, &gny, &gnz));
1830   PetscCall(PetscObjectGetComm((PetscObject)da, &comm));
1831 
1832   PetscCall(PetscMalloc1(col * col * col, &cols));
1833 
1834   PetscCall(DMGetLocalToGlobalMapping(da, &ltog));
1835 
1836   /* determine the matrix preallocation information */
1837   MatPreallocateBegin(comm, nx * ny * nz, nx * ny * nz, dnz, onz);
1838   for (i = xs; i < xs + nx; i++) {
1839     istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
1840     iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
1841     for (j = ys; j < ys + ny; j++) {
1842       jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
1843       jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
1844       for (k = zs; k < zs + nz; k++) {
1845         kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
1846         kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));
1847 
1848         slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);
1849 
1850         /* Find block columns in block row */
1851         cnt = 0;
1852         for (ii = istart; ii < iend + 1; ii++) {
1853           for (jj = jstart; jj < jend + 1; jj++) {
1854             for (kk = kstart; kk < kend + 1; kk++) {
1855               if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/
1856                 cols[cnt++] = slot + ii + gnx * jj + gnx * gny * kk;
1857               }
1858             }
1859           }
1860         }
1861         PetscCall(MatPreallocateSetLocalBlock(ltog, 1, &slot, ltog, cnt, cols, dnz, onz));
1862       }
1863     }
1864   }
1865   PetscCall(MatSeqBAIJSetPreallocation(J, nc, 0, dnz));
1866   PetscCall(MatMPIBAIJSetPreallocation(J, nc, 0, dnz, 0, onz));
1867   MatPreallocateEnd(dnz, onz);
1868 
1869   PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));
1870 
1871   /*
1872     For each node in the grid: we get the neighbors in the local (on processor ordering
1873     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1874     PETSc ordering.
1875   */
1876   if (!da->prealloc_only) {
1877     PetscCall(PetscCalloc1(col * col * col * nc * nc, &values));
1878     for (i = xs; i < xs + nx; i++) {
1879       istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
1880       iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
1881       for (j = ys; j < ys + ny; j++) {
1882         jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
1883         jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
1884         for (k = zs; k < zs + nz; k++) {
1885           kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
1886           kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));
1887 
1888           slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);
1889 
1890           cnt = 0;
1891           for (ii = istart; ii < iend + 1; ii++) {
1892             for (jj = jstart; jj < jend + 1; jj++) {
1893               for (kk = kstart; kk < kend + 1; kk++) {
1894                 if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/
1895                   cols[cnt++] = slot + ii + gnx * jj + gnx * gny * kk;
1896                 }
1897               }
1898             }
1899           }
1900           PetscCall(MatSetValuesBlockedLocal(J, 1, &slot, cnt, cols, values, INSERT_VALUES));
1901         }
1902       }
1903     }
1904     PetscCall(PetscFree(values));
1905     /* do not copy values to GPU since they are all zero and not yet needed there */
1906     PetscCall(MatBindToCPU(J, PETSC_TRUE));
1907     PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
1908     PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
1909     PetscCall(MatBindToCPU(J, PETSC_FALSE));
1910     PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
1911   }
1912   PetscCall(PetscFree(cols));
1913   PetscFunctionReturn(PETSC_SUCCESS);
1914 }
1915 
1916 /*
1917   This helper is for of SBAIJ preallocation, to discard the lower-triangular values which are difficult to
1918   identify in the local ordering with periodic domain.
1919 */
1920 static PetscErrorCode L2GFilterUpperTriangular(ISLocalToGlobalMapping ltog, PetscInt *row, PetscInt *cnt, PetscInt col[])
1921 {
1922   PetscInt i, n;
1923 
1924   PetscFunctionBegin;
1925   PetscCall(ISLocalToGlobalMappingApplyBlock(ltog, 1, row, row));
1926   PetscCall(ISLocalToGlobalMappingApplyBlock(ltog, *cnt, col, col));
1927   for (i = 0, n = 0; i < *cnt; i++) {
1928     if (col[i] >= *row) col[n++] = col[i];
1929   }
1930   *cnt = n;
1931   PetscFunctionReturn(PETSC_SUCCESS);
1932 }
1933 
1934 PetscErrorCode DMCreateMatrix_DA_2d_MPISBAIJ(DM da, Mat J)
1935 {
1936   PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
1937   PetscInt               m, n, dim, s, *cols, nc, col, cnt, *dnz, *onz;
1938   PetscInt               istart, iend, jstart, jend, ii, jj;
1939   MPI_Comm               comm;
1940   PetscScalar           *values;
1941   DMBoundaryType         bx, by;
1942   DMDAStencilType        st;
1943   ISLocalToGlobalMapping ltog;
1944 
1945   PetscFunctionBegin;
1946   /*
1947      nc - number of components per grid point
1948      col - number of colors needed in one direction for single component problem
1949   */
1950   PetscCall(DMDAGetInfo(da, &dim, &m, &n, NULL, NULL, NULL, NULL, &nc, &s, &bx, &by, NULL, &st));
1951   col = 2 * s + 1;
1952 
1953   PetscCall(DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL));
1954   PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL));
1955   PetscCall(PetscObjectGetComm((PetscObject)da, &comm));
1956 
1957   PetscCall(PetscMalloc1(col * col * nc * nc, &cols));
1958 
1959   PetscCall(DMGetLocalToGlobalMapping(da, &ltog));
1960 
1961   /* determine the matrix preallocation information */
1962   MatPreallocateBegin(comm, nx * ny, nx * ny, dnz, onz);
1963   for (i = xs; i < xs + nx; i++) {
1964     istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
1965     iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
1966     for (j = ys; j < ys + ny; j++) {
1967       jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
1968       jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
1969       slot   = i - gxs + gnx * (j - gys);
1970 
1971       /* Find block columns in block row */
1972       cnt = 0;
1973       for (ii = istart; ii < iend + 1; ii++) {
1974         for (jj = jstart; jj < jend + 1; jj++) {
1975           if (st == DMDA_STENCIL_BOX || !ii || !jj) cols[cnt++] = slot + ii + gnx * jj;
1976         }
1977       }
1978       PetscCall(L2GFilterUpperTriangular(ltog, &slot, &cnt, cols));
1979       PetscCall(MatPreallocateSymmetricSetBlock(slot, cnt, cols, dnz, onz));
1980     }
1981   }
1982   PetscCall(MatSeqSBAIJSetPreallocation(J, nc, 0, dnz));
1983   PetscCall(MatMPISBAIJSetPreallocation(J, nc, 0, dnz, 0, onz));
1984   MatPreallocateEnd(dnz, onz);
1985 
1986   PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));
1987 
1988   /*
1989     For each node in the grid: we get the neighbors in the local (on processor ordering
1990     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1991     PETSc ordering.
1992   */
1993   if (!da->prealloc_only) {
1994     PetscCall(PetscCalloc1(col * col * nc * nc, &values));
1995     for (i = xs; i < xs + nx; i++) {
1996       istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
1997       iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
1998       for (j = ys; j < ys + ny; j++) {
1999         jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
2000         jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
2001         slot   = i - gxs + gnx * (j - gys);
2002 
2003         /* Find block columns in block row */
2004         cnt = 0;
2005         for (ii = istart; ii < iend + 1; ii++) {
2006           for (jj = jstart; jj < jend + 1; jj++) {
2007             if (st == DMDA_STENCIL_BOX || !ii || !jj) cols[cnt++] = slot + ii + gnx * jj;
2008           }
2009         }
2010         PetscCall(L2GFilterUpperTriangular(ltog, &slot, &cnt, cols));
2011         PetscCall(MatSetValuesBlocked(J, 1, &slot, cnt, cols, values, INSERT_VALUES));
2012       }
2013     }
2014     PetscCall(PetscFree(values));
2015     /* do not copy values to GPU since they are all zero and not yet needed there */
2016     PetscCall(MatBindToCPU(J, PETSC_TRUE));
2017     PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
2018     PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
2019     PetscCall(MatBindToCPU(J, PETSC_FALSE));
2020     PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
2021   }
2022   PetscCall(PetscFree(cols));
2023   PetscFunctionReturn(PETSC_SUCCESS);
2024 }
2025 
2026 PetscErrorCode DMCreateMatrix_DA_3d_MPISBAIJ(DM da, Mat J)
2027 {
2028   PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
2029   PetscInt               m, n, dim, s, *cols, k, nc, col, cnt, p, *dnz, *onz;
2030   PetscInt               istart, iend, jstart, jend, kstart, kend, zs, nz, gzs, gnz, ii, jj, kk;
2031   MPI_Comm               comm;
2032   PetscScalar           *values;
2033   DMBoundaryType         bx, by, bz;
2034   DMDAStencilType        st;
2035   ISLocalToGlobalMapping ltog;
2036 
2037   PetscFunctionBegin;
2038   /*
2039      nc - number of components per grid point
2040      col - number of colors needed in one direction for single component problem
2041   */
2042   PetscCall(DMDAGetInfo(da, &dim, &m, &n, &p, NULL, NULL, NULL, &nc, &s, &bx, &by, &bz, &st));
2043   col = 2 * s + 1;
2044 
2045   PetscCall(DMDAGetCorners(da, &xs, &ys, &zs, &nx, &ny, &nz));
2046   PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, &gzs, &gnx, &gny, &gnz));
2047   PetscCall(PetscObjectGetComm((PetscObject)da, &comm));
2048 
2049   /* create the matrix */
2050   PetscCall(PetscMalloc1(col * col * col, &cols));
2051 
2052   PetscCall(DMGetLocalToGlobalMapping(da, &ltog));
2053 
2054   /* determine the matrix preallocation information */
2055   MatPreallocateBegin(comm, nx * ny * nz, nx * ny * nz, dnz, onz);
2056   for (i = xs; i < xs + nx; i++) {
2057     istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
2058     iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
2059     for (j = ys; j < ys + ny; j++) {
2060       jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
2061       jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
2062       for (k = zs; k < zs + nz; k++) {
2063         kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
2064         kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));
2065 
2066         slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);
2067 
2068         /* Find block columns in block row */
2069         cnt = 0;
2070         for (ii = istart; ii < iend + 1; ii++) {
2071           for (jj = jstart; jj < jend + 1; jj++) {
2072             for (kk = kstart; kk < kend + 1; kk++) {
2073               if ((st == DMDA_STENCIL_BOX) || (!ii && !jj) || (!jj && !kk) || (!ii && !kk)) cols[cnt++] = slot + ii + gnx * jj + gnx * gny * kk;
2074             }
2075           }
2076         }
2077         PetscCall(L2GFilterUpperTriangular(ltog, &slot, &cnt, cols));
2078         PetscCall(MatPreallocateSymmetricSetBlock(slot, cnt, cols, dnz, onz));
2079       }
2080     }
2081   }
2082   PetscCall(MatSeqSBAIJSetPreallocation(J, nc, 0, dnz));
2083   PetscCall(MatMPISBAIJSetPreallocation(J, nc, 0, dnz, 0, onz));
2084   MatPreallocateEnd(dnz, onz);
2085 
2086   PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));
2087 
2088   /*
2089     For each node in the grid: we get the neighbors in the local (on processor ordering
2090     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
2091     PETSc ordering.
2092   */
2093   if (!da->prealloc_only) {
2094     PetscCall(PetscCalloc1(col * col * col * nc * nc, &values));
2095     for (i = xs; i < xs + nx; i++) {
2096       istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
2097       iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
2098       for (j = ys; j < ys + ny; j++) {
2099         jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
2100         jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
2101         for (k = zs; k < zs + nz; k++) {
2102           kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
2103           kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));
2104 
2105           slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);
2106 
2107           cnt = 0;
2108           for (ii = istart; ii < iend + 1; ii++) {
2109             for (jj = jstart; jj < jend + 1; jj++) {
2110               for (kk = kstart; kk < kend + 1; kk++) {
2111                 if ((st == DMDA_STENCIL_BOX) || (!ii && !jj) || (!jj && !kk) || (!ii && !kk)) cols[cnt++] = slot + ii + gnx * jj + gnx * gny * kk;
2112               }
2113             }
2114           }
2115           PetscCall(L2GFilterUpperTriangular(ltog, &slot, &cnt, cols));
2116           PetscCall(MatSetValuesBlocked(J, 1, &slot, cnt, cols, values, INSERT_VALUES));
2117         }
2118       }
2119     }
2120     PetscCall(PetscFree(values));
2121     /* do not copy values to GPU since they are all zero and not yet needed there */
2122     PetscCall(MatBindToCPU(J, PETSC_TRUE));
2123     PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
2124     PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
2125     PetscCall(MatBindToCPU(J, PETSC_FALSE));
2126     PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
2127   }
2128   PetscCall(PetscFree(cols));
2129   PetscFunctionReturn(PETSC_SUCCESS);
2130 }
2131 
2132 /* ---------------------------------------------------------------------------------*/
2133 
2134 PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ_Fill(DM da, Mat J)
2135 {
2136   PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
2137   PetscInt               m, n, dim, s, *cols, k, nc, row, col, cnt, maxcnt = 0, l, p, *dnz, *onz;
2138   PetscInt               istart, iend, jstart, jend, kstart, kend, zs, nz, gzs, gnz, ii, jj, kk, M, N, P;
2139   DM_DA                 *dd = (DM_DA *)da->data;
2140   PetscInt               ifill_col, *dfill = dd->dfill, *ofill = dd->ofill;
2141   MPI_Comm               comm;
2142   PetscScalar           *values;
2143   DMBoundaryType         bx, by, bz;
2144   ISLocalToGlobalMapping ltog;
2145   DMDAStencilType        st;
2146   PetscBool              removedups = PETSC_FALSE;
2147 
2148   PetscFunctionBegin;
2149   /*
2150          nc - number of components per grid point
2151          col - number of colors needed in one direction for single component problem
2152   */
2153   PetscCall(DMDAGetInfo(da, &dim, &m, &n, &p, &M, &N, &P, &nc, &s, &bx, &by, &bz, &st));
2154   col = 2 * s + 1;
2155 
2156   /*
2157        With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times
2158        because of "wrapping" around the end of the domain hitting an entry already counted in the other direction.
2159   */
2160   if (M == 1 && 2 * s >= m) removedups = PETSC_TRUE;
2161   if (N == 1 && 2 * s >= n) removedups = PETSC_TRUE;
2162   if (P == 1 && 2 * s >= p) removedups = PETSC_TRUE;
2163 
2164   PetscCall(DMDAGetCorners(da, &xs, &ys, &zs, &nx, &ny, &nz));
2165   PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, &gzs, &gnx, &gny, &gnz));
2166   PetscCall(PetscObjectGetComm((PetscObject)da, &comm));
2167 
2168   PetscCall(PetscMalloc1(col * col * col * nc, &cols));
2169   PetscCall(DMGetLocalToGlobalMapping(da, &ltog));
2170 
2171   /* determine the matrix preallocation information */
2172   MatPreallocateBegin(comm, nc * nx * ny * nz, nc * nx * ny * nz, dnz, onz);
2173 
2174   PetscCall(MatSetBlockSize(J, nc));
2175   for (i = xs; i < xs + nx; i++) {
2176     istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
2177     iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
2178     for (j = ys; j < ys + ny; j++) {
2179       jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
2180       jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
2181       for (k = zs; k < zs + nz; k++) {
2182         kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
2183         kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));
2184 
2185         slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);
2186 
2187         for (l = 0; l < nc; l++) {
2188           cnt = 0;
2189           for (ii = istart; ii < iend + 1; ii++) {
2190             for (jj = jstart; jj < jend + 1; jj++) {
2191               for (kk = kstart; kk < kend + 1; kk++) {
2192                 if (ii || jj || kk) {
2193                   if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/
2194                     for (ifill_col = ofill[l]; ifill_col < ofill[l + 1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc * (slot + ii + gnx * jj + gnx * gny * kk);
2195                   }
2196                 } else {
2197                   if (dfill) {
2198                     for (ifill_col = dfill[l]; ifill_col < dfill[l + 1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc * (slot + ii + gnx * jj + gnx * gny * kk);
2199                   } else {
2200                     for (ifill_col = 0; ifill_col < nc; ifill_col++) cols[cnt++] = ifill_col + nc * (slot + ii + gnx * jj + gnx * gny * kk);
2201                   }
2202                 }
2203               }
2204             }
2205           }
2206           row    = l + nc * (slot);
2207           maxcnt = PetscMax(maxcnt, cnt);
2208           if (removedups) PetscCall(MatPreallocateSetLocalRemoveDups(ltog, 1, &row, ltog, cnt, cols, dnz, onz));
2209           else PetscCall(MatPreallocateSetLocal(ltog, 1, &row, ltog, cnt, cols, dnz, onz));
2210         }
2211       }
2212     }
2213   }
2214   PetscCall(MatSeqAIJSetPreallocation(J, 0, dnz));
2215   PetscCall(MatMPIAIJSetPreallocation(J, 0, dnz, 0, onz));
2216   MatPreallocateEnd(dnz, onz);
2217   PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));
2218 
2219   /*
2220     For each node in the grid: we get the neighbors in the local (on processor ordering
2221     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
2222     PETSc ordering.
2223   */
2224   if (!da->prealloc_only) {
2225     PetscCall(PetscCalloc1(maxcnt, &values));
2226     for (i = xs; i < xs + nx; i++) {
2227       istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
2228       iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
2229       for (j = ys; j < ys + ny; j++) {
2230         jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
2231         jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
2232         for (k = zs; k < zs + nz; k++) {
2233           kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
2234           kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));
2235 
2236           slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);
2237 
2238           for (l = 0; l < nc; l++) {
2239             cnt = 0;
2240             for (ii = istart; ii < iend + 1; ii++) {
2241               for (jj = jstart; jj < jend + 1; jj++) {
2242                 for (kk = kstart; kk < kend + 1; kk++) {
2243                   if (ii || jj || kk) {
2244                     if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/
2245                       for (ifill_col = ofill[l]; ifill_col < ofill[l + 1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc * (slot + ii + gnx * jj + gnx * gny * kk);
2246                     }
2247                   } else {
2248                     if (dfill) {
2249                       for (ifill_col = dfill[l]; ifill_col < dfill[l + 1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc * (slot + ii + gnx * jj + gnx * gny * kk);
2250                     } else {
2251                       for (ifill_col = 0; ifill_col < nc; ifill_col++) cols[cnt++] = ifill_col + nc * (slot + ii + gnx * jj + gnx * gny * kk);
2252                     }
2253                   }
2254                 }
2255               }
2256             }
2257             row = l + nc * (slot);
2258             PetscCall(MatSetValuesLocal(J, 1, &row, cnt, cols, values, INSERT_VALUES));
2259           }
2260         }
2261       }
2262     }
2263     PetscCall(PetscFree(values));
2264     /* do not copy values to GPU since they are all zero and not yet needed there */
2265     PetscCall(MatBindToCPU(J, PETSC_TRUE));
2266     PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
2267     PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
2268     PetscCall(MatBindToCPU(J, PETSC_FALSE));
2269     PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
2270   }
2271   PetscCall(PetscFree(cols));
2272   PetscFunctionReturn(PETSC_SUCCESS);
2273 }
2274